linux/fs/btrfs/inode-map.c
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/pagemap.h>

#include "ctree.h"
#include "disk-io.h"
#include "free-space-cache.h"
#include "inode-map.h"
#include "transaction.h"

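/*
 * Scan the commit root of the given tree for INODE_ITEM keys and record
 * the gaps between allocated inode numbers in root->free_ino_ctl, so
 * that btrfs_find_free_ino() can hand them out again. This runs as a
 * background kthread started from start_caching().
 */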
static int caching_kthread(void *data)
{
        struct btrfs_root *root = data;
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
        struct btrfs_key key;
        struct btrfs_path *path;
        struct extent_buffer *leaf;
        u64 last = (u64)-1;
        int slot;
        int ret;

        if (!btrfs_test_opt(fs_info, INODE_MAP_CACHE))
                return 0;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        /* Since the commit root is read-only, we can safely skip locking. */
        path->skip_locking = 1;
        path->search_commit_root = 1;
        path->reada = READA_FORWARD;

        key.objectid = BTRFS_FIRST_FREE_OBJECTID;
        key.offset = 0;
        key.type = BTRFS_INODE_ITEM_KEY;
again:
        /* need to make sure the commit_root doesn't disappear */
        down_read(&fs_info->commit_root_sem);

        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
        if (ret < 0)
                goto out;

        while (1) {
                if (btrfs_fs_closing(fs_info))
                        goto out;

                leaf = path->nodes[0];
                slot = path->slots[0];
                if (slot >= btrfs_header_nritems(leaf)) {
                        ret = btrfs_next_leaf(root, path);
                        if (ret < 0)
                                goto out;
                        else if (ret > 0)
                                break;

                        if (need_resched() ||
                            btrfs_transaction_in_commit(fs_info)) {
                                leaf = path->nodes[0];

                                if (WARN_ON(btrfs_header_nritems(leaf) == 0))
                                        break;

                                /*
                                 * Save the key so we can advance forward
                                 * in the next search.
                                 */
                                btrfs_item_key_to_cpu(leaf, &key, 0);
                                btrfs_release_path(path);
                                root->ino_cache_progress = last;
                                up_read(&fs_info->commit_root_sem);
                                schedule_timeout(1);
                                goto again;
                        } else
                                continue;
                }

                btrfs_item_key_to_cpu(leaf, &key, slot);

                if (key.type != BTRFS_INODE_ITEM_KEY)
                        goto next;

                if (key.objectid >= root->highest_objectid)
                        break;

                if (last != (u64)-1 && last + 1 != key.objectid) {
                        __btrfs_add_free_space(fs_info, ctl, last + 1,
                                               key.objectid - last - 1);
                        wake_up(&root->ino_cache_wait);
                }

                last = key.objectid;
next:
                path->slots[0]++;
        }

        if (last < root->highest_objectid - 1) {
                __btrfs_add_free_space(fs_info, ctl, last + 1,
                                       root->highest_objectid - last - 1);
        }

        spin_lock(&root->ino_cache_lock);
        root->ino_cache_state = BTRFS_CACHE_FINISHED;
        spin_unlock(&root->ino_cache_lock);

        root->ino_cache_progress = (u64)-1;
        btrfs_unpin_free_ino(root);
out:
        wake_up(&root->ino_cache_wait);
        up_read(&fs_info->commit_root_sem);

        btrfs_free_path(path);

        return ret;
}

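/*
 * Kick off caching of free inode numbers for this root. First try to load
 * the cache that was written to disk on a previous commit; if that fails,
 * reserve the whole range above the current highest objectid and start
 * caching_kthread() to find the holes below it.
 */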
static void start_caching(struct btrfs_root *root)
{
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
        struct task_struct *tsk;
        int ret;
        u64 objectid;

        if (!btrfs_test_opt(fs_info, INODE_MAP_CACHE))
                return;

        spin_lock(&root->ino_cache_lock);
        if (root->ino_cache_state != BTRFS_CACHE_NO) {
                spin_unlock(&root->ino_cache_lock);
                return;
        }

        root->ino_cache_state = BTRFS_CACHE_STARTED;
        spin_unlock(&root->ino_cache_lock);

        ret = load_free_ino_cache(fs_info, root);
        if (ret == 1) {
                spin_lock(&root->ino_cache_lock);
                root->ino_cache_state = BTRFS_CACHE_FINISHED;
                spin_unlock(&root->ino_cache_lock);
                return;
        }

        /*
         * It can be quite time-consuming to fill the cache by searching
         * through the tree for the existing inode items, and this can keep
         * the ino allocation path waiting. Therefore at the start we quickly
         * find out the highest inode number and we know we can use inode
         * numbers which fall in [highest_ino + 1, BTRFS_LAST_FREE_OBJECTID].
         */
        ret = btrfs_find_free_objectid(root, &objectid);
        if (!ret && objectid <= BTRFS_LAST_FREE_OBJECTID) {
                __btrfs_add_free_space(fs_info, ctl, objectid,
                                       BTRFS_LAST_FREE_OBJECTID - objectid + 1);
        }

        tsk = kthread_run(caching_kthread, root, "btrfs-ino-cache-%llu",
                          root->root_key.objectid);
        if (IS_ERR(tsk)) {
                btrfs_warn(fs_info, "failed to start inode caching task");
                btrfs_clear_pending_and_info(fs_info, INODE_MAP_CACHE,
                                             "disabling inode map caching");
        }
}

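/*
 * Allocate a free inode number. If the INODE_MAP_CACHE mount option is
 * enabled the number is taken from the free_ino cache, waiting for the
 * caching thread if the cache is still empty; otherwise we simply bump
 * the highest objectid.
 */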
int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid)
{
        if (!btrfs_test_opt(root->fs_info, INODE_MAP_CACHE))
                return btrfs_find_free_objectid(root, objectid);

again:
        *objectid = btrfs_find_ino_for_alloc(root);

        if (*objectid != 0)
                return 0;

        start_caching(root);

        wait_event(root->ino_cache_wait,
                   root->ino_cache_state == BTRFS_CACHE_FINISHED ||
                   root->free_ino_ctl->free_space > 0);

        if (root->ino_cache_state == BTRFS_CACHE_FINISHED &&
            root->free_ino_ctl->free_space == 0)
                return -ENOSPC;
        else
                goto again;
}

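/*
 * Return an inode number to the cache, e.g. because creating the inode
 * failed or the inode was deleted. The number is parked in the pinned
 * tree and only moved to the free_ino tree at commit time, see
 * btrfs_unpin_free_ino().
 */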
void btrfs_return_ino(struct btrfs_root *root, u64 objectid)
{
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct btrfs_free_space_ctl *pinned = root->free_ino_pinned;

        if (!btrfs_test_opt(fs_info, INODE_MAP_CACHE))
                return;
again:
        if (root->ino_cache_state == BTRFS_CACHE_FINISHED) {
                __btrfs_add_free_space(fs_info, pinned, objectid, 1);
        } else {
                down_write(&fs_info->commit_root_sem);
                spin_lock(&root->ino_cache_lock);
                if (root->ino_cache_state == BTRFS_CACHE_FINISHED) {
                        spin_unlock(&root->ino_cache_lock);
                        up_write(&fs_info->commit_root_sem);
                        goto again;
                }
                spin_unlock(&root->ino_cache_lock);

                start_caching(root);

                __btrfs_add_free_space(fs_info, pinned, objectid, 1);

                up_write(&fs_info->commit_root_sem);
        }
}

/*
 * When a transaction is committed, we'll move the inode numbers which are
 * no larger than root->ino_cache_progress from the pinned tree to the
 * free_ino tree, and the others will just be dropped, because the commit
 * root we were searching has changed.
 *
 * Must be called with root->fs_info->commit_root_sem held.
 */
void btrfs_unpin_free_ino(struct btrfs_root *root)
{
        struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
        struct rb_root *rbroot = &root->free_ino_pinned->free_space_offset;
        spinlock_t *rbroot_lock = &root->free_ino_pinned->tree_lock;
        struct btrfs_free_space *info;
        struct rb_node *n;
        u64 count;

        if (!btrfs_test_opt(root->fs_info, INODE_MAP_CACHE))
                return;

        while (1) {
                bool add_to_ctl = true;

                spin_lock(rbroot_lock);
                n = rb_first(rbroot);
                if (!n) {
                        spin_unlock(rbroot_lock);
                        break;
                }

                info = rb_entry(n, struct btrfs_free_space, offset_index);
                BUG_ON(info->bitmap); /* Logic error */

                if (info->offset > root->ino_cache_progress)
                        add_to_ctl = false;
                else if (info->offset + info->bytes > root->ino_cache_progress)
                        count = root->ino_cache_progress - info->offset + 1;
                else
                        count = info->bytes;

                rb_erase(&info->offset_index, rbroot);
                spin_unlock(rbroot_lock);
                if (add_to_ctl)
                        __btrfs_add_free_space(root->fs_info, ctl,
                                               info->offset, count);
                kmem_cache_free(btrfs_free_space_cachep, info);
        }
}

#define INIT_THRESHOLD  ((SZ_32K / 2) / sizeof(struct btrfs_free_space))
#define INODES_PER_BITMAP (PAGE_SIZE * 8)

/*
 * The goal is to keep the memory used by the free_ino tree from
 * exceeding the memory we would use if we used bitmaps only.
 */
static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
{
        struct btrfs_free_space *info;
        struct rb_node *n;
        int max_ino;
        int max_bitmaps;

        n = rb_last(&ctl->free_space_offset);
        if (!n) {
                ctl->extents_thresh = INIT_THRESHOLD;
                return;
        }
        info = rb_entry(n, struct btrfs_free_space, offset_index);

        /*
         * Find the maximum inode number in the filesystem. Note we
         * ignore the fact that this can be a bitmap, because we are
         * not doing a precise calculation.
         */
        max_ino = info->bytes - 1;

        max_bitmaps = ALIGN(max_ino, INODES_PER_BITMAP) / INODES_PER_BITMAP;
        if (max_bitmaps <= ctl->total_bitmaps) {
                ctl->extents_thresh = 0;
                return;
        }

        ctl->extents_thresh = (max_bitmaps - ctl->total_bitmaps) *
                                PAGE_SIZE / sizeof(*info);
}

/*
 * We don't fall back to bitmaps if we are below the extents threshold
 * or if this chunk of inode numbers is a big one.
 */
static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
                       struct btrfs_free_space *info)
{
        if (ctl->free_extents < ctl->extents_thresh ||
            info->bytes > INODES_PER_BITMAP / 10)
                return false;

        return true;
}

static const struct btrfs_free_space_op free_ino_op = {
        .recalc_thresholds      = recalculate_thresholds,
        .use_bitmap             = use_bitmap,
};

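/*
 * The pinned tree keeps extents_thresh at 0 (set up in
 * btrfs_init_free_ino_ctl()), so there is nothing to recalculate.
 */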
static void pinned_recalc_thresholds(struct btrfs_free_space_ctl *ctl)
{
}

static bool pinned_use_bitmap(struct btrfs_free_space_ctl *ctl,
                              struct btrfs_free_space *info)
{
        /*
         * We always use extents for two reasons:
         *
         * - The pinned tree is only used during the caching work.
         * - It keeps the code simpler. See btrfs_unpin_free_ino().
         */
        return false;
}

static const struct btrfs_free_space_op pinned_free_ino_op = {
        .recalc_thresholds      = pinned_recalc_thresholds,
        .use_bitmap             = pinned_use_bitmap,
};

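/*
 * Set up the two in-memory free space trees that back the inode number
 * cache: free_ino_ctl holds numbers that are ready to be handed out,
 * free_ino_pinned holds numbers returned during the current transaction.
 */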
void btrfs_init_free_ino_ctl(struct btrfs_root *root)
{
        struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
        struct btrfs_free_space_ctl *pinned = root->free_ino_pinned;

        spin_lock_init(&ctl->tree_lock);
        ctl->unit = 1;
        ctl->start = 0;
        ctl->private = NULL;
        ctl->op = &free_ino_op;
        INIT_LIST_HEAD(&ctl->trimming_ranges);
        mutex_init(&ctl->cache_writeout_mutex);

        /*
         * Initially we allow the use of 16K of RAM to cache chunks of
         * inode numbers before we resort to bitmaps. This is somewhat
         * arbitrary, but it will be adjusted at runtime.
         */
        ctl->extents_thresh = INIT_THRESHOLD;

        spin_lock_init(&pinned->tree_lock);
        pinned->unit = 1;
        pinned->start = 0;
        pinned->private = NULL;
        pinned->extents_thresh = 0;
        pinned->op = &pinned_free_ino_op;
}

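/*
 * Write the contents of free_ino_ctl into the free ino cache inode at
 * transaction commit time, preallocating the file space first, so the
 * cache can be reloaded by load_free_ino_cache() on the next mount.
 */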
int btrfs_save_ino_cache(struct btrfs_root *root,
                         struct btrfs_trans_handle *trans)
{
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
        struct btrfs_path *path;
        struct inode *inode;
        struct btrfs_block_rsv *rsv;
        struct extent_changeset *data_reserved = NULL;
        u64 num_bytes;
        u64 alloc_hint = 0;
        int ret;
        int prealloc;
        bool retry = false;

        /* Only the fs tree and subvolume/snapshot trees need the ino cache */
        if (root->root_key.objectid != BTRFS_FS_TREE_OBJECTID &&
            (root->root_key.objectid < BTRFS_FIRST_FREE_OBJECTID ||
             root->root_key.objectid > BTRFS_LAST_FREE_OBJECTID))
                return 0;

        /* Don't save inode cache if we are deleting this root */
        if (btrfs_root_refs(&root->root_item) == 0)
                return 0;

        if (!btrfs_test_opt(fs_info, INODE_MAP_CACHE))
                return 0;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        rsv = trans->block_rsv;
        trans->block_rsv = &fs_info->trans_block_rsv;

        num_bytes = trans->bytes_reserved;
        /*
         * 1 item for inode item insertion if needed
         * 4 items for inode item update (in the worst case)
         * 1 item for slack space if we need to do truncation
         * 1 item for free space object
         * 3 items for pre-allocation
         */
        trans->bytes_reserved = btrfs_calc_trans_metadata_size(fs_info, 10);
        ret = btrfs_block_rsv_add(root, trans->block_rsv,
                                  trans->bytes_reserved,
                                  BTRFS_RESERVE_NO_FLUSH);
        if (ret)
                goto out;
        trace_btrfs_space_reservation(fs_info, "ino_cache", trans->transid,
                                      trans->bytes_reserved, 1);
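        /* Look up the free ino cache inode, creating it on the first pass. */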
again:
        inode = lookup_free_ino_inode(root, path);
        if (IS_ERR(inode) && (PTR_ERR(inode) != -ENOENT || retry)) {
                ret = PTR_ERR(inode);
                goto out_release;
        }

        if (IS_ERR(inode)) {
                BUG_ON(retry); /* Logic error */
                retry = true;

                ret = create_free_ino_inode(root, trans, path);
                if (ret)
                        goto out_release;
                goto again;
        }

        BTRFS_I(inode)->generation = 0;
        ret = btrfs_update_inode(trans, root, inode);
        if (ret) {
                btrfs_abort_transaction(trans, ret);
                goto out_put;
        }

        if (i_size_read(inode) > 0) {
                ret = btrfs_truncate_free_space_cache(trans, NULL, inode);
                if (ret) {
                        if (ret != -ENOSPC)
                                btrfs_abort_transaction(trans, ret);
                        goto out_put;
                }
        }

        spin_lock(&root->ino_cache_lock);
        if (root->ino_cache_state != BTRFS_CACHE_FINISHED) {
                ret = -1;
                spin_unlock(&root->ino_cache_lock);
                goto out_put;
        }
        spin_unlock(&root->ino_cache_lock);

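        /*
         * Estimate how much space the cache file will need: one in-memory
         * entry struct per extent, rounded up to the page size, plus one
         * page per bitmap.
         */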
        spin_lock(&ctl->tree_lock);
        prealloc = sizeof(struct btrfs_free_space) * ctl->free_extents;
        prealloc = ALIGN(prealloc, PAGE_SIZE);
        prealloc += ctl->total_bitmaps * PAGE_SIZE;
        spin_unlock(&ctl->tree_lock);

        /* Just to make sure we have enough space */
        prealloc += 8 * PAGE_SIZE;

        ret = btrfs_delalloc_reserve_space(inode, &data_reserved, 0, prealloc);
        if (ret)
                goto out_put;

        ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc,
                                              prealloc, prealloc, &alloc_hint);
        if (ret) {
                btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc);
                goto out_put;
        }

        ret = btrfs_write_out_ino_cache(root, trans, path, inode);
        btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc);
out_put:
        iput(inode);
out_release:
        trace_btrfs_space_reservation(fs_info, "ino_cache", trans->transid,
                                      trans->bytes_reserved, 0);
        btrfs_block_rsv_release(fs_info, trans->block_rsv,
                                trans->bytes_reserved);
out:
        trans->block_rsv = rsv;
        trans->bytes_reserved = num_bytes;

        btrfs_free_path(path);
        extent_changeset_free(data_reserved);
        return ret;
}

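/*
 * Search backwards from BTRFS_LAST_FREE_OBJECTID for the highest objectid
 * currently present in the tree; used to initialize root->highest_objectid
 * when a root is read in.
 */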
int btrfs_find_highest_objectid(struct btrfs_root *root, u64 *objectid)
{
        struct btrfs_path *path;
        int ret;
        struct extent_buffer *l;
        struct btrfs_key search_key;
        struct btrfs_key found_key;
        int slot;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        search_key.objectid = BTRFS_LAST_FREE_OBJECTID;
        search_key.type = -1;
        search_key.offset = (u64)-1;
        ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
        if (ret < 0)
                goto error;
        BUG_ON(ret == 0); /* Corruption */
        if (path->slots[0] > 0) {
                slot = path->slots[0] - 1;
                l = path->nodes[0];
                btrfs_item_key_to_cpu(l, &found_key, slot);
                *objectid = max_t(u64, found_key.objectid,
                                  BTRFS_FIRST_FREE_OBJECTID - 1);
        } else {
                *objectid = BTRFS_FIRST_FREE_OBJECTID - 1;
        }
        ret = 0;
error:
        btrfs_free_path(path);
        return ret;
}

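/*
 * Hand out the next unused objectid by bumping root->highest_objectid,
 * failing with -ENOSPC once it reaches BTRFS_LAST_FREE_OBJECTID.
 */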
int btrfs_find_free_objectid(struct btrfs_root *root, u64 *objectid)
{
        int ret;
        mutex_lock(&root->objectid_mutex);

        if (unlikely(root->highest_objectid >= BTRFS_LAST_FREE_OBJECTID)) {
                btrfs_warn(root->fs_info,
                           "the objectid of root %llu reaches its highest value",
                           root->root_key.objectid);
                ret = -ENOSPC;
                goto out;
        }

        *objectid = ++root->highest_objectid;
        ret = 0;
out:
        mutex_unlock(&root->objectid_mutex);
        return ret;
}