linux/fs/btrfs/inode-map.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 */

#include <linux/kthread.h>
#include <linux/pagemap.h>

#include "ctree.h"
#include "disk-io.h"
#include "free-space-cache.h"
#include "inode-map.h"
#include "transaction.h"

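/*
 * Background worker that populates the free inode number cache. It walks
 * the commit root of this fs/subvolume tree looking for INODE_ITEM keys;
 * every gap between two consecutive inode numbers (and the range between
 * the last one and highest_objectid) is recorded in root->free_ino_ctl as
 * free numbers that btrfs_find_free_ino() can later hand out.
 */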
static int caching_kthread(void *data)
{
        struct btrfs_root *root = data;
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
        struct btrfs_key key;
        struct btrfs_path *path;
        struct extent_buffer *leaf;
        u64 last = (u64)-1;
        int slot;
        int ret;

        if (!btrfs_test_opt(fs_info, INODE_MAP_CACHE))
                return 0;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        /* Since the commit root is read-only, we can safely skip locking. */
        path->skip_locking = 1;
        path->search_commit_root = 1;
        path->reada = READA_FORWARD;

        key.objectid = BTRFS_FIRST_FREE_OBJECTID;
        key.offset = 0;
        key.type = BTRFS_INODE_ITEM_KEY;
again:
        /* need to make sure the commit_root doesn't disappear */
        down_read(&fs_info->commit_root_sem);

        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
        if (ret < 0)
                goto out;

        while (1) {
                if (btrfs_fs_closing(fs_info))
                        goto out;

                leaf = path->nodes[0];
                slot = path->slots[0];
                if (slot >= btrfs_header_nritems(leaf)) {
                        ret = btrfs_next_leaf(root, path);
                        if (ret < 0)
                                goto out;
                        else if (ret > 0)
                                break;

                        if (need_resched() ||
                            btrfs_transaction_in_commit(fs_info)) {
                                leaf = path->nodes[0];

                                if (WARN_ON(btrfs_header_nritems(leaf) == 0))
                                        break;

                                /*
                                 * Save the key so we can advance forward
                                 * in the next search.
                                 */
                                btrfs_item_key_to_cpu(leaf, &key, 0);
                                btrfs_release_path(path);
                                root->ino_cache_progress = last;
                                up_read(&fs_info->commit_root_sem);
                                schedule_timeout(1);
                                goto again;
                        } else
                                continue;
                }

                btrfs_item_key_to_cpu(leaf, &key, slot);

                if (key.type != BTRFS_INODE_ITEM_KEY)
                        goto next;

                if (key.objectid >= root->highest_objectid)
                        break;

                if (last != (u64)-1 && last + 1 != key.objectid) {
                        __btrfs_add_free_space(fs_info, ctl, last + 1,
                                               key.objectid - last - 1);
                        wake_up(&root->ino_cache_wait);
                }

                last = key.objectid;
next:
                path->slots[0]++;
        }

        if (last < root->highest_objectid - 1) {
                __btrfs_add_free_space(fs_info, ctl, last + 1,
                                       root->highest_objectid - last - 1);
        }

        spin_lock(&root->ino_cache_lock);
        root->ino_cache_state = BTRFS_CACHE_FINISHED;
        spin_unlock(&root->ino_cache_lock);

        root->ino_cache_progress = (u64)-1;
        btrfs_unpin_free_ino(root);
out:
        wake_up(&root->ino_cache_wait);
        up_read(&fs_info->commit_root_sem);

        btrfs_free_path(path);

        return ret;
}

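/*
 * Begin filling the free inode number cache for @root. First try to load a
 * cache previously written to disk; if none is usable, immediately mark
 * everything above the current highest objectid as free so allocations need
 * not wait, then start caching_kthread() to discover the gaps below it.
 */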
static void start_caching(struct btrfs_root *root)
{
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
        struct task_struct *tsk;
        int ret;
        u64 objectid;

        if (!btrfs_test_opt(fs_info, INODE_MAP_CACHE))
                return;

        spin_lock(&root->ino_cache_lock);
        if (root->ino_cache_state != BTRFS_CACHE_NO) {
                spin_unlock(&root->ino_cache_lock);
                return;
        }

        root->ino_cache_state = BTRFS_CACHE_STARTED;
        spin_unlock(&root->ino_cache_lock);

        ret = load_free_ino_cache(fs_info, root);
        if (ret == 1) {
                spin_lock(&root->ino_cache_lock);
                root->ino_cache_state = BTRFS_CACHE_FINISHED;
                spin_unlock(&root->ino_cache_lock);
                return;
        }

        /*
         * It can be quite time-consuming to fill the cache by searching
         * through the fs tree, and this can keep the ino allocation path
         * waiting. Therefore at start we quickly find out the highest
         * inode number and we know we can use inode numbers which fall in
         * [highest_ino + 1, BTRFS_LAST_FREE_OBJECTID].
         */
        ret = btrfs_find_free_objectid(root, &objectid);
        if (!ret && objectid <= BTRFS_LAST_FREE_OBJECTID) {
                __btrfs_add_free_space(fs_info, ctl, objectid,
                                       BTRFS_LAST_FREE_OBJECTID - objectid + 1);
        }

        tsk = kthread_run(caching_kthread, root, "btrfs-ino-cache-%llu",
                          root->root_key.objectid);
        if (IS_ERR(tsk)) {
                btrfs_warn(fs_info, "failed to start inode caching task");
                btrfs_clear_pending_and_info(fs_info, INODE_MAP_CACHE,
                                             "disabling inode map caching");
        }
}

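/*
 * Allocate the next free inode number for @root. Without the inode map
 * cache this simply bumps the highest objectid. With the cache, take a
 * cached number when one is available; otherwise start caching and sleep
 * until either a free range shows up or caching finishes, in which case an
 * empty cache means the root is truly out of inode numbers.
 */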
int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid)
{
        if (!btrfs_test_opt(root->fs_info, INODE_MAP_CACHE))
                return btrfs_find_free_objectid(root, objectid);

again:
        *objectid = btrfs_find_ino_for_alloc(root);

        if (*objectid != 0)
                return 0;

        start_caching(root);

        wait_event(root->ino_cache_wait,
                   root->ino_cache_state == BTRFS_CACHE_FINISHED ||
                   root->free_ino_ctl->free_space > 0);

        if (root->ino_cache_state == BTRFS_CACHE_FINISHED &&
            root->free_ino_ctl->free_space == 0)
                return -ENOSPC;
        else
                goto again;
}

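/*
 * Give an inode number back, e.g. when inode creation fails after the
 * number was already handed out. A caller would pair the two calls roughly
 * like this (a sketch only, not code from this file; create_the_inode() is
 * a hypothetical helper):
 *
 *      u64 objectid;
 *      int ret;
 *
 *      ret = btrfs_find_free_ino(root, &objectid);
 *      if (ret)
 *              return ret;
 *      ret = create_the_inode(root, objectid);
 *      if (ret)
 *              btrfs_return_ino(root, objectid);
 *
 * The returned number lands in the pinned tree first and only migrates to
 * the free_ino tree at transaction commit (see btrfs_unpin_free_ino()), so
 * it cannot clash with ranges the caching thread is still discovering in
 * the old commit root.
 */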
void btrfs_return_ino(struct btrfs_root *root, u64 objectid)
{
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct btrfs_free_space_ctl *pinned = root->free_ino_pinned;

        if (!btrfs_test_opt(fs_info, INODE_MAP_CACHE))
                return;
again:
        if (root->ino_cache_state == BTRFS_CACHE_FINISHED) {
                __btrfs_add_free_space(fs_info, pinned, objectid, 1);
        } else {
                down_write(&fs_info->commit_root_sem);
                spin_lock(&root->ino_cache_lock);
                if (root->ino_cache_state == BTRFS_CACHE_FINISHED) {
                        spin_unlock(&root->ino_cache_lock);
                        up_write(&fs_info->commit_root_sem);
                        goto again;
                }
                spin_unlock(&root->ino_cache_lock);

                start_caching(root);

                __btrfs_add_free_space(fs_info, pinned, objectid, 1);

                up_write(&fs_info->commit_root_sem);
        }
}

/*
 * When a transaction is committed, we'll move those inode numbers which are
 * smaller than root->ino_cache_progress from the pinned tree to the free_ino
 * tree, and the others will just be dropped, because the commit root we were
 * searching has changed.
 *
 * Must be called with root->fs_info->commit_root_sem held.
 */
void btrfs_unpin_free_ino(struct btrfs_root *root)
{
        struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
        struct rb_root *rbroot = &root->free_ino_pinned->free_space_offset;
        spinlock_t *rbroot_lock = &root->free_ino_pinned->tree_lock;
        struct btrfs_free_space *info;
        struct rb_node *n;
        u64 count;

        if (!btrfs_test_opt(root->fs_info, INODE_MAP_CACHE))
                return;

        while (1) {
                spin_lock(rbroot_lock);
                n = rb_first(rbroot);
                if (!n) {
                        spin_unlock(rbroot_lock);
                        break;
                }

                info = rb_entry(n, struct btrfs_free_space, offset_index);
                BUG_ON(info->bitmap); /* Logic error */

                if (info->offset > root->ino_cache_progress)
                        count = 0;
                else
                        count = min(root->ino_cache_progress - info->offset + 1,
                                    info->bytes);

                rb_erase(&info->offset_index, rbroot);
                spin_unlock(rbroot_lock);
                if (count)
                        __btrfs_add_free_space(root->fs_info, ctl,
                                               info->offset, count);
                kmem_cache_free(btrfs_free_space_cachep, info);
        }
}

#define INIT_THRESHOLD  ((SZ_32K / 2) / sizeof(struct btrfs_free_space))
#define INODES_PER_BITMAP (PAGE_SIZE * 8)

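/*
 * For a sense of scale (assuming the common 4K page size, so the exact
 * numbers are architecture dependent): INODES_PER_BITMAP is 4096 * 8 =
 * 32768 inode numbers per bitmap page, and INIT_THRESHOLD allows 16K worth
 * of extent entries, i.e. a few hundred struct btrfs_free_space entries,
 * before bitmaps are considered.
 */
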
/*
 * The goal is to keep the memory used by the free_ino tree from exceeding
 * the memory we would use if we used bitmaps only.
 */
static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
{
        struct btrfs_free_space *info;
        struct rb_node *n;
        int max_ino;
        int max_bitmaps;

        n = rb_last(&ctl->free_space_offset);
        if (!n) {
                ctl->extents_thresh = INIT_THRESHOLD;
                return;
        }
        info = rb_entry(n, struct btrfs_free_space, offset_index);

        /*
         * Find the maximum inode number in the filesystem. Note we
         * ignore the fact that this can be a bitmap, because we are
         * not doing a precise calculation.
         */
        max_ino = info->bytes - 1;

        max_bitmaps = ALIGN(max_ino, INODES_PER_BITMAP) / INODES_PER_BITMAP;
        if (max_bitmaps <= ctl->total_bitmaps) {
                ctl->extents_thresh = 0;
                return;
        }

        ctl->extents_thresh = (max_bitmaps - ctl->total_bitmaps) *
                                PAGE_SIZE / sizeof(*info);
}

/*
 * We don't fall back to a bitmap if we are below the extents threshold or
 * if this chunk of inode numbers is a big one.
 */
static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
                       struct btrfs_free_space *info)
{
        if (ctl->free_extents < ctl->extents_thresh ||
            info->bytes > INODES_PER_BITMAP / 10)
                return false;

        return true;
}

static const struct btrfs_free_space_op free_ino_op = {
        .recalc_thresholds      = recalculate_thresholds,
        .use_bitmap             = use_bitmap,
};

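/*
 * The ops for the pinned tree are deliberately trivial: bitmaps are never
 * used (see the comment in pinned_use_bitmap() below), so there are no
 * thresholds to recalculate either.
 */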
static void pinned_recalc_thresholds(struct btrfs_free_space_ctl *ctl)
{
}

static bool pinned_use_bitmap(struct btrfs_free_space_ctl *ctl,
                              struct btrfs_free_space *info)
{
        /*
         * We always use extents for two reasons:
         *
         * - The pinned tree is only used while the caching work is in
         *   progress.
         * - It makes the code simpler. See btrfs_unpin_free_ino().
         */
        return false;
}

static const struct btrfs_free_space_op pinned_free_ino_op = {
        .recalc_thresholds      = pinned_recalc_thresholds,
        .use_bitmap             = pinned_use_bitmap,
};

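/*
 * Set up the two in-memory trees of a root: free_ino_ctl, from which inode
 * numbers are handed out, and free_ino_pinned, which holds numbers returned
 * during the current transaction. Both operate on individual inode numbers,
 * hence unit = 1.
 */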
void btrfs_init_free_ino_ctl(struct btrfs_root *root)
{
        struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
        struct btrfs_free_space_ctl *pinned = root->free_ino_pinned;

        spin_lock_init(&ctl->tree_lock);
        ctl->unit = 1;
        ctl->start = 0;
        ctl->private = NULL;
        ctl->op = &free_ino_op;
        INIT_LIST_HEAD(&ctl->trimming_ranges);
        mutex_init(&ctl->cache_writeout_mutex);

        /*
         * Initially we allow the use of 16K of RAM to cache chunks of
         * inode numbers before we resort to bitmaps. This is somewhat
         * arbitrary, but it will be adjusted at runtime.
         */
        ctl->extents_thresh = INIT_THRESHOLD;

        spin_lock_init(&pinned->tree_lock);
        pinned->unit = 1;
        pinned->start = 0;
        pinned->private = NULL;
        pinned->extents_thresh = 0;
        pinned->op = &pinned_free_ino_op;
}

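/*
 * Write the in-memory free inode number cache into a special cache inode
 * of @root at transaction commit time, so that a later mount can reload it
 * via load_free_ino_cache() instead of rescanning the whole tree.
 */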
int btrfs_save_ino_cache(struct btrfs_root *root,
                         struct btrfs_trans_handle *trans)
{
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
        struct btrfs_path *path;
        struct inode *inode;
        struct btrfs_block_rsv *rsv;
        struct extent_changeset *data_reserved = NULL;
        u64 num_bytes;
        u64 alloc_hint = 0;
        int ret;
        int prealloc;
        bool retry = false;

        /* Only the fs tree and subvol/snap roots need the ino cache */
        if (root->root_key.objectid != BTRFS_FS_TREE_OBJECTID &&
            (root->root_key.objectid < BTRFS_FIRST_FREE_OBJECTID ||
             root->root_key.objectid > BTRFS_LAST_FREE_OBJECTID))
                return 0;

        /* Don't save inode cache if we are deleting this root */
        if (btrfs_root_refs(&root->root_item) == 0)
                return 0;

        if (!btrfs_test_opt(fs_info, INODE_MAP_CACHE))
                return 0;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        rsv = trans->block_rsv;
        trans->block_rsv = &fs_info->trans_block_rsv;

        num_bytes = trans->bytes_reserved;
        /*
         * 1 item for inode item insertion if needed
         * 4 items for inode item update (in the worst case)
         * 1 item for slack space if we need to do truncation
         * 1 item for the free space object
         * 3 items for pre-allocation
         */
        trans->bytes_reserved = btrfs_calc_trans_metadata_size(fs_info, 10);
        ret = btrfs_block_rsv_add(root, trans->block_rsv,
                                  trans->bytes_reserved,
                                  BTRFS_RESERVE_NO_FLUSH);
        if (ret)
                goto out;
        trace_btrfs_space_reservation(fs_info, "ino_cache", trans->transid,
                                      trans->bytes_reserved, 1);
again:
        inode = lookup_free_ino_inode(root, path);
        if (IS_ERR(inode) && (PTR_ERR(inode) != -ENOENT || retry)) {
                ret = PTR_ERR(inode);
                goto out_release;
        }

        if (IS_ERR(inode)) {
                BUG_ON(retry); /* Logic error */
                retry = true;

                ret = create_free_ino_inode(root, trans, path);
                if (ret)
                        goto out_release;
                goto again;
        }

        BTRFS_I(inode)->generation = 0;
        ret = btrfs_update_inode(trans, root, inode);
        if (ret) {
                btrfs_abort_transaction(trans, ret);
                goto out_put;
        }

        if (i_size_read(inode) > 0) {
                ret = btrfs_truncate_free_space_cache(trans, NULL, inode);
                if (ret) {
                        if (ret != -ENOSPC)
                                btrfs_abort_transaction(trans, ret);
                        goto out_put;
                }
        }

        spin_lock(&root->ino_cache_lock);
        if (root->ino_cache_state != BTRFS_CACHE_FINISHED) {
                ret = -1;
                spin_unlock(&root->ino_cache_lock);
                goto out_put;
        }
        spin_unlock(&root->ino_cache_lock);

        spin_lock(&ctl->tree_lock);
        prealloc = sizeof(struct btrfs_free_space) * ctl->free_extents;
        prealloc = ALIGN(prealloc, PAGE_SIZE);
        prealloc += ctl->total_bitmaps * PAGE_SIZE;
        spin_unlock(&ctl->tree_lock);

        /* Just to make sure we have enough space */
        prealloc += 8 * PAGE_SIZE;

        ret = btrfs_delalloc_reserve_space(inode, &data_reserved, 0, prealloc);
        if (ret)
                goto out_put;

        ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc,
                                              prealloc, prealloc, &alloc_hint);
        if (ret) {
                btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc, true);
                goto out_put;
        }

        ret = btrfs_write_out_ino_cache(root, trans, path, inode);
        btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc, false);
out_put:
        iput(inode);
out_release:
        trace_btrfs_space_reservation(fs_info, "ino_cache", trans->transid,
                                      trans->bytes_reserved, 0);
        btrfs_block_rsv_release(fs_info, trans->block_rsv,
                                trans->bytes_reserved);
out:
        trans->block_rsv = rsv;
        trans->bytes_reserved = num_bytes;

        btrfs_free_path(path);
        extent_changeset_free(data_reserved);
        return ret;
}

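/*
 * Find the highest objectid currently in use in @root: search for a key
 * just past the largest allowed inode objectid and step back one slot.
 * Keys sort by objectid first, so the preceding item carries the highest
 * objectid; if there is none, report one less than the first valid inode
 * number.
 */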
int btrfs_find_highest_objectid(struct btrfs_root *root, u64 *objectid)
{
        struct btrfs_path *path;
        int ret;
        struct extent_buffer *l;
        struct btrfs_key search_key;
        struct btrfs_key found_key;
        int slot;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        search_key.objectid = BTRFS_LAST_FREE_OBJECTID;
        search_key.type = -1;
        search_key.offset = (u64)-1;
        ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
        if (ret < 0)
                goto error;
        BUG_ON(ret == 0); /* Corruption */
        if (path->slots[0] > 0) {
                slot = path->slots[0] - 1;
                l = path->nodes[0];
                btrfs_item_key_to_cpu(l, &found_key, slot);
                *objectid = max_t(u64, found_key.objectid,
                                  BTRFS_FIRST_FREE_OBJECTID - 1);
        } else {
                *objectid = BTRFS_FIRST_FREE_OBJECTID - 1;
        }
        ret = 0;
error:
        btrfs_free_path(path);
        return ret;
}

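/*
 * Hand out the objectid right above the highest one ever used in this
 * root. Unlike btrfs_find_free_ino(), this never reuses the gaps left by
 * deleted inodes; it only counts upwards, serialized by objectid_mutex.
 */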
int btrfs_find_free_objectid(struct btrfs_root *root, u64 *objectid)
{
        int ret;

        mutex_lock(&root->objectid_mutex);

        if (unlikely(root->highest_objectid >= BTRFS_LAST_FREE_OBJECTID)) {
                btrfs_warn(root->fs_info,
                           "the objectid of root %llu reaches its highest value",
                           root->root_key.objectid);
                ret = -ENOSPC;
                goto out;
        }

        *objectid = ++root->highest_objectid;
        ret = 0;
out:
        mutex_unlock(&root->objectid_mutex);
        return ret;
}