linux/fs/btrfs/free-space-cache.c
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Copyright (C) 2008 Red Hat.  All rights reserved.
   4 */
   5
   6#include <linux/pagemap.h>
   7#include <linux/sched.h>
   8#include <linux/sched/signal.h>
   9#include <linux/slab.h>
  10#include <linux/math64.h>
  11#include <linux/ratelimit.h>
  12#include <linux/error-injection.h>
  13#include <linux/sched/mm.h>
  14#include "ctree.h"
  15#include "free-space-cache.h"
  16#include "transaction.h"
  17#include "disk-io.h"
  18#include "extent_io.h"
  19#include "inode-map.h"
  20#include "volumes.h"
  21#include "space-info.h"
  22#include "delalloc-space.h"
  23#include "block-group.h"
  24#include "discard.h"
  25
  26#define BITS_PER_BITMAP         (PAGE_SIZE * 8UL)
  27#define MAX_CACHE_BYTES_PER_GIG SZ_64K
  28#define FORCE_EXTENT_THRESHOLD  SZ_1M
  29
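    /*
     * A range that is currently being trimmed.  Trim ranges are tracked on
     * ctl->trimming_ranges so that a concurrent cache writeout does not miss
     * free space that is temporarily removed from the free space tree while
     * the trim runs (see write_cache_extent_entries() below).
     */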
  30struct btrfs_trim_range {
  31        u64 start;
  32        u64 bytes;
  33        struct list_head list;
  34};
  35
  36static int count_bitmap_extents(struct btrfs_free_space_ctl *ctl,
  37                                struct btrfs_free_space *bitmap_info);
  38static int link_free_space(struct btrfs_free_space_ctl *ctl,
  39                           struct btrfs_free_space *info);
  40static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
  41                              struct btrfs_free_space *info);
  42static int btrfs_wait_cache_io_root(struct btrfs_root *root,
  43                             struct btrfs_trans_handle *trans,
  44                             struct btrfs_io_ctl *io_ctl,
  45                             struct btrfs_path *path);
  46
  47static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
  48                                               struct btrfs_path *path,
  49                                               u64 offset)
  50{
  51        struct btrfs_fs_info *fs_info = root->fs_info;
  52        struct btrfs_key key;
  53        struct btrfs_key location;
  54        struct btrfs_disk_key disk_key;
  55        struct btrfs_free_space_header *header;
  56        struct extent_buffer *leaf;
  57        struct inode *inode = NULL;
  58        unsigned nofs_flag;
  59        int ret;
  60
  61        key.objectid = BTRFS_FREE_SPACE_OBJECTID;
  62        key.offset = offset;
  63        key.type = 0;
  64
  65        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
  66        if (ret < 0)
  67                return ERR_PTR(ret);
  68        if (ret > 0) {
  69                btrfs_release_path(path);
  70                return ERR_PTR(-ENOENT);
  71        }
  72
  73        leaf = path->nodes[0];
  74        header = btrfs_item_ptr(leaf, path->slots[0],
  75                                struct btrfs_free_space_header);
  76        btrfs_free_space_key(leaf, header, &disk_key);
  77        btrfs_disk_key_to_cpu(&location, &disk_key);
  78        btrfs_release_path(path);
  79
  80        /*
  81         * We are often under a trans handle at this point, so we need to make
  82         * sure NOFS is set to keep us from deadlocking.
  83         */
  84        nofs_flag = memalloc_nofs_save();
  85        inode = btrfs_iget_path(fs_info->sb, location.objectid, root, path);
  86        btrfs_release_path(path);
  87        memalloc_nofs_restore(nofs_flag);
  88        if (IS_ERR(inode))
  89                return inode;
  90
  91        mapping_set_gfp_mask(inode->i_mapping,
  92                        mapping_gfp_constraint(inode->i_mapping,
  93                        ~(__GFP_FS | __GFP_HIGHMEM)));
  94
  95        return inode;
  96}
  97
  98struct inode *lookup_free_space_inode(struct btrfs_block_group *block_group,
  99                struct btrfs_path *path)
 100{
 101        struct btrfs_fs_info *fs_info = block_group->fs_info;
 102        struct inode *inode = NULL;
 103        u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;
 104
 105        spin_lock(&block_group->lock);
 106        if (block_group->inode)
 107                inode = igrab(block_group->inode);
 108        spin_unlock(&block_group->lock);
 109        if (inode)
 110                return inode;
 111
 112        inode = __lookup_free_space_inode(fs_info->tree_root, path,
 113                                          block_group->start);
 114        if (IS_ERR(inode))
 115                return inode;
 116
 117        spin_lock(&block_group->lock);
 118        if (!((BTRFS_I(inode)->flags & flags) == flags)) {
 119                btrfs_info(fs_info, "Old style space inode found, converting.");
 120                BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM |
 121                        BTRFS_INODE_NODATACOW;
 122                block_group->disk_cache_state = BTRFS_DC_CLEAR;
 123        }
 124
 125        if (!block_group->iref) {
 126                block_group->inode = igrab(inode);
 127                block_group->iref = 1;
 128        }
 129        spin_unlock(&block_group->lock);
 130
 131        return inode;
 132}
 133
 134static int __create_free_space_inode(struct btrfs_root *root,
 135                                     struct btrfs_trans_handle *trans,
 136                                     struct btrfs_path *path,
 137                                     u64 ino, u64 offset)
 138{
 139        struct btrfs_key key;
 140        struct btrfs_disk_key disk_key;
 141        struct btrfs_free_space_header *header;
 142        struct btrfs_inode_item *inode_item;
 143        struct extent_buffer *leaf;
 144        u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC;
 145        int ret;
 146
 147        ret = btrfs_insert_empty_inode(trans, root, path, ino);
 148        if (ret)
 149                return ret;
 150
 151        /* We inline crc's for the free disk space cache */
 152        if (ino != BTRFS_FREE_INO_OBJECTID)
 153                flags |= BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;
 154
 155        leaf = path->nodes[0];
 156        inode_item = btrfs_item_ptr(leaf, path->slots[0],
 157                                    struct btrfs_inode_item);
 158        btrfs_item_key(leaf, &disk_key, path->slots[0]);
 159        memzero_extent_buffer(leaf, (unsigned long)inode_item,
 160                             sizeof(*inode_item));
 161        btrfs_set_inode_generation(leaf, inode_item, trans->transid);
 162        btrfs_set_inode_size(leaf, inode_item, 0);
 163        btrfs_set_inode_nbytes(leaf, inode_item, 0);
 164        btrfs_set_inode_uid(leaf, inode_item, 0);
 165        btrfs_set_inode_gid(leaf, inode_item, 0);
 166        btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
 167        btrfs_set_inode_flags(leaf, inode_item, flags);
 168        btrfs_set_inode_nlink(leaf, inode_item, 1);
 169        btrfs_set_inode_transid(leaf, inode_item, trans->transid);
 170        btrfs_set_inode_block_group(leaf, inode_item, offset);
 171        btrfs_mark_buffer_dirty(leaf);
 172        btrfs_release_path(path);
 173
 174        key.objectid = BTRFS_FREE_SPACE_OBJECTID;
 175        key.offset = offset;
 176        key.type = 0;
 177        ret = btrfs_insert_empty_item(trans, root, path, &key,
 178                                      sizeof(struct btrfs_free_space_header));
 179        if (ret < 0) {
 180                btrfs_release_path(path);
 181                return ret;
 182        }
 183
 184        leaf = path->nodes[0];
 185        header = btrfs_item_ptr(leaf, path->slots[0],
 186                                struct btrfs_free_space_header);
 187        memzero_extent_buffer(leaf, (unsigned long)header, sizeof(*header));
 188        btrfs_set_free_space_key(leaf, header, &disk_key);
 189        btrfs_mark_buffer_dirty(leaf);
 190        btrfs_release_path(path);
 191
 192        return 0;
 193}
 194
 195int create_free_space_inode(struct btrfs_trans_handle *trans,
 196                            struct btrfs_block_group *block_group,
 197                            struct btrfs_path *path)
 198{
 199        int ret;
 200        u64 ino;
 201
 202        ret = btrfs_find_free_objectid(trans->fs_info->tree_root, &ino);
 203        if (ret < 0)
 204                return ret;
 205
 206        return __create_free_space_inode(trans->fs_info->tree_root, trans, path,
 207                                         ino, block_group->start);
 208}
 209
 210int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info,
 211                                       struct btrfs_block_rsv *rsv)
 212{
 213        u64 needed_bytes;
 214        int ret;
 215
 216        /* 1 for slack space, 1 for updating the inode */
 217        needed_bytes = btrfs_calc_insert_metadata_size(fs_info, 1) +
 218                btrfs_calc_metadata_size(fs_info, 1);
 219
 220        spin_lock(&rsv->lock);
 221        if (rsv->reserved < needed_bytes)
 222                ret = -ENOSPC;
 223        else
 224                ret = 0;
 225        spin_unlock(&rsv->lock);
 226        return ret;
 227}
 228
 229int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans,
 230                                    struct btrfs_block_group *block_group,
 231                                    struct inode *inode)
 232{
 233        struct btrfs_root *root = BTRFS_I(inode)->root;
 234        int ret = 0;
 235        bool locked = false;
 236
 237        if (block_group) {
 238                struct btrfs_path *path = btrfs_alloc_path();
 239
 240                if (!path) {
 241                        ret = -ENOMEM;
 242                        goto fail;
 243                }
 244                locked = true;
 245                mutex_lock(&trans->transaction->cache_write_mutex);
 246                if (!list_empty(&block_group->io_list)) {
 247                        list_del_init(&block_group->io_list);
 248
 249                        btrfs_wait_cache_io(trans, block_group, path);
 250                        btrfs_put_block_group(block_group);
 251                }
 252
 253                /*
 254                 * Now that we've truncated the cache away, it's no longer
 255                 * set up or written.
 256                 */
 257                spin_lock(&block_group->lock);
 258                block_group->disk_cache_state = BTRFS_DC_CLEAR;
 259                spin_unlock(&block_group->lock);
 260                btrfs_free_path(path);
 261        }
 262
 263        btrfs_i_size_write(BTRFS_I(inode), 0);
 264        truncate_pagecache(inode, 0);
 265
 266        /*
 267         * We skip the throttling logic for free space cache inodes, so we don't
 268         * need to check for -EAGAIN.
 269         */
 270        ret = btrfs_truncate_inode_items(trans, root, inode,
 271                                         0, BTRFS_EXTENT_DATA_KEY);
 272        if (ret)
 273                goto fail;
 274
 275        ret = btrfs_update_inode(trans, root, inode);
 276
 277fail:
 278        if (locked)
 279                mutex_unlock(&trans->transaction->cache_write_mutex);
 280        if (ret)
 281                btrfs_abort_transaction(trans, ret);
 282
 283        return ret;
 284}
 285
 286static void readahead_cache(struct inode *inode)
 287{
 288        struct file_ra_state *ra;
 289        unsigned long last_index;
 290
 291        ra = kzalloc(sizeof(*ra), GFP_NOFS);
 292        if (!ra)
 293                return;
 294
 295        file_ra_state_init(ra, inode->i_mapping);
 296        last_index = (i_size_read(inode) - 1) >> PAGE_SHIFT;
 297
 298        page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index);
 299
 300        kfree(ra);
 301}
 302
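    /*
     * The io_ctl helpers below stream the space cache file through the page
     * cache.  As the read and write paths in this file show, the first page
     * starts with one u32 crc per page (when checksumming is used) followed
     * by the u64 generation; extent entries (struct btrfs_free_space_entry)
     * are packed back to back after that, and each bitmap occupies a full
     * page of its own, written out after all of the extent entries.
     */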
 303static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode,
 304                       int write)
 305{
 306        int num_pages;
 307        int check_crcs = 0;
 308
 309        num_pages = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
 310
 311        if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FREE_INO_OBJECTID)
 312                check_crcs = 1;
 313
 314        /* Make sure we can fit our crcs and generation into the first page */
 315        if (write && check_crcs &&
 316            (num_pages * sizeof(u32) + sizeof(u64)) > PAGE_SIZE)
 317                return -ENOSPC;
 318
 319        memset(io_ctl, 0, sizeof(struct btrfs_io_ctl));
 320
 321        io_ctl->pages = kcalloc(num_pages, sizeof(struct page *), GFP_NOFS);
 322        if (!io_ctl->pages)
 323                return -ENOMEM;
 324
 325        io_ctl->num_pages = num_pages;
 326        io_ctl->fs_info = btrfs_sb(inode->i_sb);
 327        io_ctl->check_crcs = check_crcs;
 328        io_ctl->inode = inode;
 329
 330        return 0;
 331}
 332ALLOW_ERROR_INJECTION(io_ctl_init, ERRNO);
 333
 334static void io_ctl_free(struct btrfs_io_ctl *io_ctl)
 335{
 336        kfree(io_ctl->pages);
 337        io_ctl->pages = NULL;
 338}
 339
 340static void io_ctl_unmap_page(struct btrfs_io_ctl *io_ctl)
 341{
 342        if (io_ctl->cur) {
 343                io_ctl->cur = NULL;
 344                io_ctl->orig = NULL;
 345        }
 346}
 347
 348static void io_ctl_map_page(struct btrfs_io_ctl *io_ctl, int clear)
 349{
 350        ASSERT(io_ctl->index < io_ctl->num_pages);
 351        io_ctl->page = io_ctl->pages[io_ctl->index++];
 352        io_ctl->cur = page_address(io_ctl->page);
 353        io_ctl->orig = io_ctl->cur;
 354        io_ctl->size = PAGE_SIZE;
 355        if (clear)
 356                clear_page(io_ctl->cur);
 357}
 358
 359static void io_ctl_drop_pages(struct btrfs_io_ctl *io_ctl)
 360{
 361        int i;
 362
 363        io_ctl_unmap_page(io_ctl);
 364
 365        for (i = 0; i < io_ctl->num_pages; i++) {
 366                if (io_ctl->pages[i]) {
 367                        ClearPageChecked(io_ctl->pages[i]);
 368                        unlock_page(io_ctl->pages[i]);
 369                        put_page(io_ctl->pages[i]);
 370                }
 371        }
 372}
 373
 374static int io_ctl_prepare_pages(struct btrfs_io_ctl *io_ctl, bool uptodate)
 375{
 376        struct page *page;
 377        struct inode *inode = io_ctl->inode;
 378        gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
 379        int i;
 380
 381        for (i = 0; i < io_ctl->num_pages; i++) {
 382                page = find_or_create_page(inode->i_mapping, i, mask);
 383                if (!page) {
 384                        io_ctl_drop_pages(io_ctl);
 385                        return -ENOMEM;
 386                }
 387                io_ctl->pages[i] = page;
 388                if (uptodate && !PageUptodate(page)) {
 389                        btrfs_readpage(NULL, page);
 390                        lock_page(page);
 391                        if (page->mapping != inode->i_mapping) {
 392                                btrfs_err(BTRFS_I(inode)->root->fs_info,
 393                                          "free space cache page truncated");
 394                                io_ctl_drop_pages(io_ctl);
 395                                return -EIO;
 396                        }
 397                        if (!PageUptodate(page)) {
 398                                btrfs_err(BTRFS_I(inode)->root->fs_info,
 399                                           "error reading free space cache");
 400                                io_ctl_drop_pages(io_ctl);
 401                                return -EIO;
 402                        }
 403                }
 404        }
 405
 406        for (i = 0; i < io_ctl->num_pages; i++) {
 407                clear_page_dirty_for_io(io_ctl->pages[i]);
 408                set_page_extent_mapped(io_ctl->pages[i]);
 409        }
 410
 411        return 0;
 412}
 413
 414static void io_ctl_set_generation(struct btrfs_io_ctl *io_ctl, u64 generation)
 415{
 416        io_ctl_map_page(io_ctl, 1);
 417
 418        /*
 419         * Skip the csum areas.  If we don't check crcs then we just have a
 420         * 64bit chunk at the front of the first page.
 421         */
 422        if (io_ctl->check_crcs) {
 423                io_ctl->cur += (sizeof(u32) * io_ctl->num_pages);
 424                io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages);
 425        } else {
 426                io_ctl->cur += sizeof(u64);
 427                io_ctl->size -= sizeof(u64) * 2;
 428        }
 429
 430        put_unaligned_le64(generation, io_ctl->cur);
 431        io_ctl->cur += sizeof(u64);
 432}
 433
 434static int io_ctl_check_generation(struct btrfs_io_ctl *io_ctl, u64 generation)
 435{
 436        u64 cache_gen;
 437
 438        /*
 439         * Skip the crc area.  If we don't check crcs then we just have a 64bit
 440         * chunk at the front of the first page.
 441         */
 442        if (io_ctl->check_crcs) {
 443                io_ctl->cur += sizeof(u32) * io_ctl->num_pages;
 444                io_ctl->size -= sizeof(u64) +
 445                        (sizeof(u32) * io_ctl->num_pages);
 446        } else {
 447                io_ctl->cur += sizeof(u64);
 448                io_ctl->size -= sizeof(u64) * 2;
 449        }
 450
 451        cache_gen = get_unaligned_le64(io_ctl->cur);
 452        if (cache_gen != generation) {
 453                btrfs_err_rl(io_ctl->fs_info,
 454                        "space cache generation (%llu) does not match inode (%llu)",
 455                                cache_gen, generation);
 456                io_ctl_unmap_page(io_ctl);
 457                return -EIO;
 458        }
 459        io_ctl->cur += sizeof(u64);
 460        return 0;
 461}
 462
 463static void io_ctl_set_crc(struct btrfs_io_ctl *io_ctl, int index)
 464{
 465        u32 *tmp;
 466        u32 crc = ~(u32)0;
 467        unsigned offset = 0;
 468
 469        if (!io_ctl->check_crcs) {
 470                io_ctl_unmap_page(io_ctl);
 471                return;
 472        }
 473
 474        if (index == 0)
 475                offset = sizeof(u32) * io_ctl->num_pages;
 476
 477        crc = btrfs_crc32c(crc, io_ctl->orig + offset, PAGE_SIZE - offset);
 478        btrfs_crc32c_final(crc, (u8 *)&crc);
 479        io_ctl_unmap_page(io_ctl);
 480        tmp = page_address(io_ctl->pages[0]);
 481        tmp += index;
 482        *tmp = crc;
 483}
 484
 485static int io_ctl_check_crc(struct btrfs_io_ctl *io_ctl, int index)
 486{
 487        u32 *tmp, val;
 488        u32 crc = ~(u32)0;
 489        unsigned offset = 0;
 490
 491        if (!io_ctl->check_crcs) {
 492                io_ctl_map_page(io_ctl, 0);
 493                return 0;
 494        }
 495
 496        if (index == 0)
 497                offset = sizeof(u32) * io_ctl->num_pages;
 498
 499        tmp = page_address(io_ctl->pages[0]);
 500        tmp += index;
 501        val = *tmp;
 502
 503        io_ctl_map_page(io_ctl, 0);
 504        crc = btrfs_crc32c(crc, io_ctl->orig + offset, PAGE_SIZE - offset);
 505        btrfs_crc32c_final(crc, (u8 *)&crc);
 506        if (val != crc) {
 507                btrfs_err_rl(io_ctl->fs_info,
 508                        "csum mismatch on free space cache");
 509                io_ctl_unmap_page(io_ctl);
 510                return -EIO;
 511        }
 512
 513        return 0;
 514}
 515
 516static int io_ctl_add_entry(struct btrfs_io_ctl *io_ctl, u64 offset, u64 bytes,
 517                            void *bitmap)
 518{
 519        struct btrfs_free_space_entry *entry;
 520
 521        if (!io_ctl->cur)
 522                return -ENOSPC;
 523
 524        entry = io_ctl->cur;
 525        put_unaligned_le64(offset, &entry->offset);
 526        put_unaligned_le64(bytes, &entry->bytes);
 527        entry->type = (bitmap) ? BTRFS_FREE_SPACE_BITMAP :
 528                BTRFS_FREE_SPACE_EXTENT;
 529        io_ctl->cur += sizeof(struct btrfs_free_space_entry);
 530        io_ctl->size -= sizeof(struct btrfs_free_space_entry);
 531
 532        if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
 533                return 0;
 534
 535        io_ctl_set_crc(io_ctl, io_ctl->index - 1);
 536
 537        /* No more pages to map */
 538        if (io_ctl->index >= io_ctl->num_pages)
 539                return 0;
 540
 541        /* map the next page */
 542        io_ctl_map_page(io_ctl, 1);
 543        return 0;
 544}
 545
 546static int io_ctl_add_bitmap(struct btrfs_io_ctl *io_ctl, void *bitmap)
 547{
 548        if (!io_ctl->cur)
 549                return -ENOSPC;
 550
 551        /*
 552         * If we aren't at the start of the current page, unmap this one and
 553         * map the next one if there is any left.
 554         */
 555        if (io_ctl->cur != io_ctl->orig) {
 556                io_ctl_set_crc(io_ctl, io_ctl->index - 1);
 557                if (io_ctl->index >= io_ctl->num_pages)
 558                        return -ENOSPC;
 559                io_ctl_map_page(io_ctl, 0);
 560        }
 561
 562        copy_page(io_ctl->cur, bitmap);
 563        io_ctl_set_crc(io_ctl, io_ctl->index - 1);
 564        if (io_ctl->index < io_ctl->num_pages)
 565                io_ctl_map_page(io_ctl, 0);
 566        return 0;
 567}
 568
 569static void io_ctl_zero_remaining_pages(struct btrfs_io_ctl *io_ctl)
 570{
 571        /*
 572         * If we're not on the boundary we know we've modified the page and we
 573         * need to crc the page.
 574         */
 575        if (io_ctl->cur != io_ctl->orig)
 576                io_ctl_set_crc(io_ctl, io_ctl->index - 1);
 577        else
 578                io_ctl_unmap_page(io_ctl);
 579
 580        while (io_ctl->index < io_ctl->num_pages) {
 581                io_ctl_map_page(io_ctl, 1);
 582                io_ctl_set_crc(io_ctl, io_ctl->index - 1);
 583        }
 584}
 585
 586static int io_ctl_read_entry(struct btrfs_io_ctl *io_ctl,
 587                            struct btrfs_free_space *entry, u8 *type)
 588{
 589        struct btrfs_free_space_entry *e;
 590        int ret;
 591
 592        if (!io_ctl->cur) {
 593                ret = io_ctl_check_crc(io_ctl, io_ctl->index);
 594                if (ret)
 595                        return ret;
 596        }
 597
 598        e = io_ctl->cur;
 599        entry->offset = get_unaligned_le64(&e->offset);
 600        entry->bytes = get_unaligned_le64(&e->bytes);
 601        *type = e->type;
 602        io_ctl->cur += sizeof(struct btrfs_free_space_entry);
 603        io_ctl->size -= sizeof(struct btrfs_free_space_entry);
 604
 605        if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
 606                return 0;
 607
 608        io_ctl_unmap_page(io_ctl);
 609
 610        return 0;
 611}
 612
 613static int io_ctl_read_bitmap(struct btrfs_io_ctl *io_ctl,
 614                              struct btrfs_free_space *entry)
 615{
 616        int ret;
 617
 618        ret = io_ctl_check_crc(io_ctl, io_ctl->index);
 619        if (ret)
 620                return ret;
 621
 622        copy_page(entry->bitmap, io_ctl->cur);
 623        io_ctl_unmap_page(io_ctl);
 624
 625        return 0;
 626}
 627
 628/*
 629 * Since we attach pinned extents after the fact, we can have contiguous
 630 * sections of free space that are split up into separate entries.  This poses
 631 * a problem for tree logging, which could have allocated across what now looks
 632 * like two entries, since we would have merged those entries when adding the
 633 * pinned extents back to the free space cache.  So run through the space cache
 634 * that we just loaded and merge any contiguous entries.  This keeps log replay
 635 * from blowing up and makes for nicer allocator behavior.
 636 */
 637static void merge_space_tree(struct btrfs_free_space_ctl *ctl)
 638{
 639        struct btrfs_free_space *e, *prev = NULL;
 640        struct rb_node *n;
 641
 642again:
 643        spin_lock(&ctl->tree_lock);
 644        for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
 645                e = rb_entry(n, struct btrfs_free_space, offset_index);
 646                if (!prev)
 647                        goto next;
 648                if (e->bitmap || prev->bitmap)
 649                        goto next;
 650                if (prev->offset + prev->bytes == e->offset) {
 651                        unlink_free_space(ctl, prev);
 652                        unlink_free_space(ctl, e);
 653                        prev->bytes += e->bytes;
 654                        kmem_cache_free(btrfs_free_space_cachep, e);
 655                        link_free_space(ctl, prev);
 656                        prev = NULL;
 657                        spin_unlock(&ctl->tree_lock);
 658                        goto again;
 659                }
 660next:
 661                prev = e;
 662        }
 663        spin_unlock(&ctl->tree_lock);
 664}
 665
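    /*
     * Read the space cache for @offset back into @ctl: look up the free space
     * header item to get the entry and bitmap counts plus the generation,
     * verify that generation against the inode, then read every extent entry
     * followed by the bitmaps and link them into the rbtree.  Returns 1 when
     * the cache was loaded, and 0 or a negative value otherwise.
     */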
 666static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
 667                                   struct btrfs_free_space_ctl *ctl,
 668                                   struct btrfs_path *path, u64 offset)
 669{
 670        struct btrfs_fs_info *fs_info = root->fs_info;
 671        struct btrfs_free_space_header *header;
 672        struct extent_buffer *leaf;
 673        struct btrfs_io_ctl io_ctl;
 674        struct btrfs_key key;
 675        struct btrfs_free_space *e, *n;
 676        LIST_HEAD(bitmaps);
 677        u64 num_entries;
 678        u64 num_bitmaps;
 679        u64 generation;
 680        u8 type;
 681        int ret = 0;
 682
 683        /* Nothing in the space cache, goodbye */
 684        if (!i_size_read(inode))
 685                return 0;
 686
 687        key.objectid = BTRFS_FREE_SPACE_OBJECTID;
 688        key.offset = offset;
 689        key.type = 0;
 690
 691        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
 692        if (ret < 0)
 693                return 0;
 694        else if (ret > 0) {
 695                btrfs_release_path(path);
 696                return 0;
 697        }
 698
 699        ret = -1;
 700
 701        leaf = path->nodes[0];
 702        header = btrfs_item_ptr(leaf, path->slots[0],
 703                                struct btrfs_free_space_header);
 704        num_entries = btrfs_free_space_entries(leaf, header);
 705        num_bitmaps = btrfs_free_space_bitmaps(leaf, header);
 706        generation = btrfs_free_space_generation(leaf, header);
 707        btrfs_release_path(path);
 708
 709        if (!BTRFS_I(inode)->generation) {
 710                btrfs_info(fs_info,
 711                           "the free space cache file (%llu) is invalid, skip it",
 712                           offset);
 713                return 0;
 714        }
 715
 716        if (BTRFS_I(inode)->generation != generation) {
 717                btrfs_err(fs_info,
 718                          "free space inode generation (%llu) did not match free space cache generation (%llu)",
 719                          BTRFS_I(inode)->generation, generation);
 720                return 0;
 721        }
 722
 723        if (!num_entries)
 724                return 0;
 725
 726        ret = io_ctl_init(&io_ctl, inode, 0);
 727        if (ret)
 728                return ret;
 729
 730        readahead_cache(inode);
 731
 732        ret = io_ctl_prepare_pages(&io_ctl, true);
 733        if (ret)
 734                goto out;
 735
 736        ret = io_ctl_check_crc(&io_ctl, 0);
 737        if (ret)
 738                goto free_cache;
 739
 740        ret = io_ctl_check_generation(&io_ctl, generation);
 741        if (ret)
 742                goto free_cache;
 743
 744        while (num_entries) {
 745                e = kmem_cache_zalloc(btrfs_free_space_cachep,
 746                                      GFP_NOFS);
 747                if (!e)
 748                        goto free_cache;
 749
 750                ret = io_ctl_read_entry(&io_ctl, e, &type);
 751                if (ret) {
 752                        kmem_cache_free(btrfs_free_space_cachep, e);
 753                        goto free_cache;
 754                }
 755
 756                /*
 757                 * Sync discard ensures that the free space cache is always
 758                 * trimmed.  So when reading this in, the state should reflect
 759                 * that.  We also do this for async as a stop gap for lack of
 760                 * persistence.
 761                 */
 762                if (btrfs_test_opt(fs_info, DISCARD_SYNC) ||
 763                    btrfs_test_opt(fs_info, DISCARD_ASYNC))
 764                        e->trim_state = BTRFS_TRIM_STATE_TRIMMED;
 765
 766                if (!e->bytes) {
 767                        kmem_cache_free(btrfs_free_space_cachep, e);
 768                        goto free_cache;
 769                }
 770
 771                if (type == BTRFS_FREE_SPACE_EXTENT) {
 772                        spin_lock(&ctl->tree_lock);
 773                        ret = link_free_space(ctl, e);
 774                        spin_unlock(&ctl->tree_lock);
 775                        if (ret) {
 776                                btrfs_err(fs_info,
 777                                        "Duplicate entries in free space cache, dumping");
 778                                kmem_cache_free(btrfs_free_space_cachep, e);
 779                                goto free_cache;
 780                        }
 781                } else {
 782                        ASSERT(num_bitmaps);
 783                        num_bitmaps--;
 784                        e->bitmap = kmem_cache_zalloc(
 785                                        btrfs_free_space_bitmap_cachep, GFP_NOFS);
 786                        if (!e->bitmap) {
 787                                kmem_cache_free(
 788                                        btrfs_free_space_cachep, e);
 789                                goto free_cache;
 790                        }
 791                        spin_lock(&ctl->tree_lock);
 792                        ret = link_free_space(ctl, e);
 793                        ctl->total_bitmaps++;
 794                        ctl->op->recalc_thresholds(ctl);
 795                        spin_unlock(&ctl->tree_lock);
 796                        if (ret) {
 797                                btrfs_err(fs_info,
 798                                        "Duplicate entries in free space cache, dumping");
 799                                kmem_cache_free(btrfs_free_space_cachep, e);
 800                                goto free_cache;
 801                        }
 802                        list_add_tail(&e->list, &bitmaps);
 803                }
 804
 805                num_entries--;
 806        }
 807
 808        io_ctl_unmap_page(&io_ctl);
 809
 810        /*
 811         * We add the bitmaps at the end of the entries so that the bitmap
 812         * entries are added to the cache after all of the extent entries.
 813         */
 814        list_for_each_entry_safe(e, n, &bitmaps, list) {
 815                list_del_init(&e->list);
 816                ret = io_ctl_read_bitmap(&io_ctl, e);
 817                if (ret)
 818                        goto free_cache;
 819                e->bitmap_extents = count_bitmap_extents(ctl, e);
 820                if (!btrfs_free_space_trimmed(e)) {
 821                        ctl->discardable_extents[BTRFS_STAT_CURR] +=
 822                                e->bitmap_extents;
 823                        ctl->discardable_bytes[BTRFS_STAT_CURR] += e->bytes;
 824                }
 825        }
 826
 827        io_ctl_drop_pages(&io_ctl);
 828        merge_space_tree(ctl);
 829        ret = 1;
 830out:
 831        btrfs_discard_update_discardable(ctl->private, ctl);
 832        io_ctl_free(&io_ctl);
 833        return ret;
 834free_cache:
 835        io_ctl_drop_pages(&io_ctl);
 836        __btrfs_remove_free_space_cache(ctl);
 837        goto out;
 838}
 839
 840int load_free_space_cache(struct btrfs_block_group *block_group)
 841{
 842        struct btrfs_fs_info *fs_info = block_group->fs_info;
 843        struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
 844        struct inode *inode;
 845        struct btrfs_path *path;
 846        int ret = 0;
 847        bool matched;
 848        u64 used = block_group->used;
 849
 850        /*
 851         * If this block group has been marked to be cleared for one reason or
 852         * another then we can't trust the on disk cache, so just return.
 853         */
 854        spin_lock(&block_group->lock);
 855        if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
 856                spin_unlock(&block_group->lock);
 857                return 0;
 858        }
 859        spin_unlock(&block_group->lock);
 860
 861        path = btrfs_alloc_path();
 862        if (!path)
 863                return 0;
 864        path->search_commit_root = 1;
 865        path->skip_locking = 1;
 866
 867        /*
 868         * We must pass a path with search_commit_root set to btrfs_iget in
 869         * order to avoid a deadlock when allocating extents for the tree root.
 870         *
 871         * When we are COWing an extent buffer from the tree root, when looking
 872         * for a free extent, at extent-tree.c:find_free_extent(), we can find
 873         * a block group without its free space cache loaded. When we find one
 874         * we must load its space cache which requires reading its free space
 875         * cache's inode item from the root tree. If this inode item is located
 876         * in the same leaf that we started COWing before, then we end up in
 877         * deadlock on the extent buffer (trying to read lock it when we
 878         * previously write locked it).
 879         *
 880         * It's safe to read the inode item using the commit root because
 881         * block groups, once loaded, stay in memory forever (until they are
 882         * removed) as well as their space caches once loaded. New block groups
 883         * once created get their ->cached field set to BTRFS_CACHE_FINISHED so
 884         * we will never try to read their inode item while the fs is mounted.
 885         */
 886        inode = lookup_free_space_inode(block_group, path);
 887        if (IS_ERR(inode)) {
 888                btrfs_free_path(path);
 889                return 0;
 890        }
 891
 892        /* We may have converted the inode and made the cache invalid. */
 893        spin_lock(&block_group->lock);
 894        if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
 895                spin_unlock(&block_group->lock);
 896                btrfs_free_path(path);
 897                goto out;
 898        }
 899        spin_unlock(&block_group->lock);
 900
 901        ret = __load_free_space_cache(fs_info->tree_root, inode, ctl,
 902                                      path, block_group->start);
 903        btrfs_free_path(path);
 904        if (ret <= 0)
 905                goto out;
 906
 907        spin_lock(&ctl->tree_lock);
 908        matched = (ctl->free_space == (block_group->length - used -
 909                                       block_group->bytes_super));
 910        spin_unlock(&ctl->tree_lock);
 911
 912        if (!matched) {
 913                __btrfs_remove_free_space_cache(ctl);
 914                btrfs_warn(fs_info,
 915                           "block group %llu has wrong amount of free space",
 916                           block_group->start);
 917                ret = -1;
 918        }
 919out:
 920        if (ret < 0) {
 921                /* This cache is bogus, make sure it gets cleared */
 922                spin_lock(&block_group->lock);
 923                block_group->disk_cache_state = BTRFS_DC_CLEAR;
 924                spin_unlock(&block_group->lock);
 925                ret = 0;
 926
 927                btrfs_warn(fs_info,
 928                           "failed to load free space cache for block group %llu, rebuilding it now",
 929                           block_group->start);
 930        }
 931
 932        iput(inode);
 933        return ret;
 934}
 935
 936static noinline_for_stack
 937int write_cache_extent_entries(struct btrfs_io_ctl *io_ctl,
 938                              struct btrfs_free_space_ctl *ctl,
 939                              struct btrfs_block_group *block_group,
 940                              int *entries, int *bitmaps,
 941                              struct list_head *bitmap_list)
 942{
 943        int ret;
 944        struct btrfs_free_cluster *cluster = NULL;
 945        struct btrfs_free_cluster *cluster_locked = NULL;
 946        struct rb_node *node = rb_first(&ctl->free_space_offset);
 947        struct btrfs_trim_range *trim_entry;
 948
 949        /* Get the cluster for this block_group if it exists */
 950        if (block_group && !list_empty(&block_group->cluster_list)) {
 951                cluster = list_entry(block_group->cluster_list.next,
 952                                     struct btrfs_free_cluster,
 953                                     block_group_list);
 954        }
 955
 956        if (!node && cluster) {
 957                cluster_locked = cluster;
 958                spin_lock(&cluster_locked->lock);
 959                node = rb_first(&cluster->root);
 960                cluster = NULL;
 961        }
 962
 963        /* Write out the extent entries */
 964        while (node) {
 965                struct btrfs_free_space *e;
 966
 967                e = rb_entry(node, struct btrfs_free_space, offset_index);
 968                *entries += 1;
 969
 970                ret = io_ctl_add_entry(io_ctl, e->offset, e->bytes,
 971                                       e->bitmap);
 972                if (ret)
 973                        goto fail;
 974
 975                if (e->bitmap) {
 976                        list_add_tail(&e->list, bitmap_list);
 977                        *bitmaps += 1;
 978                }
 979                node = rb_next(node);
 980                if (!node && cluster) {
 981                        node = rb_first(&cluster->root);
 982                        cluster_locked = cluster;
 983                        spin_lock(&cluster_locked->lock);
 984                        cluster = NULL;
 985                }
 986        }
 987        if (cluster_locked) {
 988                spin_unlock(&cluster_locked->lock);
 989                cluster_locked = NULL;
 990        }
 991
 992        /*
 993         * Make sure we don't miss any range that was removed from our rbtree
 994         * because trimming is running. Otherwise after a umount+mount (or crash
 995         * after committing the transaction) we would leak free space and get
 996         * an inconsistent free space cache report from fsck.
 997         */
 998        list_for_each_entry(trim_entry, &ctl->trimming_ranges, list) {
 999                ret = io_ctl_add_entry(io_ctl, trim_entry->start,
1000                                       trim_entry->bytes, NULL);
1001                if (ret)
1002                        goto fail;
1003                *entries += 1;
1004        }
1005
1006        return 0;
1007fail:
1008        if (cluster_locked)
1009                spin_unlock(&cluster_locked->lock);
1010        return -ENOSPC;
1011}
1012
1013static noinline_for_stack int
1014update_cache_item(struct btrfs_trans_handle *trans,
1015                  struct btrfs_root *root,
1016                  struct inode *inode,
1017                  struct btrfs_path *path, u64 offset,
1018                  int entries, int bitmaps)
1019{
1020        struct btrfs_key key;
1021        struct btrfs_free_space_header *header;
1022        struct extent_buffer *leaf;
1023        int ret;
1024
1025        key.objectid = BTRFS_FREE_SPACE_OBJECTID;
1026        key.offset = offset;
1027        key.type = 0;
1028
1029        ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1030        if (ret < 0) {
1031                clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
1032                                 EXTENT_DELALLOC, 0, 0, NULL);
1033                goto fail;
1034        }
1035        leaf = path->nodes[0];
1036        if (ret > 0) {
1037                struct btrfs_key found_key;
1038                ASSERT(path->slots[0]);
1039                path->slots[0]--;
1040                btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
1041                if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
1042                    found_key.offset != offset) {
1043                        clear_extent_bit(&BTRFS_I(inode)->io_tree, 0,
1044                                         inode->i_size - 1, EXTENT_DELALLOC, 0,
1045                                         0, NULL);
1046                        btrfs_release_path(path);
1047                        goto fail;
1048                }
1049        }
1050
1051        BTRFS_I(inode)->generation = trans->transid;
1052        header = btrfs_item_ptr(leaf, path->slots[0],
1053                                struct btrfs_free_space_header);
1054        btrfs_set_free_space_entries(leaf, header, entries);
1055        btrfs_set_free_space_bitmaps(leaf, header, bitmaps);
1056        btrfs_set_free_space_generation(leaf, header, trans->transid);
1057        btrfs_mark_buffer_dirty(leaf);
1058        btrfs_release_path(path);
1059
1060        return 0;
1061
1062fail:
1063        return -1;
1064}
1065
1066static noinline_for_stack int write_pinned_extent_entries(
1067                            struct btrfs_trans_handle *trans,
1068                            struct btrfs_block_group *block_group,
1069                            struct btrfs_io_ctl *io_ctl,
1070                            int *entries)
1071{
1072        u64 start, extent_start, extent_end, len;
1073        struct extent_io_tree *unpin = NULL;
1074        int ret;
1075
1076        if (!block_group)
1077                return 0;
1078
1079        /*
1080         * We want to add any pinned extents to our free space cache
1081         * so that we don't leak the space.
1082         *
1083         * We shouldn't have switched the pinned extents yet, so this is the
1084         * right one.
1085         */
1086        unpin = &trans->transaction->pinned_extents;
1087
1088        start = block_group->start;
1089
1090        while (start < block_group->start + block_group->length) {
1091                ret = find_first_extent_bit(unpin, start,
1092                                            &extent_start, &extent_end,
1093                                            EXTENT_DIRTY, NULL);
1094                if (ret)
1095                        return 0;
1096
1097                /* This pinned extent is out of our range */
1098                if (extent_start >= block_group->start + block_group->length)
1099                        return 0;
1100
1101                extent_start = max(extent_start, start);
1102                extent_end = min(block_group->start + block_group->length,
1103                                 extent_end + 1);
1104                len = extent_end - extent_start;
1105
1106                *entries += 1;
1107                ret = io_ctl_add_entry(io_ctl, extent_start, len, NULL);
1108                if (ret)
1109                        return -ENOSPC;
1110
1111                start = extent_end;
1112        }
1113
1114        return 0;
1115}
1116
1117static noinline_for_stack int
1118write_bitmap_entries(struct btrfs_io_ctl *io_ctl, struct list_head *bitmap_list)
1119{
1120        struct btrfs_free_space *entry, *next;
1121        int ret;
1122
1123        /* Write out the bitmaps */
1124        list_for_each_entry_safe(entry, next, bitmap_list, list) {
1125                ret = io_ctl_add_bitmap(io_ctl, entry->bitmap);
1126                if (ret)
1127                        return -ENOSPC;
1128                list_del_init(&entry->list);
1129        }
1130
1131        return 0;
1132}
1133
1134static int flush_dirty_cache(struct inode *inode)
1135{
1136        int ret;
1137
1138        ret = btrfs_wait_ordered_range(inode, 0, (u64)-1);
1139        if (ret)
1140                clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
1141                                 EXTENT_DELALLOC, 0, 0, NULL);
1142
1143        return ret;
1144}
1145
1146static void noinline_for_stack
1147cleanup_bitmap_list(struct list_head *bitmap_list)
1148{
1149        struct btrfs_free_space *entry, *next;
1150
1151        list_for_each_entry_safe(entry, next, bitmap_list, list)
1152                list_del_init(&entry->list);
1153}
1154
1155static void noinline_for_stack
1156cleanup_write_cache_enospc(struct inode *inode,
1157                           struct btrfs_io_ctl *io_ctl,
1158                           struct extent_state **cached_state)
1159{
1160        io_ctl_drop_pages(io_ctl);
1161        unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
1162                             i_size_read(inode) - 1, cached_state);
1163}
1164
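    /*
     * Wait for the writeback started by __btrfs_write_out_cache to finish and
     * then update the free space header item so the cache file is seen as
     * valid.  On failure the page cache is invalidated, the inode generation
     * is cleared and the block group (if any) is marked BTRFS_DC_ERROR so the
     * on-disk cache is not trusted.
     */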
1165static int __btrfs_wait_cache_io(struct btrfs_root *root,
1166                                 struct btrfs_trans_handle *trans,
1167                                 struct btrfs_block_group *block_group,
1168                                 struct btrfs_io_ctl *io_ctl,
1169                                 struct btrfs_path *path, u64 offset)
1170{
1171        int ret;
1172        struct inode *inode = io_ctl->inode;
1173
1174        if (!inode)
1175                return 0;
1176
1177        /* Flush the dirty pages in the cache file. */
1178        ret = flush_dirty_cache(inode);
1179        if (ret)
1180                goto out;
1181
1182        /* Update the cache item to tell everyone this cache file is valid. */
1183        ret = update_cache_item(trans, root, inode, path, offset,
1184                                io_ctl->entries, io_ctl->bitmaps);
1185out:
1186        if (ret) {
1187                invalidate_inode_pages2(inode->i_mapping);
1188                BTRFS_I(inode)->generation = 0;
1189                if (block_group)
1190                        btrfs_debug(root->fs_info,
1191          "failed to write free space cache for block group %llu error %d",
1192                                  block_group->start, ret);
1193        }
1194        btrfs_update_inode(trans, root, inode);
1195
1196        if (block_group) {
1197                /* the dirty list is protected by the dirty_bgs_lock */
1198                spin_lock(&trans->transaction->dirty_bgs_lock);
1199
1200                /* the disk_cache_state is protected by the block group lock */
1201                spin_lock(&block_group->lock);
1202
1203                /*
1204                 * Only mark this as written if we didn't get put back on
1205                 * the dirty list while waiting for IO.  Otherwise our
1206                 * cache state won't be right, and we won't get written again.
1207                 */
1208                if (!ret && list_empty(&block_group->dirty_list))
1209                        block_group->disk_cache_state = BTRFS_DC_WRITTEN;
1210                else if (ret)
1211                        block_group->disk_cache_state = BTRFS_DC_ERROR;
1212
1213                spin_unlock(&block_group->lock);
1214                spin_unlock(&trans->transaction->dirty_bgs_lock);
1215                io_ctl->inode = NULL;
1216                iput(inode);
1217        }
1218
1219        return ret;
1220
1221}
1222
1223static int btrfs_wait_cache_io_root(struct btrfs_root *root,
1224                                    struct btrfs_trans_handle *trans,
1225                                    struct btrfs_io_ctl *io_ctl,
1226                                    struct btrfs_path *path)
1227{
1228        return __btrfs_wait_cache_io(root, trans, NULL, io_ctl, path, 0);
1229}
1230
1231int btrfs_wait_cache_io(struct btrfs_trans_handle *trans,
1232                        struct btrfs_block_group *block_group,
1233                        struct btrfs_path *path)
1234{
1235        return __btrfs_wait_cache_io(block_group->fs_info->tree_root, trans,
1236                                     block_group, &block_group->io_ctl,
1237                                     path, block_group->start);
1238}
1239
1240/**
1241 * __btrfs_write_out_cache - write out cached info to an inode
1242 * @root - the root the inode belongs to
1243 * @inode - the inode of the free space cache we are writing out
1244 * @ctl - the free space cache we are going to write out
1245 * @block_group - the block_group for this cache if it belongs to a block_group
1246 * @io_ctl - the io_ctl struct used for tracking the pages we are writing
1247 * @trans - the trans handle
1248 * This function writes out a free space cache struct to disk for quick recovery
1249 * on mount.  It returns 0 on success, or an errno if the write failed.
1250 */
1251static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
1252                                   struct btrfs_free_space_ctl *ctl,
1253                                   struct btrfs_block_group *block_group,
1254                                   struct btrfs_io_ctl *io_ctl,
1255                                   struct btrfs_trans_handle *trans)
1256{
1257        struct extent_state *cached_state = NULL;
1258        LIST_HEAD(bitmap_list);
1259        int entries = 0;
1260        int bitmaps = 0;
1261        int ret;
1262        int must_iput = 0;
1263
1264        if (!i_size_read(inode))
1265                return -EIO;
1266
1267        WARN_ON(io_ctl->pages);
1268        ret = io_ctl_init(io_ctl, inode, 1);
1269        if (ret)
1270                return ret;
1271
1272        if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) {
1273                down_write(&block_group->data_rwsem);
1274                spin_lock(&block_group->lock);
1275                if (block_group->delalloc_bytes) {
1276                        block_group->disk_cache_state = BTRFS_DC_WRITTEN;
1277                        spin_unlock(&block_group->lock);
1278                        up_write(&block_group->data_rwsem);
1279                        BTRFS_I(inode)->generation = 0;
1280                        ret = 0;
1281                        must_iput = 1;
1282                        goto out;
1283                }
1284                spin_unlock(&block_group->lock);
1285        }
1286
1287        /* Lock all pages first so we can lock the extent safely. */
1288        ret = io_ctl_prepare_pages(io_ctl, false);
1289        if (ret)
1290                goto out_unlock;
1291
1292        lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
1293                         &cached_state);
1294
1295        io_ctl_set_generation(io_ctl, trans->transid);
1296
1297        mutex_lock(&ctl->cache_writeout_mutex);
1298        /* Write out the extent entries in the free space cache */
1299        spin_lock(&ctl->tree_lock);
1300        ret = write_cache_extent_entries(io_ctl, ctl,
1301                                         block_group, &entries, &bitmaps,
1302                                         &bitmap_list);
1303        if (ret)
1304                goto out_nospc_locked;
1305
1306        /*
1307         * Some of the space freed in the current transaction is pinned and
1308         * will only be added to the free space cache after the transaction
1309         * is committed, so we must not lose it.
1310         *
1311         * If this changes while we are working, we'll get added back to
1312         * the dirty list and redo it.  No locking is needed.
1313         */
1314        ret = write_pinned_extent_entries(trans, block_group, io_ctl, &entries);
1315        if (ret)
1316                goto out_nospc_locked;
1317
1318        /*
1319         * Finally, we write out all the bitmaps and keep cache_writeout_mutex
1320         * locked while doing it because a concurrent trim can be manipulating
1321         * or freeing the bitmap.
1322         */
1323        ret = write_bitmap_entries(io_ctl, &bitmap_list);
1324        spin_unlock(&ctl->tree_lock);
1325        mutex_unlock(&ctl->cache_writeout_mutex);
1326        if (ret)
1327                goto out_nospc;
1328
1329        /* Zero out the rest of the pages just to make sure */
1330        io_ctl_zero_remaining_pages(io_ctl);
1331
1332        /* Everything is written out, now we dirty the pages in the file. */
1333        ret = btrfs_dirty_pages(BTRFS_I(inode), io_ctl->pages,
1334                                io_ctl->num_pages, 0, i_size_read(inode),
1335                                &cached_state);
1336        if (ret)
1337                goto out_nospc;
1338
1339        if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA))
1340                up_write(&block_group->data_rwsem);
1341        /*
1342         * Release the pages and unlock the extent; we will flush
1343         * them out later.
1344         */
1345        io_ctl_drop_pages(io_ctl);
1346        io_ctl_free(io_ctl);
1347
1348        unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
1349                             i_size_read(inode) - 1, &cached_state);
1350
1351        /*
1352         * At this point the pages are under IO and we're happy.
1353         * The caller is responsible for waiting on them and updating
1354         * the cache and the inode.
1355         */
1356        io_ctl->entries = entries;
1357        io_ctl->bitmaps = bitmaps;
1358
1359        ret = btrfs_fdatawrite_range(inode, 0, (u64)-1);
1360        if (ret)
1361                goto out;
1362
1363        return 0;
1364
1365out_nospc_locked:
1366        cleanup_bitmap_list(&bitmap_list);
1367        spin_unlock(&ctl->tree_lock);
1368        mutex_unlock(&ctl->cache_writeout_mutex);
1369
1370out_nospc:
1371        cleanup_write_cache_enospc(inode, io_ctl, &cached_state);
1372
1373out_unlock:
1374        if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA))
1375                up_write(&block_group->data_rwsem);
1376
1377out:
1378        io_ctl->inode = NULL;
1379        io_ctl_free(io_ctl);
1380        if (ret) {
1381                invalidate_inode_pages2(inode->i_mapping);
1382                BTRFS_I(inode)->generation = 0;
1383        }
1384        btrfs_update_inode(trans, root, inode);
1385        if (must_iput)
1386                iput(inode);
1387        return ret;
1388}
1389
1390int btrfs_write_out_cache(struct btrfs_trans_handle *trans,
1391                          struct btrfs_block_group *block_group,
1392                          struct btrfs_path *path)
1393{
1394        struct btrfs_fs_info *fs_info = trans->fs_info;
1395        struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1396        struct inode *inode;
1397        int ret = 0;
1398
1399        spin_lock(&block_group->lock);
1400        if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
1401                spin_unlock(&block_group->lock);
1402                return 0;
1403        }
1404        spin_unlock(&block_group->lock);
1405
1406        inode = lookup_free_space_inode(block_group, path);
1407        if (IS_ERR(inode))
1408                return 0;
1409
1410        ret = __btrfs_write_out_cache(fs_info->tree_root, inode, ctl,
1411                                block_group, &block_group->io_ctl, trans);
1412        if (ret) {
1413                btrfs_debug(fs_info,
1414          "failed to write free space cache for block group %llu error %d",
1415                          block_group->start, ret);
1416                spin_lock(&block_group->lock);
1417                block_group->disk_cache_state = BTRFS_DC_ERROR;
1418                spin_unlock(&block_group->lock);
1419
1420                block_group->io_ctl.inode = NULL;
1421                iput(inode);
1422        }
1423
1424        /*
1425         * If ret == 0, the caller is expected to call btrfs_wait_cache_io
1426         * to wait for IO and put the inode.
1427         */
1428
1429        return ret;
1430}
1431
1432static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit,
1433                                          u64 offset)
1434{
1435        ASSERT(offset >= bitmap_start);
1436        offset -= bitmap_start;
1437        return (unsigned long)(div_u64(offset, unit));
1438}
1439
1440static inline unsigned long bytes_to_bits(u64 bytes, u32 unit)
1441{
1442        return (unsigned long)(div_u64(bytes, unit));
1443}
1444
1445static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl,
1446                                   u64 offset)
1447{
1448        u64 bitmap_start;
1449        u64 bytes_per_bitmap;
1450
1451        bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit;
1452        bitmap_start = offset - ctl->start;
1453        bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
1454        bitmap_start *= bytes_per_bitmap;
1455        bitmap_start += ctl->start;
1456
1457        return bitmap_start;
1458}
1459
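    /*
     * Insert a free space entry into the rbtree indexed by offset.  A bitmap
     * entry and an extent entry are allowed to share an offset; as the comment
     * in the body explains, the extent entry sorts logically before the
     * bitmap.  Inserting a second entry of the same kind at an existing offset
     * is a bug and fails with -EEXIST.
     */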
1460static int tree_insert_offset(struct rb_root *root, u64 offset,
1461                              struct rb_node *node, int bitmap)
1462{
1463        struct rb_node **p = &root->rb_node;
1464        struct rb_node *parent = NULL;
1465        struct btrfs_free_space *info;
1466
1467        while (*p) {
1468                parent = *p;
1469                info = rb_entry(parent, struct btrfs_free_space, offset_index);
1470
1471                if (offset < info->offset) {
1472                        p = &(*p)->rb_left;
1473                } else if (offset > info->offset) {
1474                        p = &(*p)->rb_right;
1475                } else {
1476                        /*
1477                         * a bitmap entry and an extent entry could share the
1478                         * same offset.  If this is the case, we want
1479                         * the extent entry to always be found first if we do a
1480                         * linear search through the tree, since we want to have
1481                         * the quickest allocation time, and allocating from an
1482                         * extent is faster than allocating from a bitmap.  So
1483                         * if we're inserting a bitmap and we find an entry at
1484                         * this offset, we want to go right, or after this entry
1485                         * logically.  If we are inserting an extent and we've
1486                         * found a bitmap, we want to go left, or before
1487                         * logically.
1488                         */
1489                        if (bitmap) {
1490                                if (info->bitmap) {
1491                                        WARN_ON_ONCE(1);
1492                                        return -EEXIST;
1493                                }
1494                                p = &(*p)->rb_right;
1495                        } else {
1496                                if (!info->bitmap) {
1497                                        WARN_ON_ONCE(1);
1498                                        return -EEXIST;
1499                                }
1500                                p = &(*p)->rb_left;
1501                        }
1502                }
1503        }
1504
1505        rb_link_node(node, parent, p);
1506        rb_insert_color(node, root);
1507
1508        return 0;
1509}
1510
1511/*
1512 * searches the tree for the given offset.
1513 *
1514 * fuzzy - If this is set, then we are trying to make an allocation, and we just
1515 * want a section that is at least 'bytes' in size and comes at or after the given
1516 * offset.
1517 */
1518static struct btrfs_free_space *
1519tree_search_offset(struct btrfs_free_space_ctl *ctl,
1520                   u64 offset, int bitmap_only, int fuzzy)
1521{
1522        struct rb_node *n = ctl->free_space_offset.rb_node;
1523        struct btrfs_free_space *entry, *prev = NULL;
1524
1525        /* find entry that is closest to the 'offset' */
1526        while (1) {
1527                if (!n) {
1528                        entry = NULL;
1529                        break;
1530                }
1531
1532                entry = rb_entry(n, struct btrfs_free_space, offset_index);
1533                prev = entry;
1534
1535                if (offset < entry->offset)
1536                        n = n->rb_left;
1537                else if (offset > entry->offset)
1538                        n = n->rb_right;
1539                else
1540                        break;
1541        }
1542
1543        if (bitmap_only) {
1544                if (!entry)
1545                        return NULL;
1546                if (entry->bitmap)
1547                        return entry;
1548
1549                /*
1550                 * A bitmap entry and an extent entry may share the same offset;
1551                 * in that case, the bitmap entry comes after the extent entry.
1552                 */
1553                n = rb_next(n);
1554                if (!n)
1555                        return NULL;
1556                entry = rb_entry(n, struct btrfs_free_space, offset_index);
1557                if (entry->offset != offset)
1558                        return NULL;
1559
1560                WARN_ON(!entry->bitmap);
1561                return entry;
1562        } else if (entry) {
1563                if (entry->bitmap) {
1564                        /*
1565                         * if previous extent entry covers the offset,
1566                         * we should return it instead of the bitmap entry
1567                         */
1568                        n = rb_prev(&entry->offset_index);
1569                        if (n) {
1570                                prev = rb_entry(n, struct btrfs_free_space,
1571                                                offset_index);
1572                                if (!prev->bitmap &&
1573                                    prev->offset + prev->bytes > offset)
1574                                        entry = prev;
1575                        }
1576                }
1577                return entry;
1578        }
1579
1580        if (!prev)
1581                return NULL;
1582
1583        /* find last entry before the 'offset' */
1584        entry = prev;
1585        if (entry->offset > offset) {
1586                n = rb_prev(&entry->offset_index);
1587                if (n) {
1588                        entry = rb_entry(n, struct btrfs_free_space,
1589                                        offset_index);
1590                        ASSERT(entry->offset <= offset);
1591                } else {
1592                        if (fuzzy)
1593                                return entry;
1594                        else
1595                                return NULL;
1596                }
1597        }
1598
1599        if (entry->bitmap) {
1600                n = rb_prev(&entry->offset_index);
1601                if (n) {
1602                        prev = rb_entry(n, struct btrfs_free_space,
1603                                        offset_index);
1604                        if (!prev->bitmap &&
1605                            prev->offset + prev->bytes > offset)
1606                                return prev;
1607                }
1608                if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset)
1609                        return entry;
1610        } else if (entry->offset + entry->bytes > offset)
1611                return entry;
1612
1613        if (!fuzzy)
1614                return NULL;
1615
1616        while (1) {
1617                if (entry->bitmap) {
1618                        if (entry->offset + BITS_PER_BITMAP *
1619                            ctl->unit > offset)
1620                                break;
1621                } else {
1622                        if (entry->offset + entry->bytes > offset)
1623                                break;
1624                }
1625
1626                n = rb_next(&entry->offset_index);
1627                if (!n)
1628                        return NULL;
1629                entry = rb_entry(n, struct btrfs_free_space, offset_index);
1630        }
1631        return entry;
1632}
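
/*
 * A small example of the lookup semantics above, with two extent entries
 * [0, 8KiB) and [1MiB, 1MiB + 4KiB) in the tree (offsets are illustrative):
 *
 *   tree_search_offset(ctl, 4KiB, 0, 0)  -> the [0, 8KiB) entry (covers it)
 *   tree_search_offset(ctl, 64KiB, 0, 0) -> NULL, nothing covers 64KiB
 *   tree_search_offset(ctl, 64KiB, 0, 1) -> the [1MiB, ...) entry, i.e. the
 *                                           first one ending beyond 64KiB
 *
 * With bitmap_only set, callers pass offset_to_bitmap(ctl, offset) and only a
 * bitmap entry sitting at exactly that offset is returned.
 */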
1633
1634static inline void
1635__unlink_free_space(struct btrfs_free_space_ctl *ctl,
1636                    struct btrfs_free_space *info)
1637{
1638        rb_erase(&info->offset_index, &ctl->free_space_offset);
1639        ctl->free_extents--;
1640
1641        if (!info->bitmap && !btrfs_free_space_trimmed(info)) {
1642                ctl->discardable_extents[BTRFS_STAT_CURR]--;
1643                ctl->discardable_bytes[BTRFS_STAT_CURR] -= info->bytes;
1644        }
1645}
1646
1647static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
1648                              struct btrfs_free_space *info)
1649{
1650        __unlink_free_space(ctl, info);
1651        ctl->free_space -= info->bytes;
1652}
1653
1654static int link_free_space(struct btrfs_free_space_ctl *ctl,
1655                           struct btrfs_free_space *info)
1656{
1657        int ret = 0;
1658
1659        ASSERT(info->bytes || info->bitmap);
1660        ret = tree_insert_offset(&ctl->free_space_offset, info->offset,
1661                                 &info->offset_index, (info->bitmap != NULL));
1662        if (ret)
1663                return ret;
1664
1665        if (!info->bitmap && !btrfs_free_space_trimmed(info)) {
1666                ctl->discardable_extents[BTRFS_STAT_CURR]++;
1667                ctl->discardable_bytes[BTRFS_STAT_CURR] += info->bytes;
1668        }
1669
1670        ctl->free_space += info->bytes;
1671        ctl->free_extents++;
1672        return ret;
1673}
1674
1675static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
1676{
1677        struct btrfs_block_group *block_group = ctl->private;
1678        u64 max_bytes;
1679        u64 bitmap_bytes;
1680        u64 extent_bytes;
1681        u64 size = block_group->length;
1682        u64 bytes_per_bg = BITS_PER_BITMAP * ctl->unit;
1683        u64 max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg);
1684
1685        max_bitmaps = max_t(u64, max_bitmaps, 1);
1686
1687        ASSERT(ctl->total_bitmaps <= max_bitmaps);
1688
1689        /*
1690         * We are trying to keep the total amount of memory used per 1GiB of
1691         * space to be MAX_CACHE_BYTES_PER_GIG.  However, with a reclamation
1692         * mechanism of pulling extents >= FORCE_EXTENT_THRESHOLD out of
1693         * bitmaps, we may end up using more memory than this.
1694         */
1695        if (size < SZ_1G)
1696                max_bytes = MAX_CACHE_BYTES_PER_GIG;
1697        else
1698                max_bytes = MAX_CACHE_BYTES_PER_GIG * div_u64(size, SZ_1G);
1699
1700        bitmap_bytes = ctl->total_bitmaps * ctl->unit;
1701
1702        /*
1703         * We want the bytes available to extent entries to be whatever is left
1704         * after the bitmaps, but never more than 1/2 the max bytes we can have.
1705         */
1706        extent_bytes = max_bytes - bitmap_bytes;
1707        extent_bytes = min_t(u64, extent_bytes, max_bytes >> 1);
1708
1709        ctl->extents_thresh =
1710                div_u64(extent_bytes, sizeof(struct btrfs_free_space));
1711}
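
/*
 * Worked example of the threshold math above, assuming a 1GiB block group and
 * a 4KiB sectorsize (ctl->unit == 4096):
 *
 *   max_bytes      = MAX_CACHE_BYTES_PER_GIG = 64KiB
 *   with 4 bitmaps in use, bitmap_bytes = 4 * 4096 = 16KiB
 *   extent_bytes   = 64KiB - 16KiB = 48KiB, capped to 64KiB / 2 = 32KiB
 *   extents_thresh = 32KiB / sizeof(struct btrfs_free_space)
 *
 * i.e. at most half of the per-GiB budget is ever handed to extent entries,
 * no matter how few bitmaps are in use.
 */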
1712
1713static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
1714                                       struct btrfs_free_space *info,
1715                                       u64 offset, u64 bytes)
1716{
1717        unsigned long start, count, end;
1718        int extent_delta = -1;
1719
1720        start = offset_to_bit(info->offset, ctl->unit, offset);
1721        count = bytes_to_bits(bytes, ctl->unit);
1722        end = start + count;
1723        ASSERT(end <= BITS_PER_BITMAP);
1724
1725        bitmap_clear(info->bitmap, start, count);
1726
1727        info->bytes -= bytes;
1728        if (info->max_extent_size > ctl->unit)
1729                info->max_extent_size = 0;
1730
1731        if (start && test_bit(start - 1, info->bitmap))
1732                extent_delta++;
1733
1734        if (end < BITS_PER_BITMAP && test_bit(end, info->bitmap))
1735                extent_delta++;
1736
1737        info->bitmap_extents += extent_delta;
1738        if (!btrfs_free_space_trimmed(info)) {
1739                ctl->discardable_extents[BTRFS_STAT_CURR] += extent_delta;
1740                ctl->discardable_bytes[BTRFS_STAT_CURR] -= bytes;
1741        }
1742}
1743
1744static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
1745                              struct btrfs_free_space *info, u64 offset,
1746                              u64 bytes)
1747{
1748        __bitmap_clear_bits(ctl, info, offset, bytes);
1749        ctl->free_space -= bytes;
1750}
1751
1752static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
1753                            struct btrfs_free_space *info, u64 offset,
1754                            u64 bytes)
1755{
1756        unsigned long start, count, end;
1757        int extent_delta = 1;
1758
1759        start = offset_to_bit(info->offset, ctl->unit, offset);
1760        count = bytes_to_bits(bytes, ctl->unit);
1761        end = start + count;
1762        ASSERT(end <= BITS_PER_BITMAP);
1763
1764        bitmap_set(info->bitmap, start, count);
1765
1766        info->bytes += bytes;
1767        ctl->free_space += bytes;
1768
1769        if (start && test_bit(start - 1, info->bitmap))
1770                extent_delta--;
1771
1772        if (end < BITS_PER_BITMAP && test_bit(end, info->bitmap))
1773                extent_delta--;
1774
1775        info->bitmap_extents += extent_delta;
1776        if (!btrfs_free_space_trimmed(info)) {
1777                ctl->discardable_extents[BTRFS_STAT_CURR] += extent_delta;
1778                ctl->discardable_bytes[BTRFS_STAT_CURR] += bytes;
1779        }
1780}
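
/*
 * Example of the extent_delta accounting above when setting bits [10, 20):
 *
 *   neither bit 9 nor bit 20 set  -> extent_delta = +1, a new free region
 *   only bit 9 (or only bit 20)   -> extent_delta =  0, an existing region
 *                                    is merely extended
 *   both bit 9 and bit 20 set     -> extent_delta = -1, two regions are
 *                                    joined into one
 *
 * __bitmap_clear_bits() is the mirror image, starting from -1.
 */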
1781
1782/*
1783 * If we can not find a suitable extent, we will use *bytes to record
1784 * the size of the largest free extent we did find.
1785 */
1786static int search_bitmap(struct btrfs_free_space_ctl *ctl,
1787                         struct btrfs_free_space *bitmap_info, u64 *offset,
1788                         u64 *bytes, bool for_alloc)
1789{
1790        unsigned long found_bits = 0;
1791        unsigned long max_bits = 0;
1792        unsigned long bits, i;
1793        unsigned long next_zero;
1794        unsigned long extent_bits;
1795
1796        /*
1797         * Skip searching the bitmap if we don't have a contiguous section that
1798         * is large enough for this allocation.
1799         */
1800        if (for_alloc &&
1801            bitmap_info->max_extent_size &&
1802            bitmap_info->max_extent_size < *bytes) {
1803                *bytes = bitmap_info->max_extent_size;
1804                return -1;
1805        }
1806
1807        i = offset_to_bit(bitmap_info->offset, ctl->unit,
1808                          max_t(u64, *offset, bitmap_info->offset));
1809        bits = bytes_to_bits(*bytes, ctl->unit);
1810
1811        for_each_set_bit_from(i, bitmap_info->bitmap, BITS_PER_BITMAP) {
1812                if (for_alloc && bits == 1) {
1813                        found_bits = 1;
1814                        break;
1815                }
1816                next_zero = find_next_zero_bit(bitmap_info->bitmap,
1817                                               BITS_PER_BITMAP, i);
1818                extent_bits = next_zero - i;
1819                if (extent_bits >= bits) {
1820                        found_bits = extent_bits;
1821                        break;
1822                } else if (extent_bits > max_bits) {
1823                        max_bits = extent_bits;
1824                }
1825                i = next_zero;
1826        }
1827
1828        if (found_bits) {
1829                *offset = (u64)(i * ctl->unit) + bitmap_info->offset;
1830                *bytes = (u64)(found_bits) * ctl->unit;
1831                return 0;
1832        }
1833
1834        *bytes = (u64)(max_bits) * ctl->unit;
1835        bitmap_info->max_extent_size = *bytes;
1836        return -1;
1837}
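
/*
 * Example of the max_extent_size caching above: a for_alloc search for 1MiB
 * in a bitmap whose largest contiguous run of set bits is 256KiB fails with
 * -1, sets *bytes = 256KiB and caches 256KiB in max_extent_size.  Any later
 * for_alloc search asking for more than 256KiB then bails out immediately
 * without walking the bits.  The cache is dropped again whenever bits are set
 * (add_bytes_to_bitmap()) or cleared (__bitmap_clear_bits()).
 */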
1838
1839static inline u64 get_max_extent_size(struct btrfs_free_space *entry)
1840{
1841        if (entry->bitmap)
1842                return entry->max_extent_size;
1843        return entry->bytes;
1844}
1845
1846/* Cache the size of the max extent in bytes */
1847static struct btrfs_free_space *
1848find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes,
1849                unsigned long align, u64 *max_extent_size)
1850{
1851        struct btrfs_free_space *entry;
1852        struct rb_node *node;
1853        u64 tmp;
1854        u64 align_off;
1855        int ret;
1856
1857        if (!ctl->free_space_offset.rb_node)
1858                goto out;
1859
1860        entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1);
1861        if (!entry)
1862                goto out;
1863
1864        for (node = &entry->offset_index; node; node = rb_next(node)) {
1865                entry = rb_entry(node, struct btrfs_free_space, offset_index);
1866                if (entry->bytes < *bytes) {
1867                        *max_extent_size = max(get_max_extent_size(entry),
1868                                               *max_extent_size);
1869                        continue;
1870                }
1871
1872                /* make sure the space returned is big enough
1873                 * to match our requested alignment
1874                 */
1875                if (*bytes >= align) {
1876                        tmp = entry->offset - ctl->start + align - 1;
1877                        tmp = div64_u64(tmp, align);
1878                        tmp = tmp * align + ctl->start;
1879                        align_off = tmp - entry->offset;
1880                } else {
1881                        align_off = 0;
1882                        tmp = entry->offset;
1883                }
1884
1885                if (entry->bytes < *bytes + align_off) {
1886                        *max_extent_size = max(get_max_extent_size(entry),
1887                                               *max_extent_size);
1888                        continue;
1889                }
1890
1891                if (entry->bitmap) {
1892                        u64 size = *bytes;
1893
1894                        ret = search_bitmap(ctl, entry, &tmp, &size, true);
1895                        if (!ret) {
1896                                *offset = tmp;
1897                                *bytes = size;
1898                                return entry;
1899                        } else {
1900                                *max_extent_size =
1901                                        max(get_max_extent_size(entry),
1902                                            *max_extent_size);
1903                        }
1904                        continue;
1905                }
1906
1907                *offset = tmp;
1908                *bytes = entry->bytes - align_off;
1909                return entry;
1910        }
1911out:
1912        return NULL;
1913}
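
/*
 * Alignment example for the rounding above (illustrative numbers): with
 * ctl->start = 1GiB, align = 64KiB and a 200KiB extent entry at
 * ctl->start + 100KiB, a 64KiB request is rounded up to ctl->start + 128KiB
 * (align_off = 28KiB).  The entry still holds 200KiB >= 64KiB + 28KiB, so it
 * is returned with *offset = ctl->start + 128KiB and
 * *bytes = 200KiB - 28KiB = 172KiB.
 */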
1914
1915static int count_bitmap_extents(struct btrfs_free_space_ctl *ctl,
1916                                struct btrfs_free_space *bitmap_info)
1917{
1918        struct btrfs_block_group *block_group = ctl->private;
1919        u64 bytes = bitmap_info->bytes;
1920        unsigned int rs, re;
1921        int count = 0;
1922
1923        if (!block_group || !bytes)
1924                return count;
1925
1926        bitmap_for_each_set_region(bitmap_info->bitmap, rs, re, 0,
1927                                   BITS_PER_BITMAP) {
1928                bytes -= (re - rs) * ctl->unit;
1929                count++;
1930
1931                if (!bytes)
1932                        break;
1933        }
1934
1935        return count;
1936}
1937
1938static void add_new_bitmap(struct btrfs_free_space_ctl *ctl,
1939                           struct btrfs_free_space *info, u64 offset)
1940{
1941        info->offset = offset_to_bitmap(ctl, offset);
1942        info->bytes = 0;
1943        info->bitmap_extents = 0;
1944        INIT_LIST_HEAD(&info->list);
1945        link_free_space(ctl, info);
1946        ctl->total_bitmaps++;
1947
1948        ctl->op->recalc_thresholds(ctl);
1949}
1950
1951static void free_bitmap(struct btrfs_free_space_ctl *ctl,
1952                        struct btrfs_free_space *bitmap_info)
1953{
1954        /*
1955         * Normally when this is called, the bitmap is completely empty. However,
1956         * if we are blowing up the free space cache for one reason or another
1957         * via __btrfs_remove_free_space_cache(), then it may not be empty and
1958         * we may leave stats on the table.
1959         */
1960        if (bitmap_info->bytes && !btrfs_free_space_trimmed(bitmap_info)) {
1961                ctl->discardable_extents[BTRFS_STAT_CURR] -=
1962                        bitmap_info->bitmap_extents;
1963                ctl->discardable_bytes[BTRFS_STAT_CURR] -= bitmap_info->bytes;
1964
1965        }
1966        unlink_free_space(ctl, bitmap_info);
1967        kmem_cache_free(btrfs_free_space_bitmap_cachep, bitmap_info->bitmap);
1968        kmem_cache_free(btrfs_free_space_cachep, bitmap_info);
1969        ctl->total_bitmaps--;
1970        ctl->op->recalc_thresholds(ctl);
1971}
1972
1973static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl,
1974                              struct btrfs_free_space *bitmap_info,
1975                              u64 *offset, u64 *bytes)
1976{
1977        u64 end;
1978        u64 search_start, search_bytes;
1979        int ret;
1980
1981again:
1982        end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;
1983
1984        /*
1985         * We need to search for bits in this bitmap.  We could only cover some
1986         * of the extent in this bitmap thanks to how we add space, so we need
1987         * to search for as much of it as we can and clear that amount, and then
1988         * go searching for the next bit.
1989         */
1990        search_start = *offset;
1991        search_bytes = ctl->unit;
1992        search_bytes = min(search_bytes, end - search_start + 1);
1993        ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes,
1994                            false);
1995        if (ret < 0 || search_start != *offset)
1996                return -EINVAL;
1997
1998        /* We may have found more bits than what we need */
1999        search_bytes = min(search_bytes, *bytes);
2000
2001        /* Cannot clear past the end of the bitmap */
2002        search_bytes = min(search_bytes, end - search_start + 1);
2003
2004        bitmap_clear_bits(ctl, bitmap_info, search_start, search_bytes);
2005        *offset += search_bytes;
2006        *bytes -= search_bytes;
2007
2008        if (*bytes) {
2009                struct rb_node *next = rb_next(&bitmap_info->offset_index);
2010                if (!bitmap_info->bytes)
2011                        free_bitmap(ctl, bitmap_info);
2012
2013                /*
2014                 * no entry after this bitmap, but we still have bytes to
2015                 * remove, so something has gone wrong.
2016                 */
2017                if (!next)
2018                        return -EINVAL;
2019
2020                bitmap_info = rb_entry(next, struct btrfs_free_space,
2021                                       offset_index);
2022
2023                /*
2024                 * if the next entry isn't a bitmap we need to return to let the
2025                 * extent stuff do its work.
2026                 */
2027                if (!bitmap_info->bitmap)
2028                        return -EAGAIN;
2029
2030                /*
2031                 * Ok the next item is a bitmap, but it may not actually hold
2032                 * the information for the rest of this free space stuff, so
2033                 * look for it, and if we don't find it return so we can try
2034                 * everything over again.
2035                 */
2036                search_start = *offset;
2037                search_bytes = ctl->unit;
2038                ret = search_bitmap(ctl, bitmap_info, &search_start,
2039                                    &search_bytes, false);
2040                if (ret < 0 || search_start != *offset)
2041                        return -EAGAIN;
2042
2043                goto again;
2044        } else if (!bitmap_info->bytes)
2045                free_bitmap(ctl, bitmap_info);
2046
2047        return 0;
2048}
2049
2050static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
2051                               struct btrfs_free_space *info, u64 offset,
2052                               u64 bytes, enum btrfs_trim_state trim_state)
2053{
2054        u64 bytes_to_set = 0;
2055        u64 end;
2056
2057        /*
2058         * This is a tradeoff to make bitmap trim state minimal.  We mark the
2059         * whole bitmap untrimmed if at any point we add untrimmed regions.
2060         */
2061        if (trim_state == BTRFS_TRIM_STATE_UNTRIMMED) {
2062                if (btrfs_free_space_trimmed(info)) {
2063                        ctl->discardable_extents[BTRFS_STAT_CURR] +=
2064                                info->bitmap_extents;
2065                        ctl->discardable_bytes[BTRFS_STAT_CURR] += info->bytes;
2066                }
2067                info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
2068        }
2069
2070        end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);
2071
2072        bytes_to_set = min(end - offset, bytes);
2073
2074        bitmap_set_bits(ctl, info, offset, bytes_to_set);
2075
2076        /*
2077         * We set some bytes, we have no idea what the max extent size is
2078         * anymore.
2079         */
2080        info->max_extent_size = 0;
2081
2082        return bytes_to_set;
2083
2084}
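
/*
 * Example for the clamping above, assuming 128MiB bitmap windows (4KiB unit):
 * adding [127MiB, 129MiB) to the bitmap covering [0, 128MiB) sets only
 * [127MiB, 128MiB) and returns 1MiB; insert_into_bitmap() then advances
 * offset/bytes and drops the remaining 1MiB into the next window's bitmap.
 * Adding any untrimmed bytes marks the whole bitmap untrimmed.
 */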
2085
2086static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
2087                      struct btrfs_free_space *info)
2088{
2089        struct btrfs_block_group *block_group = ctl->private;
2090        struct btrfs_fs_info *fs_info = block_group->fs_info;
2091        bool forced = false;
2092
2093#ifdef CONFIG_BTRFS_DEBUG
2094        if (btrfs_should_fragment_free_space(block_group))
2095                forced = true;
2096#endif
2097
2098        /* This is a way to reclaim large regions from the bitmaps. */
2099        if (!forced && info->bytes >= FORCE_EXTENT_THRESHOLD)
2100                return false;
2101
2102        /*
2103         * If we are below the extents threshold then we can add this as an
2104         * extent, and don't have to deal with the bitmap
2105         */
2106        if (!forced && ctl->free_extents < ctl->extents_thresh) {
2107                /*
2108                 * If this block group has some small extents we don't want to
2109                 * use up all of our free slots in the cache with them, we want
2110                 * to reserve them for larger extents, however if we have plenty
2111                 * of cache left then go ahead and add them, no sense in adding
2112                 * the overhead of a bitmap if we don't have to.
2113                 */
2114                if (info->bytes <= fs_info->sectorsize * 8) {
2115                        if (ctl->free_extents * 3 <= ctl->extents_thresh)
2116                                return false;
2117                } else {
2118                        return false;
2119                }
2120        }
2121
2122        /*
2123         * The original block groups from mkfs can be really small, like 8
2124         * megabytes, so don't bother with a bitmap for those entries.  However
2125         * some block groups can be smaller than what a bitmap would cover but
2126         * are still large enough that they could overflow the 32k memory limit,
2127         * so allow those block groups to still be allowed to have a bitmap
2128         * entry.
2129         */
2130        if (((BITS_PER_BITMAP * ctl->unit) >> 1) > block_group->length)
2131                return false;
2132
2133        return true;
2134}
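
/*
 * Decision sketch for use_bitmap(), assuming a 4KiB sectorsize, a block group
 * large enough for bitmaps and, purely for illustration, extents_thresh == 300
 * (the real value comes from recalculate_thresholds()):
 *
 *   a 2MiB region                        -> extent entry, always
 *                                           (>= FORCE_EXTENT_THRESHOLD)
 *   a 512KiB region, 150 entries in use  -> extent entry (still under
 *                                           extents_thresh)
 *   a 16KiB region, 80 entries in use    -> extent entry (80 * 3 <= 300,
 *                                           plenty of slots left)
 *   a 16KiB region, 150 entries in use   -> bitmap (small extents stop getting
 *                                           dedicated entries once a third of
 *                                           the threshold is consumed)
 */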
2135
2136static const struct btrfs_free_space_op free_space_op = {
2137        .recalc_thresholds      = recalculate_thresholds,
2138        .use_bitmap             = use_bitmap,
2139};
2140
2141static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
2142                              struct btrfs_free_space *info)
2143{
2144        struct btrfs_free_space *bitmap_info;
2145        struct btrfs_block_group *block_group = NULL;
2146        int added = 0;
2147        u64 bytes, offset, bytes_added;
2148        enum btrfs_trim_state trim_state;
2149        int ret;
2150
2151        bytes = info->bytes;
2152        offset = info->offset;
2153        trim_state = info->trim_state;
2154
2155        if (!ctl->op->use_bitmap(ctl, info))
2156                return 0;
2157
2158        if (ctl->op == &free_space_op)
2159                block_group = ctl->private;
2160again:
2161        /*
2162         * Since we link bitmaps right into the cluster we need to see if we
2163         * have a cluster here, and if so and it has our bitmap we need to add
2164         * the free space to that bitmap.
2165         */
2166        if (block_group && !list_empty(&block_group->cluster_list)) {
2167                struct btrfs_free_cluster *cluster;
2168                struct rb_node *node;
2169                struct btrfs_free_space *entry;
2170
2171                cluster = list_entry(block_group->cluster_list.next,
2172                                     struct btrfs_free_cluster,
2173                                     block_group_list);
2174                spin_lock(&cluster->lock);
2175                node = rb_first(&cluster->root);
2176                if (!node) {
2177                        spin_unlock(&cluster->lock);
2178                        goto no_cluster_bitmap;
2179                }
2180
2181                entry = rb_entry(node, struct btrfs_free_space, offset_index);
2182                if (!entry->bitmap) {
2183                        spin_unlock(&cluster->lock);
2184                        goto no_cluster_bitmap;
2185                }
2186
2187                if (entry->offset == offset_to_bitmap(ctl, offset)) {
2188                        bytes_added = add_bytes_to_bitmap(ctl, entry, offset,
2189                                                          bytes, trim_state);
2190                        bytes -= bytes_added;
2191                        offset += bytes_added;
2192                }
2193                spin_unlock(&cluster->lock);
2194                if (!bytes) {
2195                        ret = 1;
2196                        goto out;
2197                }
2198        }
2199
2200no_cluster_bitmap:
2201        bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
2202                                         1, 0);
2203        if (!bitmap_info) {
2204                ASSERT(added == 0);
2205                goto new_bitmap;
2206        }
2207
2208        bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes,
2209                                          trim_state);
2210        bytes -= bytes_added;
2211        offset += bytes_added;
2212        added = 0;
2213
2214        if (!bytes) {
2215                ret = 1;
2216                goto out;
2217        } else
2218                goto again;
2219
2220new_bitmap:
2221        if (info && info->bitmap) {
2222                add_new_bitmap(ctl, info, offset);
2223                added = 1;
2224                info = NULL;
2225                goto again;
2226        } else {
2227                spin_unlock(&ctl->tree_lock);
2228
2229                /* no pre-allocated info, allocate a new one */
2230                if (!info) {
2231                        info = kmem_cache_zalloc(btrfs_free_space_cachep,
2232                                                 GFP_NOFS);
2233                        if (!info) {
2234                                spin_lock(&ctl->tree_lock);
2235                                ret = -ENOMEM;
2236                                goto out;
2237                        }
2238                }
2239
2240                /* allocate the bitmap */
2241                info->bitmap = kmem_cache_zalloc(btrfs_free_space_bitmap_cachep,
2242                                                 GFP_NOFS);
2243                info->trim_state = BTRFS_TRIM_STATE_TRIMMED;
2244                spin_lock(&ctl->tree_lock);
2245                if (!info->bitmap) {
2246                        ret = -ENOMEM;
2247                        goto out;
2248                }
2249                goto again;
2250        }
2251
2252out:
2253        if (info) {
2254                if (info->bitmap)
2255                        kmem_cache_free(btrfs_free_space_bitmap_cachep,
2256                                        info->bitmap);
2257                kmem_cache_free(btrfs_free_space_cachep, info);
2258        }
2259
2260        return ret;
2261}
2262
2263/*
2264 * Free space merging rules:
2265 *  1) Merge trimmed areas together
2266 *  2) Let untrimmed areas coalesce with trimmed areas
2267 *  3) Always pull neighboring regions from bitmaps
2268 *
2269 * The above rules are for when we merge free space based on btrfs_trim_state.
2270 * Rules 2 and 3 are subtle because they are suboptimal, but are done for the
2271 * same reason: to promote larger extent regions which makes life easier for
2272 * find_free_extent().  Rule 2 enables coalescing on the common path of
2273 * returning free space from btrfs_finish_extent_commit(): without it, once
2274 * free space is trimmed, newly freed (untrimmed) regions would never be
2275 * aggregated with it in the rb_tree.  Rule 3 is purely to obtain larger extents
2276 * and provide find_free_extent() with the largest extents possible hoping for
2277 * the reuse path.
2278 */
2279static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
2280                          struct btrfs_free_space *info, bool update_stat)
2281{
2282        struct btrfs_free_space *left_info = NULL;
2283        struct btrfs_free_space *right_info;
2284        bool merged = false;
2285        u64 offset = info->offset;
2286        u64 bytes = info->bytes;
2287        const bool is_trimmed = btrfs_free_space_trimmed(info);
2288
2289        /*
2290         * first we want to see if there is free space adjacent to the range we
2291         * are adding, if there is remove that struct and add a new one to
2292         * cover the entire range
2293         */
2294        right_info = tree_search_offset(ctl, offset + bytes, 0, 0);
2295        if (right_info && rb_prev(&right_info->offset_index))
2296                left_info = rb_entry(rb_prev(&right_info->offset_index),
2297                                     struct btrfs_free_space, offset_index);
2298        else if (!right_info)
2299                left_info = tree_search_offset(ctl, offset - 1, 0, 0);
2300
2301        /* See try_merge_free_space() comment. */
2302        if (right_info && !right_info->bitmap &&
2303            (!is_trimmed || btrfs_free_space_trimmed(right_info))) {
2304                if (update_stat)
2305                        unlink_free_space(ctl, right_info);
2306                else
2307                        __unlink_free_space(ctl, right_info);
2308                info->bytes += right_info->bytes;
2309                kmem_cache_free(btrfs_free_space_cachep, right_info);
2310                merged = true;
2311        }
2312
2313        /* See try_merge_free_space() comment. */
2314        if (left_info && !left_info->bitmap &&
2315            left_info->offset + left_info->bytes == offset &&
2316            (!is_trimmed || btrfs_free_space_trimmed(left_info))) {
2317                if (update_stat)
2318                        unlink_free_space(ctl, left_info);
2319                else
2320                        __unlink_free_space(ctl, left_info);
2321                info->offset = left_info->offset;
2322                info->bytes += left_info->bytes;
2323                kmem_cache_free(btrfs_free_space_cachep, left_info);
2324                merged = true;
2325        }
2326
2327        return merged;
2328}
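
/*
 * Trim-state example for the merging rules above, with a trimmed extent
 * [0, 1MiB) already in the tree and [1MiB, 2MiB) being added:
 *
 *   new region untrimmed (the common btrfs_finish_extent_commit() path)
 *     -> rule 2: merged into a single [0, 2MiB) entry, marked untrimmed
 *   new region trimmed (e.g. freed with DISCARD_SYNC)
 *     -> rule 1: merged and the result stays trimmed
 *
 * The reverse case, a trimmed region being added next to an untrimmed
 * neighbour, is not merged, so we do not lose track of what still needs
 * discarding.
 */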
2329
2330static bool steal_from_bitmap_to_end(struct btrfs_free_space_ctl *ctl,
2331                                     struct btrfs_free_space *info,
2332                                     bool update_stat)
2333{
2334        struct btrfs_free_space *bitmap;
2335        unsigned long i;
2336        unsigned long j;
2337        const u64 end = info->offset + info->bytes;
2338        const u64 bitmap_offset = offset_to_bitmap(ctl, end);
2339        u64 bytes;
2340
2341        bitmap = tree_search_offset(ctl, bitmap_offset, 1, 0);
2342        if (!bitmap)
2343                return false;
2344
2345        i = offset_to_bit(bitmap->offset, ctl->unit, end);
2346        j = find_next_zero_bit(bitmap->bitmap, BITS_PER_BITMAP, i);
2347        if (j == i)
2348                return false;
2349        bytes = (j - i) * ctl->unit;
2350        info->bytes += bytes;
2351
2352        /* See try_merge_free_space() comment. */
2353        if (!btrfs_free_space_trimmed(bitmap))
2354                info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
2355
2356        if (update_stat)
2357                bitmap_clear_bits(ctl, bitmap, end, bytes);
2358        else
2359                __bitmap_clear_bits(ctl, bitmap, end, bytes);
2360
2361        if (!bitmap->bytes)
2362                free_bitmap(ctl, bitmap);
2363
2364        return true;
2365}
2366
2367static bool steal_from_bitmap_to_front(struct btrfs_free_space_ctl *ctl,
2368                                       struct btrfs_free_space *info,
2369                                       bool update_stat)
2370{
2371        struct btrfs_free_space *bitmap;
2372        u64 bitmap_offset;
2373        unsigned long i;
2374        unsigned long j;
2375        unsigned long prev_j;
2376        u64 bytes;
2377
2378        bitmap_offset = offset_to_bitmap(ctl, info->offset);
2379        /* If we're on a boundary, try the previous logical bitmap. */
2380        if (bitmap_offset == info->offset) {
2381                if (info->offset == 0)
2382                        return false;
2383                bitmap_offset = offset_to_bitmap(ctl, info->offset - 1);
2384        }
2385
2386        bitmap = tree_search_offset(ctl, bitmap_offset, 1, 0);
2387        if (!bitmap)
2388                return false;
2389
2390        i = offset_to_bit(bitmap->offset, ctl->unit, info->offset) - 1;
2391        j = 0;
2392        prev_j = (unsigned long)-1;
2393        for_each_clear_bit_from(j, bitmap->bitmap, BITS_PER_BITMAP) {
2394                if (j > i)
2395                        break;
2396                prev_j = j;
2397        }
2398        if (prev_j == i)
2399                return false;
2400
2401        if (prev_j == (unsigned long)-1)
2402                bytes = (i + 1) * ctl->unit;
2403        else
2404                bytes = (i - prev_j) * ctl->unit;
2405
2406        info->offset -= bytes;
2407        info->bytes += bytes;
2408
2409        /* See try_merge_free_space() comment. */
2410        if (!btrfs_free_space_trimmed(bitmap))
2411                info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
2412
2413        if (update_stat)
2414                bitmap_clear_bits(ctl, bitmap, info->offset, bytes);
2415        else
2416                __bitmap_clear_bits(ctl, bitmap, info->offset, bytes);
2417
2418        if (!bitmap->bytes)
2419                free_bitmap(ctl, bitmap);
2420
2421        return true;
2422}
2423
2424/*
2425 * We prefer always to allocate from extent entries, both for clustered and
2426 * non-clustered allocation requests. So when attempting to add a new extent
2427 * entry, try to see if there's adjacent free space in bitmap entries, and if
2428 * there is, migrate that space from the bitmaps to the extent.
2429 * Like this we get better chances of satisfying space allocation requests
2430 * because we attempt to satisfy them based on a single cache entry, and never
2431 * on 2 or more entries - even if the entries represent a contiguous free space
2432 * region (e.g. 1 extent entry + 1 bitmap entry starting where the extent entry
2433 * ends).
2434 */
2435static void steal_from_bitmap(struct btrfs_free_space_ctl *ctl,
2436                              struct btrfs_free_space *info,
2437                              bool update_stat)
2438{
2439        /*
2440         * Only work with disconnected entries, as we can change their offset,
2441         * and they must be extent entries.
2442         */
2443        ASSERT(!info->bitmap);
2444        ASSERT(RB_EMPTY_NODE(&info->offset_index));
2445
2446        if (ctl->total_bitmaps > 0) {
2447                bool stole_end;
2448                bool stole_front = false;
2449
2450                stole_end = steal_from_bitmap_to_end(ctl, info, update_stat);
2451                if (ctl->total_bitmaps > 0)
2452                        stole_front = steal_from_bitmap_to_front(ctl, info,
2453                                                                 update_stat);
2454
2455                if (stole_end || stole_front)
2456                        try_merge_free_space(ctl, info, update_stat);
2457        }
2458}
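
/*
 * Example of the bitmap stealing above, assuming 128MiB bitmap windows (4KiB
 * unit) and a freshly built, not yet linked extent entry
 * [130MiB, 130MiB + 64KiB):
 *
 *   the [128MiB, 256MiB) bitmap has [130MiB + 64KiB, 130MiB + 96KiB) set
 *     -> steal_from_bitmap_to_end() clears those bits and the extent grows
 *        to [130MiB, 130MiB + 96KiB)
 *   the same bitmap also has [130MiB - 16KiB, 130MiB) set
 *     -> steal_from_bitmap_to_front() grows it to start at 130MiB - 16KiB
 *
 * If the bitmap was untrimmed, the stolen bytes make the extent untrimmed as
 * well, following the try_merge_free_space() rules.
 */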
2459
2460int __btrfs_add_free_space(struct btrfs_fs_info *fs_info,
2461                           struct btrfs_free_space_ctl *ctl,
2462                           u64 offset, u64 bytes,
2463                           enum btrfs_trim_state trim_state)
2464{
2465        struct btrfs_block_group *block_group = ctl->private;
2466        struct btrfs_free_space *info;
2467        int ret = 0;
2468        u64 filter_bytes = bytes;
2469
2470        info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
2471        if (!info)
2472                return -ENOMEM;
2473
2474        info->offset = offset;
2475        info->bytes = bytes;
2476        info->trim_state = trim_state;
2477        RB_CLEAR_NODE(&info->offset_index);
2478
2479        spin_lock(&ctl->tree_lock);
2480
2481        if (try_merge_free_space(ctl, info, true))
2482                goto link;
2483
2484        /*
2485         * There was no extent directly to the left or right of this new
2486         * extent, so we know we're going to have to allocate a new one;
2487         * before we do that, see if we need to drop this into a bitmap.
2488         */
2489        ret = insert_into_bitmap(ctl, info);
2490        if (ret < 0) {
2491                goto out;
2492        } else if (ret) {
2493                ret = 0;
2494                goto out;
2495        }
2496link:
2497        /*
2498         * Only steal free space from adjacent bitmaps if we're sure we're not
2499         * going to add the new free space to existing bitmap entries - because
2500         * that would mean unnecessary work that would be reverted. Therefore
2501         * attempt to steal space from bitmaps if we're adding an extent entry.
2502         */
2503        steal_from_bitmap(ctl, info, true);
2504
2505        filter_bytes = max(filter_bytes, info->bytes);
2506
2507        ret = link_free_space(ctl, info);
2508        if (ret)
2509                kmem_cache_free(btrfs_free_space_cachep, info);
2510out:
2511        btrfs_discard_update_discardable(block_group, ctl);
2512        spin_unlock(&ctl->tree_lock);
2513
2514        if (ret) {
2515                btrfs_crit(fs_info, "unable to add free space: %d", ret);
2516                ASSERT(ret != -EEXIST);
2517        }
2518
2519        if (trim_state != BTRFS_TRIM_STATE_TRIMMED) {
2520                btrfs_discard_check_filter(block_group, filter_bytes);
2521                btrfs_discard_queue_work(&fs_info->discard_ctl, block_group);
2522        }
2523
2524        return ret;
2525}
2526
2527int btrfs_add_free_space(struct btrfs_block_group *block_group,
2528                         u64 bytenr, u64 size)
2529{
2530        enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
2531
2532        if (btrfs_test_opt(block_group->fs_info, DISCARD_SYNC))
2533                trim_state = BTRFS_TRIM_STATE_TRIMMED;
2534
2535        return __btrfs_add_free_space(block_group->fs_info,
2536                                      block_group->free_space_ctl,
2537                                      bytenr, size, trim_state);
2538}
2539
2540/*
2541 * This is a subtle distinction because when adding free space back in general,
2542 * we want it to be added as untrimmed for async. But in the case where we add
2543 * it on loading of a block group, we want to consider it trimmed.
2544 */
2545int btrfs_add_free_space_async_trimmed(struct btrfs_block_group *block_group,
2546                                       u64 bytenr, u64 size)
2547{
2548        enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
2549
2550        if (btrfs_test_opt(block_group->fs_info, DISCARD_SYNC) ||
2551            btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC))
2552                trim_state = BTRFS_TRIM_STATE_TRIMMED;
2553
2554        return __btrfs_add_free_space(block_group->fs_info,
2555                                      block_group->free_space_ctl,
2556                                      bytenr, size, trim_state);
2557}
2558
2559int btrfs_remove_free_space(struct btrfs_block_group *block_group,
2560                            u64 offset, u64 bytes)
2561{
2562        struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2563        struct btrfs_free_space *info;
2564        int ret;
2565        bool re_search = false;
2566
2567        spin_lock(&ctl->tree_lock);
2568
2569again:
2570        ret = 0;
2571        if (!bytes)
2572                goto out_lock;
2573
2574        info = tree_search_offset(ctl, offset, 0, 0);
2575        if (!info) {
2576                /*
2577                 * oops didn't find an extent that matched the space we wanted
2578                 * to remove, look for a bitmap instead
2579                 */
2580                info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
2581                                          1, 0);
2582                if (!info) {
2583                        /*
2584                         * If we found a partial bit of our free space in a
2585                         * bitmap but then couldn't find the other part this may
2586                         * be a problem, so WARN about it.
2587                         */
2588                        WARN_ON(re_search);
2589                        goto out_lock;
2590                }
2591        }
2592
2593        re_search = false;
2594        if (!info->bitmap) {
2595                unlink_free_space(ctl, info);
2596                if (offset == info->offset) {
2597                        u64 to_free = min(bytes, info->bytes);
2598
2599                        info->bytes -= to_free;
2600                        info->offset += to_free;
2601                        if (info->bytes) {
2602                                ret = link_free_space(ctl, info);
2603                                WARN_ON(ret);
2604                        } else {
2605                                kmem_cache_free(btrfs_free_space_cachep, info);
2606                        }
2607
2608                        offset += to_free;
2609                        bytes -= to_free;
2610                        goto again;
2611                } else {
2612                        u64 old_end = info->bytes + info->offset;
2613
2614                        info->bytes = offset - info->offset;
2615                        ret = link_free_space(ctl, info);
2616                        WARN_ON(ret);
2617                        if (ret)
2618                                goto out_lock;
2619
2620                        /* Not enough bytes in this entry to satisfy us */
2621                        if (old_end < offset + bytes) {
2622                                bytes -= old_end - offset;
2623                                offset = old_end;
2624                                goto again;
2625                        } else if (old_end == offset + bytes) {
2626                                /* all done */
2627                                goto out_lock;
2628                        }
2629                        spin_unlock(&ctl->tree_lock);
2630
2631                        ret = __btrfs_add_free_space(block_group->fs_info, ctl,
2632                                                     offset + bytes,
2633                                                     old_end - (offset + bytes),
2634                                                     info->trim_state);
2635                        WARN_ON(ret);
2636                        goto out;
2637                }
2638        }
2639
2640        ret = remove_from_bitmap(ctl, info, &offset, &bytes);
2641        if (ret == -EAGAIN) {
2642                re_search = true;
2643                goto again;
2644        }
2645out_lock:
2646        btrfs_discard_update_discardable(block_group, ctl);
2647        spin_unlock(&ctl->tree_lock);
2648out:
2649        return ret;
2650}
2651
2652void btrfs_dump_free_space(struct btrfs_block_group *block_group,
2653                           u64 bytes)
2654{
2655        struct btrfs_fs_info *fs_info = block_group->fs_info;
2656        struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2657        struct btrfs_free_space *info;
2658        struct rb_node *n;
2659        int count = 0;
2660
2661        spin_lock(&ctl->tree_lock);
2662        for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
2663                info = rb_entry(n, struct btrfs_free_space, offset_index);
2664                if (info->bytes >= bytes && !block_group->ro)
2665                        count++;
2666                btrfs_crit(fs_info, "entry offset %llu, bytes %llu, bitmap %s",
2667                           info->offset, info->bytes,
2668                       (info->bitmap) ? "yes" : "no");
2669        }
2670        spin_unlock(&ctl->tree_lock);
2671        btrfs_info(fs_info, "block group has cluster?: %s",
2672               list_empty(&block_group->cluster_list) ? "no" : "yes");
2673        btrfs_info(fs_info,
2674                   "%d blocks of free space at or bigger than %llu bytes", count, bytes);
2675}
2676
2677void btrfs_init_free_space_ctl(struct btrfs_block_group *block_group)
2678{
2679        struct btrfs_fs_info *fs_info = block_group->fs_info;
2680        struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2681
2682        spin_lock_init(&ctl->tree_lock);
2683        ctl->unit = fs_info->sectorsize;
2684        ctl->start = block_group->start;
2685        ctl->private = block_group;
2686        ctl->op = &free_space_op;
2687        INIT_LIST_HEAD(&ctl->trimming_ranges);
2688        mutex_init(&ctl->cache_writeout_mutex);
2689
2690        /*
2691         * we only want to have 32k of ram per block group for keeping
2692         * track of free space, and if we pass 1/2 of that we want to
2693         * start converting things over to using bitmaps
2694         */
2695        ctl->extents_thresh = (SZ_32K / 2) / sizeof(struct btrfs_free_space);
2696}
2697
2698/*
2699 * for a given cluster, put all of its extents back into the free
2700 * space cache.  If the block group passed doesn't match the block group
2701 * pointed to by the cluster, someone else raced in and freed the
2702 * cluster already.  In that case, we just return without changing anything
2703 */
2704static void __btrfs_return_cluster_to_free_space(
2705                             struct btrfs_block_group *block_group,
2706                             struct btrfs_free_cluster *cluster)
2707{
2708        struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2709        struct btrfs_free_space *entry;
2710        struct rb_node *node;
2711
2712        spin_lock(&cluster->lock);
2713        if (cluster->block_group != block_group)
2714                goto out;
2715
2716        cluster->block_group = NULL;
2717        cluster->window_start = 0;
2718        list_del_init(&cluster->block_group_list);
2719
2720        node = rb_first(&cluster->root);
2721        while (node) {
2722                bool bitmap;
2723
2724                entry = rb_entry(node, struct btrfs_free_space, offset_index);
2725                node = rb_next(&entry->offset_index);
2726                rb_erase(&entry->offset_index, &cluster->root);
2727                RB_CLEAR_NODE(&entry->offset_index);
2728
2729                bitmap = (entry->bitmap != NULL);
2730                if (!bitmap) {
2731                        /* Merging treats extents as if they were new */
2732                        if (!btrfs_free_space_trimmed(entry)) {
2733                                ctl->discardable_extents[BTRFS_STAT_CURR]--;
2734                                ctl->discardable_bytes[BTRFS_STAT_CURR] -=
2735                                        entry->bytes;
2736                        }
2737
2738                        try_merge_free_space(ctl, entry, false);
2739                        steal_from_bitmap(ctl, entry, false);
2740
2741                        /* As we insert directly, update these statistics */
2742                        if (!btrfs_free_space_trimmed(entry)) {
2743                                ctl->discardable_extents[BTRFS_STAT_CURR]++;
2744                                ctl->discardable_bytes[BTRFS_STAT_CURR] +=
2745                                        entry->bytes;
2746                        }
2747                }
2748                tree_insert_offset(&ctl->free_space_offset,
2749                                   entry->offset, &entry->offset_index, bitmap);
2750        }
2751        cluster->root = RB_ROOT;
2752
2753out:
2754        spin_unlock(&cluster->lock);
2755        btrfs_put_block_group(block_group);
2756}
2757
2758static void __btrfs_remove_free_space_cache_locked(
2759                                struct btrfs_free_space_ctl *ctl)
2760{
2761        struct btrfs_free_space *info;
2762        struct rb_node *node;
2763
2764        while ((node = rb_last(&ctl->free_space_offset)) != NULL) {
2765                info = rb_entry(node, struct btrfs_free_space, offset_index);
2766                if (!info->bitmap) {
2767                        unlink_free_space(ctl, info);
2768                        kmem_cache_free(btrfs_free_space_cachep, info);
2769                } else {
2770                        free_bitmap(ctl, info);
2771                }
2772
2773                cond_resched_lock(&ctl->tree_lock);
2774        }
2775}
2776
2777void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl)
2778{
2779        spin_lock(&ctl->tree_lock);
2780        __btrfs_remove_free_space_cache_locked(ctl);
2781        if (ctl->private)
2782                btrfs_discard_update_discardable(ctl->private, ctl);
2783        spin_unlock(&ctl->tree_lock);
2784}
2785
2786void btrfs_remove_free_space_cache(struct btrfs_block_group *block_group)
2787{
2788        struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2789        struct btrfs_free_cluster *cluster;
2790        struct list_head *head;
2791
2792        spin_lock(&ctl->tree_lock);
2793        while ((head = block_group->cluster_list.next) !=
2794               &block_group->cluster_list) {
2795                cluster = list_entry(head, struct btrfs_free_cluster,
2796                                     block_group_list);
2797
2798                WARN_ON(cluster->block_group != block_group);
2799                __btrfs_return_cluster_to_free_space(block_group, cluster);
2800
2801                cond_resched_lock(&ctl->tree_lock);
2802        }
2803        __btrfs_remove_free_space_cache_locked(ctl);
2804        btrfs_discard_update_discardable(block_group, ctl);
2805        spin_unlock(&ctl->tree_lock);
2806
2807}
2808
2809/**
2810 * btrfs_is_free_space_trimmed - see if everything is trimmed
2811 * @block_group: block_group of interest
2812 *
2813 * Walk @block_group's free space rb_tree to determine if everything is trimmed.
2814 */
2815bool btrfs_is_free_space_trimmed(struct btrfs_block_group *block_group)
2816{
2817        struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2818        struct btrfs_free_space *info;
2819        struct rb_node *node;
2820        bool ret = true;
2821
2822        spin_lock(&ctl->tree_lock);
2823        node = rb_first(&ctl->free_space_offset);
2824
2825        while (node) {
2826                info = rb_entry(node, struct btrfs_free_space, offset_index);
2827
2828                if (!btrfs_free_space_trimmed(info)) {
2829                        ret = false;
2830                        break;
2831                }
2832
2833                node = rb_next(node);
2834        }
2835
2836        spin_unlock(&ctl->tree_lock);
2837        return ret;
2838}
2839
2840u64 btrfs_find_space_for_alloc(struct btrfs_block_group *block_group,
2841                               u64 offset, u64 bytes, u64 empty_size,
2842                               u64 *max_extent_size)
2843{
2844        struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2845        struct btrfs_discard_ctl *discard_ctl =
2846                                        &block_group->fs_info->discard_ctl;
2847        struct btrfs_free_space *entry = NULL;
2848        u64 bytes_search = bytes + empty_size;
2849        u64 ret = 0;
2850        u64 align_gap = 0;
2851        u64 align_gap_len = 0;
2852        enum btrfs_trim_state align_gap_trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
2853
2854        spin_lock(&ctl->tree_lock);
2855        entry = find_free_space(ctl, &offset, &bytes_search,
2856                                block_group->full_stripe_len, max_extent_size);
2857        if (!entry)
2858                goto out;
2859
2860        ret = offset;
2861        if (entry->bitmap) {
2862                bitmap_clear_bits(ctl, entry, offset, bytes);
2863
2864                if (!btrfs_free_space_trimmed(entry))
2865                        atomic64_add(bytes, &discard_ctl->discard_bytes_saved);
2866
2867                if (!entry->bytes)
2868                        free_bitmap(ctl, entry);
2869        } else {
2870                unlink_free_space(ctl, entry);
2871                align_gap_len = offset - entry->offset;
2872                align_gap = entry->offset;
2873                align_gap_trim_state = entry->trim_state;
2874
2875                if (!btrfs_free_space_trimmed(entry))
2876                        atomic64_add(bytes, &discard_ctl->discard_bytes_saved);
2877
2878                entry->offset = offset + bytes;
2879                WARN_ON(entry->bytes < bytes + align_gap_len);
2880
2881                entry->bytes -= bytes + align_gap_len;
2882                if (!entry->bytes)
2883                        kmem_cache_free(btrfs_free_space_cachep, entry);
2884                else
2885                        link_free_space(ctl, entry);
2886        }
2887out:
2888        btrfs_discard_update_discardable(block_group, ctl);
2889        spin_unlock(&ctl->tree_lock);
2890
2891        if (align_gap_len)
2892                __btrfs_add_free_space(block_group->fs_info, ctl,
2893                                       align_gap, align_gap_len,
2894                                       align_gap_trim_state);
2895        return ret;
2896}
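/*
 * Illustrative sketch (not part of the original file): how an extent
 * allocator might consume btrfs_find_space_for_alloc().  The variable names
 * are assumptions; the behaviour relied on (0 on failure, *max_extent_size
 * reporting the largest free piece seen) comes from the function above.
 *
 *	u64 max_extent_size = 0;
 *	u64 found;
 *
 *	found = btrfs_find_space_for_alloc(block_group, search_start,
 *					   num_bytes, empty_size,
 *					   &max_extent_size);
 *
 * A return of 0 means no suitably sized free extent was found here, and the
 * caller can use max_extent_size to skip this block group for allocations
 * that are already known to be too large for it.
 */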
2897
2898/*
2899 * given a cluster, put all of its extents back into the free space
2900 * cache.  If a block group is passed, this function will only free
2901 * a cluster that belongs to the passed block group.
2902 *
2903 * Otherwise, it'll resolve the block group from the cluster itself, take a
2904 * reference on it, and detach the cluster from that block group.
2905 */
2906void btrfs_return_cluster_to_free_space(
2907                               struct btrfs_block_group *block_group,
2908                               struct btrfs_free_cluster *cluster)
2909{
2910        struct btrfs_free_space_ctl *ctl;
2911
2912        /* first, get a safe pointer to the block group */
2913        spin_lock(&cluster->lock);
2914        if (!block_group) {
2915                block_group = cluster->block_group;
2916                if (!block_group) {
2917                        spin_unlock(&cluster->lock);
2918                        return;
2919                }
2920        } else if (cluster->block_group != block_group) {
2921                /* someone else has already freed it, don't redo their work */
2922                spin_unlock(&cluster->lock);
2923                return;
2924        }
2925        btrfs_get_block_group(block_group);
2926        spin_unlock(&cluster->lock);
2927
2928        ctl = block_group->free_space_ctl;
2929
2930        /* now return any extents the cluster had on it */
2931        spin_lock(&ctl->tree_lock);
2932        __btrfs_return_cluster_to_free_space(block_group, cluster);
2933        spin_unlock(&ctl->tree_lock);
2934
2935        btrfs_discard_queue_work(&block_group->fs_info->discard_ctl, block_group);
2936
2937        /* finally drop our ref */
2938        btrfs_put_block_group(block_group);
2939}
2940
2941static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group *block_group,
2942                                   struct btrfs_free_cluster *cluster,
2943                                   struct btrfs_free_space *entry,
2944                                   u64 bytes, u64 min_start,
2945                                   u64 *max_extent_size)
2946{
2947        struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2948        int err;
2949        u64 search_start;
2950        u64 search_bytes;
2951        u64 ret = 0;
2952
2953        search_start = min_start;
2954        search_bytes = bytes;
2955
2956        err = search_bitmap(ctl, entry, &search_start, &search_bytes, true);
2957        if (err) {
2958                *max_extent_size = max(get_max_extent_size(entry),
2959                                       *max_extent_size);
2960                return 0;
2961        }
2962
2963        ret = search_start;
2964        __bitmap_clear_bits(ctl, entry, ret, bytes);
2965
2966        return ret;
2967}
2968
2969/*
2970 * given a cluster, try to allocate 'bytes' from it, returns 0
2971 * if it couldn't find anything suitably large, or a logical disk offset
2972 * if things worked out
2973 */
2974u64 btrfs_alloc_from_cluster(struct btrfs_block_group *block_group,
2975                             struct btrfs_free_cluster *cluster, u64 bytes,
2976                             u64 min_start, u64 *max_extent_size)
2977{
2978        struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2979        struct btrfs_discard_ctl *discard_ctl =
2980                                        &block_group->fs_info->discard_ctl;
2981        struct btrfs_free_space *entry = NULL;
2982        struct rb_node *node;
2983        u64 ret = 0;
2984
2985        spin_lock(&cluster->lock);
2986        if (bytes > cluster->max_size)
2987                goto out;
2988
2989        if (cluster->block_group != block_group)
2990                goto out;
2991
2992        node = rb_first(&cluster->root);
2993        if (!node)
2994                goto out;
2995
2996        entry = rb_entry(node, struct btrfs_free_space, offset_index);
2997        while (1) {
2998                if (entry->bytes < bytes)
2999                        *max_extent_size = max(get_max_extent_size(entry),
3000                                               *max_extent_size);
3001
3002                if (entry->bytes < bytes ||
3003                    (!entry->bitmap && entry->offset < min_start)) {
3004                        node = rb_next(&entry->offset_index);
3005                        if (!node)
3006                                break;
3007                        entry = rb_entry(node, struct btrfs_free_space,
3008                                         offset_index);
3009                        continue;
3010                }
3011
3012                if (entry->bitmap) {
3013                        ret = btrfs_alloc_from_bitmap(block_group,
3014                                                      cluster, entry, bytes,
3015                                                      cluster->window_start,
3016                                                      max_extent_size);
3017                        if (ret == 0) {
3018                                node = rb_next(&entry->offset_index);
3019                                if (!node)
3020                                        break;
3021                                entry = rb_entry(node, struct btrfs_free_space,
3022                                                 offset_index);
3023                                continue;
3024                        }
3025                        cluster->window_start += bytes;
3026                } else {
3027                        ret = entry->offset;
3028
3029                        entry->offset += bytes;
3030                        entry->bytes -= bytes;
3031                }
3032
3033                if (entry->bytes == 0)
3034                        rb_erase(&entry->offset_index, &cluster->root);
3035                break;
3036        }
3037out:
3038        spin_unlock(&cluster->lock);
3039
3040        if (!ret)
3041                return 0;
3042
3043        spin_lock(&ctl->tree_lock);
3044
3045        if (!btrfs_free_space_trimmed(entry))
3046                atomic64_add(bytes, &discard_ctl->discard_bytes_saved);
3047
3048        ctl->free_space -= bytes;
3049        if (!entry->bitmap && !btrfs_free_space_trimmed(entry))
3050                ctl->discardable_bytes[BTRFS_STAT_CURR] -= bytes;
3051        if (entry->bytes == 0) {
3052                ctl->free_extents--;
3053                if (entry->bitmap) {
3054                        kmem_cache_free(btrfs_free_space_bitmap_cachep,
3055                                        entry->bitmap);
3056                        ctl->total_bitmaps--;
3057                        ctl->op->recalc_thresholds(ctl);
3058                } else if (!btrfs_free_space_trimmed(entry)) {
3059                        ctl->discardable_extents[BTRFS_STAT_CURR]--;
3060                }
3061                kmem_cache_free(btrfs_free_space_cachep, entry);
3062        }
3063
3064        spin_unlock(&ctl->tree_lock);
3065
3066        return ret;
3067}
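/*
 * Illustrative sketch (not part of the original file): allocating from a
 * cluster that has already been set up.  Serializing on cluster->refill_lock
 * is an assumption about the caller; the return convention (0 when the
 * cluster cannot satisfy the request) comes from the function above.
 *
 *	u64 max_extent_size = 0;
 *	u64 offset;
 *
 *	spin_lock(&cluster->refill_lock);
 *	offset = btrfs_alloc_from_cluster(block_group, cluster, num_bytes,
 *					  search_start, &max_extent_size);
 *	spin_unlock(&cluster->refill_lock);
 *
 * When 0 is returned, the caller can refill the cluster with
 * btrfs_find_space_cluster() or fall back to btrfs_find_space_for_alloc().
 */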
3068
3069static int btrfs_bitmap_cluster(struct btrfs_block_group *block_group,
3070                                struct btrfs_free_space *entry,
3071                                struct btrfs_free_cluster *cluster,
3072                                u64 offset, u64 bytes,
3073                                u64 cont1_bytes, u64 min_bytes)
3074{
3075        struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
3076        unsigned long next_zero;
3077        unsigned long i;
3078        unsigned long want_bits;
3079        unsigned long min_bits;
3080        unsigned long found_bits;
3081        unsigned long max_bits = 0;
3082        unsigned long start = 0;
3083        unsigned long total_found = 0;
3084        int ret;
3085
3086        i = offset_to_bit(entry->offset, ctl->unit,
3087                          max_t(u64, offset, entry->offset));
3088        want_bits = bytes_to_bits(bytes, ctl->unit);
3089        min_bits = bytes_to_bits(min_bytes, ctl->unit);
3090
3091        /*
3092         * Don't bother looking for a cluster in this bitmap if it's heavily
3093         * fragmented.
3094         */
3095        if (entry->max_extent_size &&
3096            entry->max_extent_size < cont1_bytes)
3097                return -ENOSPC;
3098again:
3099        found_bits = 0;
3100        for_each_set_bit_from(i, entry->bitmap, BITS_PER_BITMAP) {
3101                next_zero = find_next_zero_bit(entry->bitmap,
3102                                               BITS_PER_BITMAP, i);
3103                if (next_zero - i >= min_bits) {
3104                        found_bits = next_zero - i;
3105                        if (found_bits > max_bits)
3106                                max_bits = found_bits;
3107                        break;
3108                }
3109                if (next_zero - i > max_bits)
3110                        max_bits = next_zero - i;
3111                i = next_zero;
3112        }
3113
3114        if (!found_bits) {
3115                entry->max_extent_size = (u64)max_bits * ctl->unit;
3116                return -ENOSPC;
3117        }
3118
3119        if (!total_found) {
3120                start = i;
3121                cluster->max_size = 0;
3122        }
3123
3124        total_found += found_bits;
3125
3126        if (cluster->max_size < found_bits * ctl->unit)
3127                cluster->max_size = found_bits * ctl->unit;
3128
3129        if (total_found < want_bits || cluster->max_size < cont1_bytes) {
3130                i = next_zero + 1;
3131                goto again;
3132        }
3133
3134        cluster->window_start = start * ctl->unit + entry->offset;
3135        rb_erase(&entry->offset_index, &ctl->free_space_offset);
3136        ret = tree_insert_offset(&cluster->root, entry->offset,
3137                                 &entry->offset_index, 1);
3138        ASSERT(!ret); /* -EEXIST; Logic error */
3139
3140        trace_btrfs_setup_cluster(block_group, cluster,
3141                                  total_found * ctl->unit, 1);
3142        return 0;
3143}
3144
3145/*
3146 * This searches the block group for just extents to fill the cluster with.
3147 * Try to find a cluster with at least bytes total bytes, at least one
3148 * extent of cont1_bytes, and other extents of at least min_bytes.
3149 */
3150static noinline int
3151setup_cluster_no_bitmap(struct btrfs_block_group *block_group,
3152                        struct btrfs_free_cluster *cluster,
3153                        struct list_head *bitmaps, u64 offset, u64 bytes,
3154                        u64 cont1_bytes, u64 min_bytes)
3155{
3156        struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
3157        struct btrfs_free_space *first = NULL;
3158        struct btrfs_free_space *entry = NULL;
3159        struct btrfs_free_space *last;
3160        struct rb_node *node;
3161        u64 window_free;
3162        u64 max_extent;
3163        u64 total_size = 0;
3164
3165        entry = tree_search_offset(ctl, offset, 0, 1);
3166        if (!entry)
3167                return -ENOSPC;
3168
3169        /*
3170         * We don't want bitmaps, so just move along until we find a normal
3171         * extent entry.
3172         */
3173        while (entry->bitmap || entry->bytes < min_bytes) {
3174                if (entry->bitmap && list_empty(&entry->list))
3175                        list_add_tail(&entry->list, bitmaps);
3176                node = rb_next(&entry->offset_index);
3177                if (!node)
3178                        return -ENOSPC;
3179                entry = rb_entry(node, struct btrfs_free_space, offset_index);
3180        }
3181
3182        window_free = entry->bytes;
3183        max_extent = entry->bytes;
3184        first = entry;
3185        last = entry;
3186
3187        for (node = rb_next(&entry->offset_index); node;
3188             node = rb_next(&entry->offset_index)) {
3189                entry = rb_entry(node, struct btrfs_free_space, offset_index);
3190
3191                if (entry->bitmap) {
3192                        if (list_empty(&entry->list))
3193                                list_add_tail(&entry->list, bitmaps);
3194                        continue;
3195                }
3196
3197                if (entry->bytes < min_bytes)
3198                        continue;
3199
3200                last = entry;
3201                window_free += entry->bytes;
3202                if (entry->bytes > max_extent)
3203                        max_extent = entry->bytes;
3204        }
3205
3206        if (window_free < bytes || max_extent < cont1_bytes)
3207                return -ENOSPC;
3208
3209        cluster->window_start = first->offset;
3210
3211        node = &first->offset_index;
3212
3213        /*
3214         * now we've found our entries, pull them out of the free space
3215         * cache and put them into the cluster rbtree
3216         */
3217        do {
3218                int ret;
3219
3220                entry = rb_entry(node, struct btrfs_free_space, offset_index);
3221                node = rb_next(&entry->offset_index);
3222                if (entry->bitmap || entry->bytes < min_bytes)
3223                        continue;
3224
3225                rb_erase(&entry->offset_index, &ctl->free_space_offset);
3226                ret = tree_insert_offset(&cluster->root, entry->offset,
3227                                         &entry->offset_index, 0);
3228                total_size += entry->bytes;
3229                ASSERT(!ret); /* -EEXIST; Logic error */
3230        } while (node && entry != last);
3231
3232        cluster->max_size = max_extent;
3233        trace_btrfs_setup_cluster(block_group, cluster, total_size, 0);
3234        return 0;
3235}
3236
3237/*
3238 * This specifically looks for bitmaps that may work in the cluster, we assume
3239 * that we have already failed to find extents that will work.
3240 */
3241static noinline int
3242setup_cluster_bitmap(struct btrfs_block_group *block_group,
3243                     struct btrfs_free_cluster *cluster,
3244                     struct list_head *bitmaps, u64 offset, u64 bytes,
3245                     u64 cont1_bytes, u64 min_bytes)
3246{
3247        struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
3248        struct btrfs_free_space *entry = NULL;
3249        int ret = -ENOSPC;
3250        u64 bitmap_offset = offset_to_bitmap(ctl, offset);
3251
3252        if (ctl->total_bitmaps == 0)
3253                return -ENOSPC;
3254
3255        /*
3256         * The bitmap that covers offset won't be in the list unless offset
3257         * is just its start offset.
3258         */
3259        if (!list_empty(bitmaps))
3260                entry = list_first_entry(bitmaps, struct btrfs_free_space, list);
3261
3262        if (!entry || entry->offset != bitmap_offset) {
3263                entry = tree_search_offset(ctl, bitmap_offset, 1, 0);
3264                if (entry && list_empty(&entry->list))
3265                        list_add(&entry->list, bitmaps);
3266        }
3267
3268        list_for_each_entry(entry, bitmaps, list) {
3269                if (entry->bytes < bytes)
3270                        continue;
3271                ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
3272                                           bytes, cont1_bytes, min_bytes);
3273                if (!ret)
3274                        return 0;
3275        }
3276
3277        /*
3278         * The bitmaps list has all the bitmaps that record free space
3279         * starting after offset, so no more search is required.
3280         */
3281        return -ENOSPC;
3282}
3283
3284/*
3285 * here we try to find a cluster of blocks in a block group.  The goal
3286 * is to find at least bytes+empty_size.
3287 * We might not find them all in one contiguous area.
3288 *
3289 * returns zero and sets up cluster if things worked out, otherwise
3290 * it returns -ENOSPC
3291 */
3292int btrfs_find_space_cluster(struct btrfs_block_group *block_group,
3293                             struct btrfs_free_cluster *cluster,
3294                             u64 offset, u64 bytes, u64 empty_size)
3295{
3296        struct btrfs_fs_info *fs_info = block_group->fs_info;
3297        struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
3298        struct btrfs_free_space *entry, *tmp;
3299        LIST_HEAD(bitmaps);
3300        u64 min_bytes;
3301        u64 cont1_bytes;
3302        int ret;
3303
3304        /*
3305         * Choose the minimum extent size we'll require for this
3306         * cluster.  For SSD_SPREAD, don't allow any fragmentation.
3307         * For metadata, allow allocations with smaller extents.  For
3308         * data, keep it dense.
3309         */
3310        if (btrfs_test_opt(fs_info, SSD_SPREAD)) {
3311                cont1_bytes = min_bytes = bytes + empty_size;
3312        } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
3313                cont1_bytes = bytes;
3314                min_bytes = fs_info->sectorsize;
3315        } else {
3316                cont1_bytes = max(bytes, (bytes + empty_size) >> 2);
3317                min_bytes = fs_info->sectorsize;
3318        }
3319
3320        spin_lock(&ctl->tree_lock);
3321
3322        /*
3323         * If we know we don't have enough space to make a cluster don't even
3324         * bother doing all the work to try and find one.
3325         */
3326        if (ctl->free_space < bytes) {
3327                spin_unlock(&ctl->tree_lock);
3328                return -ENOSPC;
3329        }
3330
3331        spin_lock(&cluster->lock);
3332
3333        /* someone already found a cluster, hooray */
3334        if (cluster->block_group) {
3335                ret = 0;
3336                goto out;
3337        }
3338
3339        trace_btrfs_find_cluster(block_group, offset, bytes, empty_size,
3340                                 min_bytes);
3341
3342        ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset,
3343                                      bytes + empty_size,
3344                                      cont1_bytes, min_bytes);
3345        if (ret)
3346                ret = setup_cluster_bitmap(block_group, cluster, &bitmaps,
3347                                           offset, bytes + empty_size,
3348                                           cont1_bytes, min_bytes);
3349
3350        /* Clear our temporary list */
3351        list_for_each_entry_safe(entry, tmp, &bitmaps, list)
3352                list_del_init(&entry->list);
3353
3354        if (!ret) {
3355                btrfs_get_block_group(block_group);
3356                list_add_tail(&cluster->block_group_list,
3357                              &block_group->cluster_list);
3358                cluster->block_group = block_group;
3359        } else {
3360                trace_btrfs_failed_cluster_setup(block_group);
3361        }
3362out:
3363        spin_unlock(&cluster->lock);
3364        spin_unlock(&ctl->tree_lock);
3365
3366        return ret;
3367}
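/*
 * Illustrative sketch (not part of the original file): the expected lifetime
 * of a cluster built by btrfs_find_space_cluster().  The on-stack cluster and
 * the refill_lock usage are simplifying assumptions; only the functions named
 * here come from this file.
 *
 *	struct btrfs_free_cluster cluster;
 *	u64 max_extent_size = 0;
 *	u64 offset = 0;
 *
 *	btrfs_init_free_cluster(&cluster);
 *
 *	spin_lock(&cluster.refill_lock);
 *	if (!btrfs_find_space_cluster(block_group, &cluster, search_start,
 *				      num_bytes, empty_size))
 *		offset = btrfs_alloc_from_cluster(block_group, &cluster,
 *						  num_bytes, search_start,
 *						  &max_extent_size);
 *	spin_unlock(&cluster.refill_lock);
 *
 * Once the cluster is no longer wanted, its unused extents go back to the
 * free space cache with btrfs_return_cluster_to_free_space(NULL, &cluster),
 * or with the owning block group passed explicitly.
 */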
3368
3369/*
3370 * simple code to zero out a cluster
3371 */
3372void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
3373{
3374        spin_lock_init(&cluster->lock);
3375        spin_lock_init(&cluster->refill_lock);
3376        cluster->root = RB_ROOT;
3377        cluster->max_size = 0;
3378        cluster->fragmented = false;
3379        INIT_LIST_HEAD(&cluster->block_group_list);
3380        cluster->block_group = NULL;
3381}
3382
3383static int do_trimming(struct btrfs_block_group *block_group,
3384                       u64 *total_trimmed, u64 start, u64 bytes,
3385                       u64 reserved_start, u64 reserved_bytes,
3386                       enum btrfs_trim_state reserved_trim_state,
3387                       struct btrfs_trim_range *trim_entry)
3388{
3389        struct btrfs_space_info *space_info = block_group->space_info;
3390        struct btrfs_fs_info *fs_info = block_group->fs_info;
3391        struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
3392        int ret;
3393        int update = 0;
3394        const u64 end = start + bytes;
3395        const u64 reserved_end = reserved_start + reserved_bytes;
3396        enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
3397        u64 trimmed = 0;
3398
3399        spin_lock(&space_info->lock);
3400        spin_lock(&block_group->lock);
3401        if (!block_group->ro) {
3402                block_group->reserved += reserved_bytes;
3403                space_info->bytes_reserved += reserved_bytes;
3404                update = 1;
3405        }
3406        spin_unlock(&block_group->lock);
3407        spin_unlock(&space_info->lock);
3408
3409        ret = btrfs_discard_extent(fs_info, start, bytes, &trimmed);
3410        if (!ret) {
3411                *total_trimmed += trimmed;
3412                trim_state = BTRFS_TRIM_STATE_TRIMMED;
3413        }
3414
3415        mutex_lock(&ctl->cache_writeout_mutex);
3416        if (reserved_start < start)
3417                __btrfs_add_free_space(fs_info, ctl, reserved_start,
3418                                       start - reserved_start,
3419                                       reserved_trim_state);
3420        if (end < reserved_end)
3421                __btrfs_add_free_space(fs_info, ctl, end, reserved_end - end,
3422                                       reserved_trim_state);
3423        __btrfs_add_free_space(fs_info, ctl, start, bytes, trim_state);
3424        list_del(&trim_entry->list);
3425        mutex_unlock(&ctl->cache_writeout_mutex);
3426
3427        if (update) {
3428                spin_lock(&space_info->lock);
3429                spin_lock(&block_group->lock);
3430                if (block_group->ro)
3431                        space_info->bytes_readonly += reserved_bytes;
3432                block_group->reserved -= reserved_bytes;
3433                space_info->bytes_reserved -= reserved_bytes;
3434                spin_unlock(&block_group->lock);
3435                spin_unlock(&space_info->lock);
3436        }
3437
3438        return ret;
3439}
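/*
 * Worked example for do_trimming() (hypothetical numbers added for clarity):
 * suppose the caller pulled the reserved range [reserved_start = 1M,
 * reserved_end = 9M) out of the tree but only discards [start = 2M,
 * end = 6M) of it.  The code above re-adds [1M, 2M) and [6M, 9M) with the
 * caller's reserved_trim_state, and re-adds [2M, 6M) as
 * BTRFS_TRIM_STATE_TRIMMED when btrfs_discard_extent() succeeded (otherwise
 * it stays BTRFS_TRIM_STATE_UNTRIMMED).
 */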
3440
3441/*
3442 * If @async is set, then we will trim 1 region and return.
3443 */
3444static int trim_no_bitmap(struct btrfs_block_group *block_group,
3445                          u64 *total_trimmed, u64 start, u64 end, u64 minlen,
3446                          bool async)
3447{
3448        struct btrfs_discard_ctl *discard_ctl =
3449                                        &block_group->fs_info->discard_ctl;
3450        struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
3451        struct btrfs_free_space *entry;
3452        struct rb_node *node;
3453        int ret = 0;
3454        u64 extent_start;
3455        u64 extent_bytes;
3456        enum btrfs_trim_state extent_trim_state;
3457        u64 bytes;
3458        const u64 max_discard_size = READ_ONCE(discard_ctl->max_discard_size);
3459
3460        while (start < end) {
3461                struct btrfs_trim_range trim_entry;
3462
3463                mutex_lock(&ctl->cache_writeout_mutex);
3464                spin_lock(&ctl->tree_lock);
3465
3466                if (ctl->free_space < minlen)
3467                        goto out_unlock;
3468
3469                entry = tree_search_offset(ctl, start, 0, 1);
3470                if (!entry)
3471                        goto out_unlock;
3472
3473                /* Skip bitmaps and if async, already trimmed entries */
3474                while (entry->bitmap ||
3475                       (async && btrfs_free_space_trimmed(entry))) {
3476                        node = rb_next(&entry->offset_index);
3477                        if (!node)
3478                                goto out_unlock;
3479                        entry = rb_entry(node, struct btrfs_free_space,
3480                                         offset_index);
3481                }
3482
3483                if (entry->offset >= end)
3484                        goto out_unlock;
3485
3486                extent_start = entry->offset;
3487                extent_bytes = entry->bytes;
3488                extent_trim_state = entry->trim_state;
3489                if (async) {
3490                        start = entry->offset;
3491                        bytes = entry->bytes;
3492                        if (bytes < minlen) {
3493                                spin_unlock(&ctl->tree_lock);
3494                                mutex_unlock(&ctl->cache_writeout_mutex);
3495                                goto next;
3496                        }
3497                        unlink_free_space(ctl, entry);
3498                        /*
3499                         * Let bytes = max_discard_size + X.
3500                         * If X < BTRFS_ASYNC_DISCARD_MIN_FILTER, we won't trim
3501                         * X when we come back around.  So trim it now.
3502                         */
3503                        if (max_discard_size &&
3504                            bytes >= (max_discard_size +
3505                                      BTRFS_ASYNC_DISCARD_MIN_FILTER)) {
3506                                bytes = max_discard_size;
3507                                extent_bytes = max_discard_size;
3508                                entry->offset += max_discard_size;
3509                                entry->bytes -= max_discard_size;
3510                                link_free_space(ctl, entry);
3511                        } else {
3512                                kmem_cache_free(btrfs_free_space_cachep, entry);
3513                        }
3514                } else {
3515                        start = max(start, extent_start);
3516                        bytes = min(extent_start + extent_bytes, end) - start;
3517                        if (bytes < minlen) {
3518                                spin_unlock(&ctl->tree_lock);
3519                                mutex_unlock(&ctl->cache_writeout_mutex);
3520                                goto next;
3521                        }
3522
3523                        unlink_free_space(ctl, entry);
3524                        kmem_cache_free(btrfs_free_space_cachep, entry);
3525                }
3526
3527                spin_unlock(&ctl->tree_lock);
3528                trim_entry.start = extent_start;
3529                trim_entry.bytes = extent_bytes;
3530                list_add_tail(&trim_entry.list, &ctl->trimming_ranges);
3531                mutex_unlock(&ctl->cache_writeout_mutex);
3532
3533                ret = do_trimming(block_group, total_trimmed, start, bytes,
3534                                  extent_start, extent_bytes, extent_trim_state,
3535                                  &trim_entry);
3536                if (ret) {
3537                        block_group->discard_cursor = start + bytes;
3538                        break;
3539                }
3540next:
3541                start += bytes;
3542                block_group->discard_cursor = start;
3543                if (async && *total_trimmed)
3544                        break;
3545
3546                if (fatal_signal_pending(current)) {
3547                        ret = -ERESTARTSYS;
3548                        break;
3549                }
3550
3551                cond_resched();
3552        }
3553
3554        return ret;
3555
3556out_unlock:
3557        block_group->discard_cursor = btrfs_block_group_end(block_group);
3558        spin_unlock(&ctl->tree_lock);
3559        mutex_unlock(&ctl->cache_writeout_mutex);
3560
3561        return ret;
3562}
3563
3564/*
3565 * If we break out of trimming a bitmap prematurely, we should reset the
3566 * trimming bit.  In a rather contrived case, it's possible to race here so
3567 * reset the state to BTRFS_TRIM_STATE_UNTRIMMED.
3568 *
3569 * start = start of bitmap
3570 * end = near end of bitmap
3571 *
3572 * Thread 1:                    Thread 2:
3573 * trim_bitmaps(start)
3574 *                              trim_bitmaps(end)
3575 *                              end_trimming_bitmap()
3576 * reset_trimming_bitmap()
3577 */
3578static void reset_trimming_bitmap(struct btrfs_free_space_ctl *ctl, u64 offset)
3579{
3580        struct btrfs_free_space *entry;
3581
3582        spin_lock(&ctl->tree_lock);
3583        entry = tree_search_offset(ctl, offset, 1, 0);
3584        if (entry) {
3585                if (btrfs_free_space_trimmed(entry)) {
3586                        ctl->discardable_extents[BTRFS_STAT_CURR] +=
3587                                entry->bitmap_extents;
3588                        ctl->discardable_bytes[BTRFS_STAT_CURR] += entry->bytes;
3589                }
3590                entry->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
3591        }
3592
3593        spin_unlock(&ctl->tree_lock);
3594}
3595
3596static void end_trimming_bitmap(struct btrfs_free_space_ctl *ctl,
3597                                struct btrfs_free_space *entry)
3598{
3599        if (btrfs_free_space_trimming_bitmap(entry)) {
3600                entry->trim_state = BTRFS_TRIM_STATE_TRIMMED;
3601                ctl->discardable_extents[BTRFS_STAT_CURR] -=
3602                        entry->bitmap_extents;
3603                ctl->discardable_bytes[BTRFS_STAT_CURR] -= entry->bytes;
3604        }
3605}
3606
3607/*
3608 * If @async is set, then we will trim 1 region and return.
3609 */
3610static int trim_bitmaps(struct btrfs_block_group *block_group,
3611                        u64 *total_trimmed, u64 start, u64 end, u64 minlen,
3612                        u64 maxlen, bool async)
3613{
3614        struct btrfs_discard_ctl *discard_ctl =
3615                                        &block_group->fs_info->discard_ctl;
3616        struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
3617        struct btrfs_free_space *entry;
3618        int ret = 0;
3619        int ret2;
3620        u64 bytes;
3621        u64 offset = offset_to_bitmap(ctl, start);
3622        const u64 max_discard_size = READ_ONCE(discard_ctl->max_discard_size);
3623
3624        while (offset < end) {
3625                bool next_bitmap = false;
3626                struct btrfs_trim_range trim_entry;
3627
3628                mutex_lock(&ctl->cache_writeout_mutex);
3629                spin_lock(&ctl->tree_lock);
3630
3631                if (ctl->free_space < minlen) {
3632                        block_group->discard_cursor =
3633                                btrfs_block_group_end(block_group);
3634                        spin_unlock(&ctl->tree_lock);
3635                        mutex_unlock(&ctl->cache_writeout_mutex);
3636                        break;
3637                }
3638
3639                entry = tree_search_offset(ctl, offset, 1, 0);
3640                /*
3641                 * Bitmaps are marked trimmed lossily now to prevent constant
3642                 * discarding of the same bitmap (the reason why we are bound
3643                 * by the filters).  So, retrim the block group bitmaps when we
3644                 * are preparing to punt to the unused_bgs list.  This uses
3645                 * @minlen to determine if we are in BTRFS_DISCARD_INDEX_UNUSED
3646                 * which is the only discard index that sets @minlen to 0.
3647                 */
3648                if (!entry || (async && minlen && start == offset &&
3649                               btrfs_free_space_trimmed(entry))) {
3650                        spin_unlock(&ctl->tree_lock);
3651                        mutex_unlock(&ctl->cache_writeout_mutex);
3652                        next_bitmap = true;
3653                        goto next;
3654                }
3655
3656                /*
3657                 * Async discard bitmap trimming begins by setting the start
3658                 * to be key.objectid, and offset_to_bitmap() aligns to the
3659                 * start of the bitmap.  This lets us know we are fully
3660                 * scanning the bitmap rather than only some portion of it.
3661                 */
3662                if (start == offset)
3663                        entry->trim_state = BTRFS_TRIM_STATE_TRIMMING;
3664
3665                bytes = minlen;
3666                ret2 = search_bitmap(ctl, entry, &start, &bytes, false);
3667                if (ret2 || start >= end) {
3668                        /*
3669                         * We lossily consider a bitmap trimmed if we only skip
3670                         * over regions <= BTRFS_ASYNC_DISCARD_MIN_FILTER.
3671                         */
3672                        if (ret2 && minlen <= BTRFS_ASYNC_DISCARD_MIN_FILTER)
3673                                end_trimming_bitmap(ctl, entry);
3674                        else
3675                                entry->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
3676                        spin_unlock(&ctl->tree_lock);
3677                        mutex_unlock(&ctl->cache_writeout_mutex);
3678                        next_bitmap = true;
3679                        goto next;
3680                }
3681
3682                /*
3683                 * We already trimmed a region, but are using the locking above
3684                 * to reset the trim_state.
3685                 */
3686                if (async && *total_trimmed) {
3687                        spin_unlock(&ctl->tree_lock);
3688                        mutex_unlock(&ctl->cache_writeout_mutex);
3689                        goto out;
3690                }
3691
3692                bytes = min(bytes, end - start);
3693                if (bytes < minlen || (async && maxlen && bytes > maxlen)) {
3694                        spin_unlock(&ctl->tree_lock);
3695                        mutex_unlock(&ctl->cache_writeout_mutex);
3696                        goto next;
3697                }
3698
3699                /*
3700                 * Let bytes = max_discard_size + X.
3701                 * If X < @minlen, we won't trim X when we come back around.
3702                 * So trim it now.  We differ here from trimming extents as we
3703                 * don't keep individual state per bit.
3704                 */
3705                if (async &&
3706                    max_discard_size &&
3707                    bytes > (max_discard_size + minlen))
3708                        bytes = max_discard_size;
3709
3710                bitmap_clear_bits(ctl, entry, start, bytes);
3711                if (entry->bytes == 0)
3712                        free_bitmap(ctl, entry);
3713
3714                spin_unlock(&ctl->tree_lock);
3715                trim_entry.start = start;
3716                trim_entry.bytes = bytes;
3717                list_add_tail(&trim_entry.list, &ctl->trimming_ranges);
3718                mutex_unlock(&ctl->cache_writeout_mutex);
3719
3720                ret = do_trimming(block_group, total_trimmed, start, bytes,
3721                                  start, bytes, 0, &trim_entry);
3722                if (ret) {
3723                        reset_trimming_bitmap(ctl, offset);
3724                        block_group->discard_cursor =
3725                                btrfs_block_group_end(block_group);
3726                        break;
3727                }
3728next:
3729                if (next_bitmap) {
3730                        offset += BITS_PER_BITMAP * ctl->unit;
3731                        start = offset;
3732                } else {
3733                        start += bytes;
3734                }
3735                block_group->discard_cursor = start;
3736
3737                if (fatal_signal_pending(current)) {
3738                        if (start != offset)
3739                                reset_trimming_bitmap(ctl, offset);
3740                        ret = -ERESTARTSYS;
3741                        break;
3742                }
3743
3744                cond_resched();
3745        }
3746
3747        if (offset >= end)
3748                block_group->discard_cursor = end;
3749
3750out:
3751        return ret;
3752}
3753
3754int btrfs_trim_block_group(struct btrfs_block_group *block_group,
3755                           u64 *trimmed, u64 start, u64 end, u64 minlen)
3756{
3757        struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
3758        int ret;
3759        u64 rem = 0;
3760
3761        *trimmed = 0;
3762
3763        spin_lock(&block_group->lock);
3764        if (block_group->removed) {
3765                spin_unlock(&block_group->lock);
3766                return 0;
3767        }
3768        btrfs_freeze_block_group(block_group);
3769        spin_unlock(&block_group->lock);
3770
3771        ret = trim_no_bitmap(block_group, trimmed, start, end, minlen, false);
3772        if (ret)
3773                goto out;
3774
3775        ret = trim_bitmaps(block_group, trimmed, start, end, minlen, 0, false);
3776        div64_u64_rem(end, BITS_PER_BITMAP * ctl->unit, &rem);
3777        /* If we ended in the middle of a bitmap, reset the trimming flag */
3778        if (rem)
3779                reset_trimming_bitmap(ctl, offset_to_bitmap(ctl, end));
3780out:
3781        btrfs_unfreeze_block_group(block_group);
3782        return ret;
3783}
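/*
 * Illustrative sketch (not part of the original file): a FITRIM-style caller
 * trimming one block group at a time.  The clamping of the requested range
 * and the variable names are assumptions; btrfs_trim_block_group() and
 * btrfs_block_group_end() come from this file.
 *
 *	u64 group_trimmed = 0;
 *	u64 start = max(range_start, block_group->start);
 *	u64 end = min(range_end, btrfs_block_group_end(block_group));
 *	int ret;
 *
 *	ret = btrfs_trim_block_group(block_group, &group_trimmed, start, end,
 *				     range_minlen);
 *	total_trimmed += group_trimmed;
 */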
3784
3785int btrfs_trim_block_group_extents(struct btrfs_block_group *block_group,
3786                                   u64 *trimmed, u64 start, u64 end, u64 minlen,
3787                                   bool async)
3788{
3789        int ret;
3790
3791        *trimmed = 0;
3792
3793        spin_lock(&block_group->lock);
3794        if (block_group->removed) {
3795                spin_unlock(&block_group->lock);
3796                return 0;
3797        }
3798        btrfs_freeze_block_group(block_group);
3799        spin_unlock(&block_group->lock);
3800
3801        ret = trim_no_bitmap(block_group, trimmed, start, end, minlen, async);
3802        btrfs_unfreeze_block_group(block_group);
3803
3804        return ret;
3805}
3806
3807int btrfs_trim_block_group_bitmaps(struct btrfs_block_group *block_group,
3808                                   u64 *trimmed, u64 start, u64 end, u64 minlen,
3809                                   u64 maxlen, bool async)
3810{
3811        int ret;
3812
3813        *trimmed = 0;
3814
3815        spin_lock(&block_group->lock);
3816        if (block_group->removed) {
3817                spin_unlock(&block_group->lock);
3818                return 0;
3819        }
3820        btrfs_freeze_block_group(block_group);
3821        spin_unlock(&block_group->lock);
3822
3823        ret = trim_bitmaps(block_group, trimmed, start, end, minlen, maxlen,
3824                           async);
3825
3826        btrfs_unfreeze_block_group(block_group);
3827
3828        return ret;
3829}
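/*
 * Illustrative sketch (not part of the original file): how an async discard
 * worker is expected to drive the two entry points above.  The state variable
 * and filter values are assumptions; the one-region-per-call behaviour and
 * the discard_cursor bookkeeping come from trim_no_bitmap()/trim_bitmaps().
 *
 *	u64 trimmed = 0;
 *	int ret;
 *
 *	if (trimming_extents)
 *		ret = btrfs_trim_block_group_extents(block_group, &trimmed,
 *				block_group->discard_cursor,
 *				btrfs_block_group_end(block_group),
 *				minlen, true);
 *	else
 *		ret = btrfs_trim_block_group_bitmaps(block_group, &trimmed,
 *				block_group->discard_cursor,
 *				btrfs_block_group_end(block_group),
 *				minlen, maxlen, true);
 *
 * With async == true each call trims at most one region, updates
 * block_group->discard_cursor, and returns, so the worker can resubmit itself
 * between calls.
 */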
3830
3831/*
3832 * Find the left-most item in the cache tree, and then return the
3833 * smallest inode number in the item.
3834 *
3835 * Note: the returned inode number may not be the smallest one in
3836 * the tree, if the left-most item is a bitmap.
3837 */
3838u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root)
3839{
3840        struct btrfs_free_space_ctl *ctl = fs_root->free_ino_ctl;
3841        struct btrfs_free_space *entry = NULL;
3842        u64 ino = 0;
3843
3844        spin_lock(&ctl->tree_lock);
3845
3846        if (RB_EMPTY_ROOT(&ctl->free_space_offset))
3847                goto out;
3848
3849        entry = rb_entry(rb_first(&ctl->free_space_offset),
3850                         struct btrfs_free_space, offset_index);
3851
3852        if (!entry->bitmap) {
3853                ino = entry->offset;
3854
3855                unlink_free_space(ctl, entry);
3856                entry->offset++;
3857                entry->bytes--;
3858                if (!entry->bytes)
3859                        kmem_cache_free(btrfs_free_space_cachep, entry);
3860                else
3861                        link_free_space(ctl, entry);
3862        } else {
3863                u64 offset = 0;
3864                u64 count = 1;
3865                int ret;
3866
3867                ret = search_bitmap(ctl, entry, &offset, &count, true);
3868                /* Logic error; Should be empty if it can't find anything */
3869                ASSERT(!ret);
3870
3871                ino = offset;
3872                bitmap_clear_bits(ctl, entry, offset, 1);
3873                if (entry->bytes == 0)
3874                        free_bitmap(ctl, entry);
3875        }
3876out:
3877        spin_unlock(&ctl->tree_lock);
3878
3879        return ino;
3880}
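/*
 * Worked example of the note above (hypothetical numbers): if the left-most
 * item is a bitmap anchored at offset 256 whose first set bit corresponds to
 * inode 300, while a separate extent entry records the free range [260, 270),
 * this returns 300 even though 260 is also free.  Callers only depend on
 * getting some cached free inode number, not the minimum one.
 */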
3881
3882struct inode *lookup_free_ino_inode(struct btrfs_root *root,
3883                                    struct btrfs_path *path)
3884{
3885        struct inode *inode = NULL;
3886
3887        spin_lock(&root->ino_cache_lock);
3888        if (root->ino_cache_inode)
3889                inode = igrab(root->ino_cache_inode);
3890        spin_unlock(&root->ino_cache_lock);
3891        if (inode)
3892                return inode;
3893
3894        inode = __lookup_free_space_inode(root, path, 0);
3895        if (IS_ERR(inode))
3896                return inode;
3897
3898        spin_lock(&root->ino_cache_lock);
3899        if (!btrfs_fs_closing(root->fs_info))
3900                root->ino_cache_inode = igrab(inode);
3901        spin_unlock(&root->ino_cache_lock);
3902
3903        return inode;
3904}
3905
3906int create_free_ino_inode(struct btrfs_root *root,
3907                          struct btrfs_trans_handle *trans,
3908                          struct btrfs_path *path)
3909{
3910        return __create_free_space_inode(root, trans, path,
3911                                         BTRFS_FREE_INO_OBJECTID, 0);
3912}
3913
3914int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
3915{
3916        struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
3917        struct btrfs_path *path;
3918        struct inode *inode;
3919        int ret = 0;
3920        u64 root_gen = btrfs_root_generation(&root->root_item);
3921
3922        if (!btrfs_test_opt(fs_info, INODE_MAP_CACHE))
3923                return 0;
3924
3925        /*
3926         * If we're unmounting then just return, since this does a search on the
3927         * normal root and not the commit root, and we could deadlock.
3928         */
3929        if (btrfs_fs_closing(fs_info))
3930                return 0;
3931
3932        path = btrfs_alloc_path();
3933        if (!path)
3934                return 0;
3935
3936        inode = lookup_free_ino_inode(root, path);
3937        if (IS_ERR(inode))
3938                goto out;
3939
3940        if (root_gen != BTRFS_I(inode)->generation)
3941                goto out_put;
3942
3943        ret = __load_free_space_cache(root, inode, ctl, path, 0);
3944
3945        if (ret < 0)
3946                btrfs_err(fs_info,
3947                        "failed to load free ino cache for root %llu",
3948                        root->root_key.objectid);
3949out_put:
3950        iput(inode);
3951out:
3952        btrfs_free_path(path);
3953        return ret;
3954}
3955
3956int btrfs_write_out_ino_cache(struct btrfs_root *root,
3957                              struct btrfs_trans_handle *trans,
3958                              struct btrfs_path *path,
3959                              struct inode *inode)
3960{
3961        struct btrfs_fs_info *fs_info = root->fs_info;
3962        struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
3963        int ret;
3964        struct btrfs_io_ctl io_ctl;
3965        bool release_metadata = true;
3966
3967        if (!btrfs_test_opt(fs_info, INODE_MAP_CACHE))
3968                return 0;
3969
3970        memset(&io_ctl, 0, sizeof(io_ctl));
3971        ret = __btrfs_write_out_cache(root, inode, ctl, NULL, &io_ctl, trans);
3972        if (!ret) {
3973                /*
3974                 * At this point writepages() didn't error out, so our metadata
3975                 * reservation is released when the writeback finishes, at
3976                 * inode.c:btrfs_finish_ordered_io(), regardless of whether it
3977                 * finishes with or without an error.
3978                 */
3979                release_metadata = false;
3980                ret = btrfs_wait_cache_io_root(root, trans, &io_ctl, path);
3981        }
3982
3983        if (ret) {
3984                if (release_metadata)
3985                        btrfs_delalloc_release_metadata(BTRFS_I(inode),
3986                                        inode->i_size, true);
3987                btrfs_debug(fs_info,
3988                          "failed to write free ino cache for root %llu error %d",
3989                          root->root_key.objectid, ret);
3990        }
3991
3992        return ret;
3993}
3994
3995#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
3996/*
3997 * Use this if you need to make a bitmap or extent entry specifically.  It
3998 * doesn't do any of the merging that add_free_space does, and it acts a lot
3999 * like how the free space cache loading works, so you can end up with really
4000 * weird configurations.
4001 */
4002int test_add_free_space_entry(struct btrfs_block_group *cache,
4003                              u64 offset, u64 bytes, bool bitmap)
4004{
4005        struct btrfs_free_space_ctl *ctl = cache->free_space_ctl;
4006        struct btrfs_free_space *info = NULL, *bitmap_info;
4007        void *map = NULL;
4008        enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_TRIMMED;
4009        u64 bytes_added;
4010        int ret;
4011
4012again:
4013        if (!info) {
4014                info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
4015                if (!info)
4016                        return -ENOMEM;
4017        }
4018
4019        if (!bitmap) {
4020                spin_lock(&ctl->tree_lock);
4021                info->offset = offset;
4022                info->bytes = bytes;
4023                info->max_extent_size = 0;
4024                ret = link_free_space(ctl, info);
4025                spin_unlock(&ctl->tree_lock);
4026                if (ret)
4027                        kmem_cache_free(btrfs_free_space_cachep, info);
4028                return ret;
4029        }
4030
4031        if (!map) {
4032                map = kmem_cache_zalloc(btrfs_free_space_bitmap_cachep, GFP_NOFS);
4033                if (!map) {
4034                        kmem_cache_free(btrfs_free_space_cachep, info);
4035                        return -ENOMEM;
4036                }
4037        }
4038
4039        spin_lock(&ctl->tree_lock);
4040        bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
4041                                         1, 0);
4042        if (!bitmap_info) {
4043                info->bitmap = map;
4044                map = NULL;
4045                add_new_bitmap(ctl, info, offset);
4046                bitmap_info = info;
4047                info = NULL;
4048        }
4049
4050        bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes,
4051                                          trim_state);
4052
4053        bytes -= bytes_added;
4054        offset += bytes_added;
4055        spin_unlock(&ctl->tree_lock);
4056
4057        if (bytes)
4058                goto again;
4059
4060        if (info)
4061                kmem_cache_free(btrfs_free_space_cachep, info);
4062        if (map)
4063                kmem_cache_free(btrfs_free_space_bitmap_cachep, map);
4064        return 0;
4065}
4066
4067/*
4068 * Checks to see if the given range is in the free space cache.  This is really
4069 * just used to check the absence of space, so if there is free space in the
4070 * range at all we will return 1.
4071 */
4072int test_check_exists(struct btrfs_block_group *cache,
4073                      u64 offset, u64 bytes)
4074{
4075        struct btrfs_free_space_ctl *ctl = cache->free_space_ctl;
4076        struct btrfs_free_space *info;
4077        int ret = 0;
4078
4079        spin_lock(&ctl->tree_lock);
4080        info = tree_search_offset(ctl, offset, 0, 0);
4081        if (!info) {
4082                info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
4083                                          1, 0);
4084                if (!info)
4085                        goto out;
4086        }
4087
4088have_info:
4089        if (info->bitmap) {
4090                u64 bit_off, bit_bytes;
4091                struct rb_node *n;
4092                struct btrfs_free_space *tmp;
4093
4094                bit_off = offset;
4095                bit_bytes = ctl->unit;
4096                ret = search_bitmap(ctl, info, &bit_off, &bit_bytes, false);
4097                if (!ret) {
4098                        if (bit_off == offset) {
4099                                ret = 1;
4100                                goto out;
4101                        } else if (bit_off > offset &&
4102                                   offset + bytes > bit_off) {
4103                                ret = 1;
4104                                goto out;
4105                        }
4106                }
4107
4108                n = rb_prev(&info->offset_index);
4109                while (n) {
4110                        tmp = rb_entry(n, struct btrfs_free_space,
4111                                       offset_index);
4112                        if (tmp->offset + tmp->bytes < offset)
4113                                break;
4114                        if (offset + bytes < tmp->offset) {
4115                                n = rb_prev(&tmp->offset_index);
4116                                continue;
4117                        }
4118                        info = tmp;
4119                        goto have_info;
4120                }
4121
4122                n = rb_next(&info->offset_index);
4123                while (n) {
4124                        tmp = rb_entry(n, struct btrfs_free_space,
4125                                       offset_index);
4126                        if (offset + bytes < tmp->offset)
4127                                break;
4128                        if (tmp->offset + tmp->bytes < offset) {
4129                                n = rb_next(&tmp->offset_index);
4130                                continue;
4131                        }
4132                        info = tmp;
4133                        goto have_info;
4134                }
4135
4136                ret = 0;
4137                goto out;
4138        }
4139
4140        if (info->offset == offset) {
4141                ret = 1;
4142                goto out;
4143        }
4144
4145        if (offset > info->offset && offset < info->offset + info->bytes)
4146                ret = 1;
4147out:
4148        spin_unlock(&ctl->tree_lock);
4149        return ret;
4150}
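/*
 * Illustrative sketch (not part of the original tests): how the two helpers
 * above are meant to be combined in a sanity test.  The offsets and the error
 * handling are assumptions; SZ_1M/SZ_4M are the usual linux/sizes.h constants.
 *
 *	ret = test_add_free_space_entry(cache, SZ_1M, SZ_4M, false);
 *	if (ret)
 *		return ret;
 *
 *	if (!test_check_exists(cache, SZ_1M, SZ_4M))
 *		return -EINVAL;
 *	if (test_check_exists(cache, SZ_1M + SZ_4M, SZ_1M))
 *		return -EINVAL;
 */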
4151#endif /* CONFIG_BTRFS_FS_RUN_SANITY_TESTS */
4152