linux/fs/btrfs/free-space-cache.c
/*
 * Copyright (C) 2008 Red Hat.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

#include <linux/pagemap.h>
#include <linux/sched.h>
#include <linux/math64.h>
#include "ctree.h"
#include "free-space-cache.h"
#include "transaction.h"

#define BITS_PER_BITMAP         (PAGE_CACHE_SIZE * 8)
#define MAX_CACHE_BYTES_PER_GIG (32 * 1024)

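/*
 * Helpers for translating between byte offsets and bit positions inside a
 * bitmap entry.  Each bitmap covers BITS_PER_BITMAP sectors, so e.g. with 4k
 * pages and a 4k sectorsize a single bitmap entry describes
 * 32768 * 4k = 128M of the block group.
 */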
static inline unsigned long offset_to_bit(u64 bitmap_start, u64 sectorsize,
                                          u64 offset)
{
        BUG_ON(offset < bitmap_start);
        offset -= bitmap_start;
        return (unsigned long)(div64_u64(offset, sectorsize));
}

static inline unsigned long bytes_to_bits(u64 bytes, u64 sectorsize)
{
        return (unsigned long)(div64_u64(bytes, sectorsize));
}

static inline u64 offset_to_bitmap(struct btrfs_block_group_cache *block_group,
                                   u64 offset)
{
        u64 bitmap_start;
        u64 bytes_per_bitmap;

        bytes_per_bitmap = BITS_PER_BITMAP * block_group->sectorsize;
        bitmap_start = offset - block_group->key.objectid;
        bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
        bitmap_start *= bytes_per_bitmap;
        bitmap_start += block_group->key.objectid;

        return bitmap_start;
}

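/*
 * Insert a free space entry into the offset-sorted rbtree.  Extent entries
 * and bitmap entries may share an offset; the comment in the loop below
 * describes how that tie is broken so extent entries are always found first.
 */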
static int tree_insert_offset(struct rb_root *root, u64 offset,
                              struct rb_node *node, int bitmap)
{
        struct rb_node **p = &root->rb_node;
        struct rb_node *parent = NULL;
        struct btrfs_free_space *info;

        while (*p) {
                parent = *p;
                info = rb_entry(parent, struct btrfs_free_space, offset_index);

                if (offset < info->offset) {
                        p = &(*p)->rb_left;
                } else if (offset > info->offset) {
                        p = &(*p)->rb_right;
                } else {
                        /*
                         * we could have a bitmap entry and an extent entry
                         * share the same offset.  If this is the case, we want
                         * the extent entry to always be found first if we do a
                         * linear search through the tree, since we want to have
                         * the quickest allocation time, and allocating from an
                         * extent is faster than allocating from a bitmap.  So
                         * if we're inserting a bitmap and we find an entry at
                         * this offset, we want to go right, or after this entry
                         * logically.  If we are inserting an extent and we've
                         * found a bitmap, we want to go left, or before
                         * logically.
                         */
                        if (bitmap) {
                                WARN_ON(info->bitmap);
                                p = &(*p)->rb_right;
                        } else {
                                WARN_ON(!info->bitmap);
                                p = &(*p)->rb_left;
                        }
                }
        }

        rb_link_node(node, parent, p);
        rb_insert_color(node, root);

        return 0;
}

/*
 * searches the tree for the given offset.
 *
 * fuzzy - If this is set, then we are trying to make an allocation, and we just
 * want a section that has at least 'bytes' in size and comes at or after the
 * given offset.
 */
static struct btrfs_free_space *
tree_search_offset(struct btrfs_block_group_cache *block_group,
                   u64 offset, int bitmap_only, int fuzzy)
{
        struct rb_node *n = block_group->free_space_offset.rb_node;
        struct btrfs_free_space *entry, *prev = NULL;

        /* find entry that is closest to the 'offset' */
        while (1) {
                if (!n) {
                        entry = NULL;
                        break;
                }

                entry = rb_entry(n, struct btrfs_free_space, offset_index);
                prev = entry;

                if (offset < entry->offset)
                        n = n->rb_left;
                else if (offset > entry->offset)
                        n = n->rb_right;
                else
                        break;
        }

        if (bitmap_only) {
                if (!entry)
                        return NULL;
                if (entry->bitmap)
                        return entry;

                /*
                 * bitmap entry and extent entry may share same offset,
                 * in that case, bitmap entry comes after extent entry.
                 */
                n = rb_next(n);
                if (!n)
                        return NULL;
                entry = rb_entry(n, struct btrfs_free_space, offset_index);
                if (entry->offset != offset)
                        return NULL;

                WARN_ON(!entry->bitmap);
                return entry;
        } else if (entry) {
                if (entry->bitmap) {
                        /*
                         * if previous extent entry covers the offset,
                         * we should return it instead of the bitmap entry
                         */
                        n = &entry->offset_index;
                        while (1) {
                                n = rb_prev(n);
                                if (!n)
                                        break;
                                prev = rb_entry(n, struct btrfs_free_space,
                                                offset_index);
                                if (!prev->bitmap) {
                                        if (prev->offset + prev->bytes > offset)
                                                entry = prev;
                                        break;
                                }
                        }
                }
                return entry;
        }

        if (!prev)
                return NULL;

        /* find last entry before the 'offset' */
        entry = prev;
        if (entry->offset > offset) {
                n = rb_prev(&entry->offset_index);
                if (n) {
                        entry = rb_entry(n, struct btrfs_free_space,
                                        offset_index);
                        BUG_ON(entry->offset > offset);
                } else {
                        if (fuzzy)
                                return entry;
                        else
                                return NULL;
                }
        }

        if (entry->bitmap) {
                n = &entry->offset_index;
                while (1) {
                        n = rb_prev(n);
                        if (!n)
                                break;
                        prev = rb_entry(n, struct btrfs_free_space,
                                        offset_index);
                        if (!prev->bitmap) {
                                if (prev->offset + prev->bytes > offset)
                                        return prev;
                                break;
                        }
                }
                if (entry->offset + BITS_PER_BITMAP *
                    block_group->sectorsize > offset)
                        return entry;
        } else if (entry->offset + entry->bytes > offset)
                return entry;

        if (!fuzzy)
                return NULL;

        while (1) {
                if (entry->bitmap) {
                        if (entry->offset + BITS_PER_BITMAP *
                            block_group->sectorsize > offset)
                                break;
                } else {
                        if (entry->offset + entry->bytes > offset)
                                break;
                }

                n = rb_next(&entry->offset_index);
                if (!n)
                        return NULL;
                entry = rb_entry(n, struct btrfs_free_space, offset_index);
        }
        return entry;
}

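/*
 * unlink_free_space/link_free_space remove or add an entry in the offset
 * rbtree and keep the block group's free_space and free_extents counters in
 * sync.  The caller must hold block_group->tree_lock.
 */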
static void unlink_free_space(struct btrfs_block_group_cache *block_group,
                              struct btrfs_free_space *info)
{
        rb_erase(&info->offset_index, &block_group->free_space_offset);
        block_group->free_extents--;
        block_group->free_space -= info->bytes;
}

static int link_free_space(struct btrfs_block_group_cache *block_group,
                           struct btrfs_free_space *info)
{
        int ret = 0;

        BUG_ON(!info->bitmap && !info->bytes);
        ret = tree_insert_offset(&block_group->free_space_offset, info->offset,
                                 &info->offset_index, (info->bitmap != NULL));
        if (ret)
                return ret;

        block_group->free_space += info->bytes;
        block_group->free_extents++;
        return ret;
}

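/*
 * Recompute extents_thresh, the number of extent entries we allow before new
 * free space is pushed into bitmaps.  The target is to spend at most
 * MAX_CACHE_BYTES_PER_GIG of memory per gigabyte of block group space.
 */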
static void recalculate_thresholds(struct btrfs_block_group_cache *block_group)
{
        u64 max_bytes;
        u64 bitmap_bytes;
        u64 extent_bytes;

        /*
         * The goal is to keep the total amount of memory used per 1gb of space
         * at or below 32k, so we need to adjust how much memory we allow to be
         * used by extent based free space tracking
         */
        max_bytes = MAX_CACHE_BYTES_PER_GIG *
                (div64_u64(block_group->key.offset, 1024 * 1024 * 1024));

        /*
         * we want to account for 1 more bitmap than what we have so we can make
         * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as
         * we add more bitmaps.
         */
        bitmap_bytes = (block_group->total_bitmaps + 1) * PAGE_CACHE_SIZE;

        if (bitmap_bytes >= max_bytes) {
                block_group->extents_thresh = 0;
                return;
        }

        /*
         * we want the extent entry threshold to always be at most 1/2 the max
         * bytes we can have, or whatever is less than that.
         */
        extent_bytes = max_bytes - bitmap_bytes;
        extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2));

        block_group->extents_thresh =
                div64_u64(extent_bytes, (sizeof(struct btrfs_free_space)));
}

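/*
 * Clear or set the bits covering [offset, offset + bytes) in a bitmap entry
 * and adjust the entry's byte count and the block group's free space to
 * match.
 */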
static void bitmap_clear_bits(struct btrfs_block_group_cache *block_group,
                              struct btrfs_free_space *info, u64 offset,
                              u64 bytes)
{
        unsigned long start, end;
        unsigned long i;

        start = offset_to_bit(info->offset, block_group->sectorsize, offset);
        end = start + bytes_to_bits(bytes, block_group->sectorsize);
        BUG_ON(end > BITS_PER_BITMAP);

        for (i = start; i < end; i++)
                clear_bit(i, info->bitmap);

        info->bytes -= bytes;
        block_group->free_space -= bytes;
}

static void bitmap_set_bits(struct btrfs_block_group_cache *block_group,
                            struct btrfs_free_space *info, u64 offset,
                            u64 bytes)
{
        unsigned long start, end;
        unsigned long i;

        start = offset_to_bit(info->offset, block_group->sectorsize, offset);
        end = start + bytes_to_bits(bytes, block_group->sectorsize);
        BUG_ON(end > BITS_PER_BITMAP);

        for (i = start; i < end; i++)
                set_bit(i, info->bitmap);

        info->bytes += bytes;
        block_group->free_space += bytes;
}

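/*
 * Look for a run of at least *bytes of free space inside one bitmap entry,
 * starting at or after *offset.  On success *offset and *bytes are updated to
 * describe the run that was found and 0 is returned, otherwise -1.
 */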
static int search_bitmap(struct btrfs_block_group_cache *block_group,
                         struct btrfs_free_space *bitmap_info, u64 *offset,
                         u64 *bytes)
{
        unsigned long found_bits = 0;
        unsigned long bits, i;
        unsigned long next_zero;

        i = offset_to_bit(bitmap_info->offset, block_group->sectorsize,
                          max_t(u64, *offset, bitmap_info->offset));
        bits = bytes_to_bits(*bytes, block_group->sectorsize);

        for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i);
             i < BITS_PER_BITMAP;
             i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i + 1)) {
                next_zero = find_next_zero_bit(bitmap_info->bitmap,
                                               BITS_PER_BITMAP, i);
                if ((next_zero - i) >= bits) {
                        found_bits = next_zero - i;
                        break;
                }
                i = next_zero;
        }

        if (found_bits) {
                *offset = (u64)(i * block_group->sectorsize) +
                        bitmap_info->offset;
                *bytes = (u64)(found_bits) * block_group->sectorsize;
                return 0;
        }

        return -1;
}

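/*
 * Walk the free space tree, starting near *offset, for the first entry
 * (extent or bitmap) with enough bytes to satisfy *bytes.  On success the
 * entry is returned and *offset and *bytes describe the space that was found,
 * otherwise NULL is returned.
 */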
static struct btrfs_free_space *find_free_space(struct btrfs_block_group_cache
                                                *block_group, u64 *offset,
                                                u64 *bytes, int debug)
{
        struct btrfs_free_space *entry;
        struct rb_node *node;
        int ret;

        if (!block_group->free_space_offset.rb_node)
                return NULL;

        entry = tree_search_offset(block_group,
                                   offset_to_bitmap(block_group, *offset),
                                   0, 1);
        if (!entry)
                return NULL;

        for (node = &entry->offset_index; node; node = rb_next(node)) {
                entry = rb_entry(node, struct btrfs_free_space, offset_index);
                if (entry->bytes < *bytes)
                        continue;

                if (entry->bitmap) {
                        ret = search_bitmap(block_group, entry, offset, bytes);
                        if (!ret)
                                return entry;
                        continue;
                }

                *offset = entry->offset;
                *bytes = entry->bytes;
                return entry;
        }

        return NULL;
}

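/*
 * Turn a pre-allocated free space entry into a bitmap entry covering the
 * bitmap-aligned region around 'offset' and link it into the tree.  The
 * caller is responsible for setting the bits afterwards.
 */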
static void add_new_bitmap(struct btrfs_block_group_cache *block_group,
                           struct btrfs_free_space *info, u64 offset)
{
        u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize;
        int max_bitmaps = (int)div64_u64(block_group->key.offset +
                                         bytes_per_bg - 1, bytes_per_bg);

        BUG_ON(block_group->total_bitmaps >= max_bitmaps);

        info->offset = offset_to_bitmap(block_group, offset);
        info->bytes = 0;
        link_free_space(block_group, info);
        block_group->total_bitmaps++;

        recalculate_thresholds(block_group);
}

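/*
 * Remove [*offset, *offset + *bytes) from one or more bitmap entries.  If the
 * range continues past the end of this bitmap, *offset and *bytes are
 * advanced and we move on to the next bitmap entry.  Returns -EAGAIN when the
 * caller needs to retry against an extent entry, or a negative error if the
 * space wasn't actually present.
 */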
static noinline int remove_from_bitmap(struct btrfs_block_group_cache *block_group,
                              struct btrfs_free_space *bitmap_info,
                              u64 *offset, u64 *bytes)
{
        u64 end;
        u64 search_start, search_bytes;
        int ret;

again:
        end = bitmap_info->offset +
                (u64)(BITS_PER_BITMAP * block_group->sectorsize) - 1;

        /*
         * XXX - this can go away after a few releases.
         *
         * since the only user of btrfs_remove_free_space is the tree logging
         * stuff, and the only way to test that is under crash conditions, we
         * want to have this debug stuff here just in case something's not
         * working.  Search the bitmap for the space we are trying to use to
         * make sure it's actually there.  If it's not there then we need to
         * stop because something has gone wrong.
         */
        search_start = *offset;
        search_bytes = *bytes;
        ret = search_bitmap(block_group, bitmap_info, &search_start,
                            &search_bytes);
        BUG_ON(ret < 0 || search_start != *offset);

        if (*offset > bitmap_info->offset && *offset + *bytes > end) {
                bitmap_clear_bits(block_group, bitmap_info, *offset,
                                  end - *offset + 1);
                *bytes -= end - *offset + 1;
                *offset = end + 1;
        } else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
                bitmap_clear_bits(block_group, bitmap_info, *offset, *bytes);
                *bytes = 0;
        }

        if (*bytes) {
                struct rb_node *next = rb_next(&bitmap_info->offset_index);
                if (!bitmap_info->bytes) {
                        unlink_free_space(block_group, bitmap_info);
                        kfree(bitmap_info->bitmap);
                        kfree(bitmap_info);
                        block_group->total_bitmaps--;
                        recalculate_thresholds(block_group);
                }

                /*
                 * no entry after this bitmap, but we still have bytes to
                 * remove, so something has gone wrong.
                 */
                if (!next)
                        return -EINVAL;

                bitmap_info = rb_entry(next, struct btrfs_free_space,
                                       offset_index);

                /*
                 * if the next entry isn't a bitmap we need to return to let the
                 * extent stuff do its work.
                 */
                if (!bitmap_info->bitmap)
                        return -EAGAIN;

                /*
                 * Ok the next item is a bitmap, but it may not actually hold
                 * the information for the rest of this free space stuff, so
                 * look for it, and if we don't find it return so we can try
                 * everything over again.
                 */
                search_start = *offset;
                search_bytes = *bytes;
                ret = search_bitmap(block_group, bitmap_info, &search_start,
                                    &search_bytes);
                if (ret < 0 || search_start != *offset)
                        return -EAGAIN;

                goto again;
        } else if (!bitmap_info->bytes) {
                unlink_free_space(block_group, bitmap_info);
                kfree(bitmap_info->bitmap);
                kfree(bitmap_info);
                block_group->total_bitmaps--;
                recalculate_thresholds(block_group);
        }

        return 0;
}

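/*
 * Try to store the free space described by 'info' in a bitmap instead of as
 * an extent entry.  Returns 0 if the caller should add the space as an extent
 * entry after all, 1 if the space went into a bitmap, or a negative error.
 * Called with tree_lock held; the lock is dropped and retaken if a new bitmap
 * has to be allocated.
 */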
static int insert_into_bitmap(struct btrfs_block_group_cache *block_group,
                              struct btrfs_free_space *info)
{
        struct btrfs_free_space *bitmap_info;
        int added = 0;
        u64 bytes, offset, end;
        int ret;

        /*
         * If we are below the extents threshold then we can add this as an
         * extent, and don't have to deal with the bitmap
         */
        if (block_group->free_extents < block_group->extents_thresh &&
            info->bytes > block_group->sectorsize * 4)
                return 0;

        /*
         * some block groups are so tiny they can't be enveloped by a bitmap, so
         * don't even bother to create a bitmap for this
         */
        if (BITS_PER_BITMAP * block_group->sectorsize >
            block_group->key.offset)
                return 0;

        bytes = info->bytes;
        offset = info->offset;

again:
        bitmap_info = tree_search_offset(block_group,
                                         offset_to_bitmap(block_group, offset),
                                         1, 0);
        if (!bitmap_info) {
                BUG_ON(added);
                goto new_bitmap;
        }

        end = bitmap_info->offset +
                (u64)(BITS_PER_BITMAP * block_group->sectorsize);

        if (offset >= bitmap_info->offset && offset + bytes > end) {
                bitmap_set_bits(block_group, bitmap_info, offset,
                                end - offset);
                bytes -= end - offset;
                offset = end;
                added = 0;
        } else if (offset >= bitmap_info->offset && offset + bytes <= end) {
                bitmap_set_bits(block_group, bitmap_info, offset, bytes);
                bytes = 0;
        } else {
                BUG();
        }

        if (!bytes) {
                ret = 1;
                goto out;
        } else
                goto again;

new_bitmap:
        if (info && info->bitmap) {
                add_new_bitmap(block_group, info, offset);
                added = 1;
                info = NULL;
                goto again;
        } else {
                spin_unlock(&block_group->tree_lock);

                /* no pre-allocated info, allocate a new one */
                if (!info) {
                        info = kzalloc(sizeof(struct btrfs_free_space),
                                       GFP_NOFS);
                        if (!info) {
                                spin_lock(&block_group->tree_lock);
                                ret = -ENOMEM;
                                goto out;
                        }
                }

                /* allocate the bitmap */
                info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
                spin_lock(&block_group->tree_lock);
                if (!info->bitmap) {
                        ret = -ENOMEM;
                        goto out;
                }
                goto again;
        }

out:
        if (info) {
                if (info->bitmap)
                        kfree(info->bitmap);
                kfree(info);
        }

        return ret;
}

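/*
 * Add a range of free space to the cache.  The new range is merged with any
 * adjacent extent entries; if there is no extent entry on either side, the
 * space may be dropped into a bitmap instead.
 */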
int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
                         u64 offset, u64 bytes)
{
        struct btrfs_free_space *right_info = NULL;
        struct btrfs_free_space *left_info = NULL;
        struct btrfs_free_space *info = NULL;
        int ret = 0;

        info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS);
        if (!info)
                return -ENOMEM;

        info->offset = offset;
        info->bytes = bytes;

        spin_lock(&block_group->tree_lock);

        /*
         * first we want to see if there is free space adjacent to the range we
         * are adding, if there is remove that struct and add a new one to
         * cover the entire range
         */
        right_info = tree_search_offset(block_group, offset + bytes, 0, 0);
        if (right_info && rb_prev(&right_info->offset_index))
                left_info = rb_entry(rb_prev(&right_info->offset_index),
                                     struct btrfs_free_space, offset_index);
        else
                left_info = tree_search_offset(block_group, offset - 1, 0, 0);

        /*
         * If there was no extent directly to the left or right of this new
         * extent then we know we're going to have to allocate a new extent, so
         * before we do that see if we need to drop this into a bitmap
         */
        if ((!left_info || left_info->bitmap) &&
            (!right_info || right_info->bitmap)) {
                ret = insert_into_bitmap(block_group, info);

                if (ret < 0) {
                        goto out;
                } else if (ret) {
                        ret = 0;
                        goto out;
                }
        }

        if (right_info && !right_info->bitmap) {
                unlink_free_space(block_group, right_info);
                info->bytes += right_info->bytes;
                kfree(right_info);
        }

        if (left_info && !left_info->bitmap &&
            left_info->offset + left_info->bytes == offset) {
                unlink_free_space(block_group, left_info);
                info->offset = left_info->offset;
                info->bytes += left_info->bytes;
                kfree(left_info);
        }

        ret = link_free_space(block_group, info);
        if (ret)
                kfree(info);
out:
        spin_unlock(&block_group->tree_lock);

        if (ret) {
                printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret);
                BUG_ON(ret == -EEXIST);
        }

        return ret;
}

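/*
 * Remove a range of space from the cache.  The range may span extent and
 * bitmap entries, and may land in the middle of an extent entry, in which
 * case the entry is split around the hole.
 */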
int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
                            u64 offset, u64 bytes)
{
        struct btrfs_free_space *info;
        struct btrfs_free_space *next_info = NULL;
        int ret = 0;

        spin_lock(&block_group->tree_lock);

again:
        info = tree_search_offset(block_group, offset, 0, 0);
        if (!info) {
                /*
                 * oops didn't find an extent that matched the space we wanted
                 * to remove, look for a bitmap instead
                 */
                info = tree_search_offset(block_group,
                                          offset_to_bitmap(block_group, offset),
                                          1, 0);
                if (!info) {
                        WARN_ON(1);
                        goto out_lock;
                }
        }

        if (info->bytes < bytes && rb_next(&info->offset_index)) {
                u64 end;
                next_info = rb_entry(rb_next(&info->offset_index),
                                             struct btrfs_free_space,
                                             offset_index);

                if (next_info->bitmap)
                        end = next_info->offset + BITS_PER_BITMAP *
                                block_group->sectorsize - 1;
                else
                        end = next_info->offset + next_info->bytes;

                if (next_info->bytes < bytes ||
                    next_info->offset > offset || offset > end) {
                        printk(KERN_CRIT "Found free space at %llu, size %llu,"
                              " trying to use %llu\n",
                              (unsigned long long)info->offset,
                              (unsigned long long)info->bytes,
                              (unsigned long long)bytes);
                        WARN_ON(1);
                        ret = -EINVAL;
                        goto out_lock;
                }

                info = next_info;
        }

        if (info->bytes == bytes) {
                unlink_free_space(block_group, info);
                if (info->bitmap) {
                        kfree(info->bitmap);
                        block_group->total_bitmaps--;
                }
                kfree(info);
                goto out_lock;
        }

        if (!info->bitmap && info->offset == offset) {
                unlink_free_space(block_group, info);
                info->offset += bytes;
                info->bytes -= bytes;
                link_free_space(block_group, info);
                goto out_lock;
        }

        if (!info->bitmap && info->offset <= offset &&
            info->offset + info->bytes >= offset + bytes) {
                u64 old_start = info->offset;
                /*
                 * we're freeing space in the middle of the info,
                 * this can happen during tree log replay
                 *
                 * first unlink the old info and then
                 * insert it again after the hole we're creating
                 */
                unlink_free_space(block_group, info);
                if (offset + bytes < info->offset + info->bytes) {
                        u64 old_end = info->offset + info->bytes;

                        info->offset = offset + bytes;
                        info->bytes = old_end - info->offset;
                        ret = link_free_space(block_group, info);
                        WARN_ON(ret);
                        if (ret)
                                goto out_lock;
                } else {
                        /* the hole we're creating ends at the end
                         * of the info struct, just free the info
                         */
                        kfree(info);
                }
                spin_unlock(&block_group->tree_lock);

                /* step two, insert a new info struct to cover
                 * anything before the hole
                 */
                ret = btrfs_add_free_space(block_group, old_start,
                                           offset - old_start);
                WARN_ON(ret);
                goto out;
        }

        ret = remove_from_bitmap(block_group, info, &offset, &bytes);
        if (ret == -EAGAIN)
                goto again;
        BUG_ON(ret);
out_lock:
        spin_unlock(&block_group->tree_lock);
out:
        return ret;
}

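/*
 * Debug helper: print every entry in the free space cache and count how many
 * are at least 'bytes' in size.
 */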
void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
                           u64 bytes)
{
        struct btrfs_free_space *info;
        struct rb_node *n;
        int count = 0;

        for (n = rb_first(&block_group->free_space_offset); n; n = rb_next(n)) {
                info = rb_entry(n, struct btrfs_free_space, offset_index);
                if (info->bytes >= bytes)
                        count++;
                printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n",
                       (unsigned long long)info->offset,
                       (unsigned long long)info->bytes,
                       (info->bitmap) ? "yes" : "no");
        }
        printk(KERN_INFO "block group has cluster?: %s\n",
               list_empty(&block_group->cluster_list) ? "no" : "yes");
        printk(KERN_INFO "%d blocks of free space at or bigger than bytes is"
               "\n", count);
}

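/*
 * Walk the whole tree and return the total number of bytes currently tracked
 * as free in this block group's cache.
 */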
u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group)
{
        struct btrfs_free_space *info;
        struct rb_node *n;
        u64 ret = 0;

        for (n = rb_first(&block_group->free_space_offset); n;
             n = rb_next(n)) {
                info = rb_entry(n, struct btrfs_free_space, offset_index);
                ret += info->bytes;
        }

        return ret;
}

/*
 * for a given cluster, put all of its extents back into the free
 * space cache.  If the block group passed doesn't match the block group
 * pointed to by the cluster, someone else raced in and freed the
 * cluster already.  In that case, we just return without changing anything
 */
static int
__btrfs_return_cluster_to_free_space(
                             struct btrfs_block_group_cache *block_group,
                             struct btrfs_free_cluster *cluster)
{
        struct btrfs_free_space *entry;
        struct rb_node *node;
        bool bitmap;

        spin_lock(&cluster->lock);
        if (cluster->block_group != block_group)
                goto out;

        bitmap = cluster->points_to_bitmap;
        cluster->block_group = NULL;
        cluster->window_start = 0;
        list_del_init(&cluster->block_group_list);
        cluster->points_to_bitmap = false;

        if (bitmap)
                goto out;

        node = rb_first(&cluster->root);
        while (node) {
                entry = rb_entry(node, struct btrfs_free_space, offset_index);
                node = rb_next(&entry->offset_index);
                rb_erase(&entry->offset_index, &cluster->root);
                BUG_ON(entry->bitmap);
                tree_insert_offset(&block_group->free_space_offset,
                                   entry->offset, &entry->offset_index, 0);
        }
        cluster->root.rb_node = NULL;

out:
        spin_unlock(&cluster->lock);
        btrfs_put_block_group(block_group);
        return 0;
}

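/*
 * Tear down the free space cache for a block group: return any clusters to
 * the cache and then free every entry in the tree, dropping the lock
 * periodically so we don't hog the CPU.
 */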
void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
{
        struct btrfs_free_space *info;
        struct rb_node *node;
        struct btrfs_free_cluster *cluster;
        struct list_head *head;

        spin_lock(&block_group->tree_lock);
        while ((head = block_group->cluster_list.next) !=
               &block_group->cluster_list) {
                cluster = list_entry(head, struct btrfs_free_cluster,
                                     block_group_list);

                WARN_ON(cluster->block_group != block_group);
                __btrfs_return_cluster_to_free_space(block_group, cluster);
                if (need_resched()) {
                        spin_unlock(&block_group->tree_lock);
                        cond_resched();
                        spin_lock(&block_group->tree_lock);
                }
        }

        while ((node = rb_last(&block_group->free_space_offset)) != NULL) {
                info = rb_entry(node, struct btrfs_free_space, offset_index);
                unlink_free_space(block_group, info);
                if (info->bitmap)
                        kfree(info->bitmap);
                kfree(info);
                if (need_resched()) {
                        spin_unlock(&block_group->tree_lock);
                        cond_resched();
                        spin_lock(&block_group->tree_lock);
                }
        }

        spin_unlock(&block_group->tree_lock);
}

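/*
 * Allocate 'bytes' from the block group's free space cache.  We search for a
 * region of at least bytes + empty_size, but only 'bytes' is actually removed
 * from the cache.  Returns the start offset of the allocation, or 0 if
 * nothing suitable was found.
 */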
u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
                               u64 offset, u64 bytes, u64 empty_size)
{
        struct btrfs_free_space *entry = NULL;
        u64 bytes_search = bytes + empty_size;
        u64 ret = 0;

        spin_lock(&block_group->tree_lock);
        entry = find_free_space(block_group, &offset, &bytes_search, 0);
        if (!entry)
                goto out;

        ret = offset;
        if (entry->bitmap) {
                bitmap_clear_bits(block_group, entry, offset, bytes);
                if (!entry->bytes) {
                        unlink_free_space(block_group, entry);
                        kfree(entry->bitmap);
                        kfree(entry);
                        block_group->total_bitmaps--;
                        recalculate_thresholds(block_group);
                }
        } else {
                unlink_free_space(block_group, entry);
                entry->offset += bytes;
                entry->bytes -= bytes;
                if (!entry->bytes)
                        kfree(entry);
                else
                        link_free_space(block_group, entry);
        }

out:
        spin_unlock(&block_group->tree_lock);

        return ret;
}

/*
 * given a cluster, put all of its extents back into the free space
 * cache.  If a block group is passed, this function will only free
 * a cluster that belongs to the passed block group.
 *
 * Otherwise, it'll get a reference on the block group pointed to by the
 * cluster and remove the cluster from it.
 */
int btrfs_return_cluster_to_free_space(
                               struct btrfs_block_group_cache *block_group,
                               struct btrfs_free_cluster *cluster)
{
        int ret;

        /* first, get a safe pointer to the block group */
        spin_lock(&cluster->lock);
        if (!block_group) {
                block_group = cluster->block_group;
                if (!block_group) {
                        spin_unlock(&cluster->lock);
                        return 0;
                }
        } else if (cluster->block_group != block_group) {
                /* someone else has already freed it, don't redo their work */
                spin_unlock(&cluster->lock);
                return 0;
        }
        atomic_inc(&block_group->count);
        spin_unlock(&cluster->lock);

        /* now return any extents the cluster had on it */
        spin_lock(&block_group->tree_lock);
        ret = __btrfs_return_cluster_to_free_space(block_group, cluster);
        spin_unlock(&block_group->tree_lock);

        /* finally drop our ref */
        btrfs_put_block_group(block_group);
        return ret;
}

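/*
 * Allocate 'bytes' from a cluster that points at a bitmap entry rather than
 * at a set of extent entries.  Returns the start offset of the allocation, or
 * 0 if the bitmap no longer has a large enough run of free bits.
 */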
static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
                                   struct btrfs_free_cluster *cluster,
                                   u64 bytes, u64 min_start)
{
        struct btrfs_free_space *entry;
        int err;
        u64 search_start = cluster->window_start;
        u64 search_bytes = bytes;
        u64 ret = 0;

        spin_lock(&block_group->tree_lock);
        spin_lock(&cluster->lock);

        if (!cluster->points_to_bitmap)
                goto out;

        if (cluster->block_group != block_group)
                goto out;

        /*
         * search_start is the beginning of the bitmap, but at some point it may
         * be a good idea to point to the actual start of the free area in the
         * bitmap, so do the offset_to_bitmap trick anyway, and set bitmap_only
         * to 1 to make sure we get the bitmap entry
         */
        entry = tree_search_offset(block_group,
                                   offset_to_bitmap(block_group, search_start),
                                   1, 0);
        if (!entry || !entry->bitmap)
                goto out;

        search_start = min_start;
        search_bytes = bytes;

        err = search_bitmap(block_group, entry, &search_start,
                            &search_bytes);
        if (err)
                goto out;

        ret = search_start;
        bitmap_clear_bits(block_group, entry, ret, bytes);
out:
        spin_unlock(&cluster->lock);
        spin_unlock(&block_group->tree_lock);

        return ret;
}

/*
 * given a cluster, try to allocate 'bytes' from it, returns 0
 * if it couldn't find anything suitably large, or a logical disk offset
 * if things worked out
 */
u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
                             struct btrfs_free_cluster *cluster, u64 bytes,
                             u64 min_start)
{
        struct btrfs_free_space *entry = NULL;
        struct rb_node *node;
        u64 ret = 0;

        if (cluster->points_to_bitmap)
                return btrfs_alloc_from_bitmap(block_group, cluster, bytes,
                                               min_start);

        spin_lock(&cluster->lock);
        if (bytes > cluster->max_size)
                goto out;

        if (cluster->block_group != block_group)
                goto out;

        node = rb_first(&cluster->root);
        if (!node)
                goto out;

        entry = rb_entry(node, struct btrfs_free_space, offset_index);

        while (1) {
                if (entry->bytes < bytes || entry->offset < min_start) {
                        struct rb_node *node;

                        node = rb_next(&entry->offset_index);
                        if (!node)
                                break;
                        entry = rb_entry(node, struct btrfs_free_space,
                                         offset_index);
                        continue;
                }
                ret = entry->offset;

                entry->offset += bytes;
                entry->bytes -= bytes;

                if (entry->bytes == 0) {
                        rb_erase(&entry->offset_index, &cluster->root);
                        kfree(entry);
                }
                break;
        }
out:
        spin_unlock(&cluster->lock);

        return ret;
}

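/*
 * Try to build a cluster out of a single bitmap entry: look for enough set
 * bits, reasonably close together, to cover 'bytes'.  On success the cluster
 * is pointed at the bitmap (points_to_bitmap) and its window_start and
 * max_size are filled in.
 */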
static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
                                struct btrfs_free_space *entry,
                                struct btrfs_free_cluster *cluster,
                                u64 offset, u64 bytes, u64 min_bytes)
{
        unsigned long next_zero;
        unsigned long i;
        unsigned long search_bits;
        unsigned long total_bits;
        unsigned long found_bits;
        unsigned long start = 0;
        unsigned long total_found = 0;
        bool found = false;

        i = offset_to_bit(entry->offset, block_group->sectorsize,
                          max_t(u64, offset, entry->offset));
        search_bits = bytes_to_bits(min_bytes, block_group->sectorsize);
        total_bits = bytes_to_bits(bytes, block_group->sectorsize);

again:
        found_bits = 0;
        for (i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i);
             i < BITS_PER_BITMAP;
             i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) {
                next_zero = find_next_zero_bit(entry->bitmap,
                                               BITS_PER_BITMAP, i);
                if (next_zero - i >= search_bits) {
                        found_bits = next_zero - i;
                        break;
                }
                i = next_zero;
        }

        if (!found_bits)
                return -1;

        if (!found) {
                start = i;
                found = true;
        }

        total_found += found_bits;

        if (cluster->max_size < found_bits * block_group->sectorsize)
                cluster->max_size = found_bits * block_group->sectorsize;

        if (total_found < total_bits) {
                i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, next_zero);
                if (i - start > total_bits * 2) {
                        total_found = 0;
                        cluster->max_size = 0;
                        found = false;
                }
                goto again;
        }

        cluster->window_start = start * block_group->sectorsize +
                entry->offset;
        cluster->points_to_bitmap = true;

        return 0;
}

/*
 * here we try to find a cluster of blocks in a block group.  The goal
 * is to find at least bytes free and up to empty_size + bytes free.
 * We might not find them all in one contiguous area.
 *
 * returns zero and sets up cluster if things worked out, otherwise
 * it returns -enospc
 */
int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root,
                             struct btrfs_block_group_cache *block_group,
                             struct btrfs_free_cluster *cluster,
                             u64 offset, u64 bytes, u64 empty_size)
{
        struct btrfs_free_space *entry = NULL;
        struct rb_node *node;
        struct btrfs_free_space *next;
        struct btrfs_free_space *last = NULL;
        u64 min_bytes;
        u64 window_start;
        u64 window_free;
        u64 max_extent = 0;
        bool found_bitmap = false;
        int ret;

        /* for metadata, allow allocations with more holes */
        if (btrfs_test_opt(root, SSD_SPREAD)) {
                min_bytes = bytes + empty_size;
        } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
                /*
                 * we want to do larger allocations when we are
                 * flushing out the delayed refs, it helps prevent
                 * making more work as we go along.
                 */
                if (trans->transaction->delayed_refs.flushing)
                        min_bytes = max(bytes, (bytes + empty_size) >> 1);
                else
                        min_bytes = max(bytes, (bytes + empty_size) >> 4);
        } else
                min_bytes = max(bytes, (bytes + empty_size) >> 2);

        spin_lock(&block_group->tree_lock);
        spin_lock(&cluster->lock);

        /* someone already found a cluster, hooray */
        if (cluster->block_group) {
                ret = 0;
                goto out;
        }
again:
        entry = tree_search_offset(block_group, offset, found_bitmap, 1);
        if (!entry) {
                ret = -ENOSPC;
                goto out;
        }

        /*
         * If found_bitmap is true, we exhausted our search for extent entries,
         * and we just want to search all of the bitmaps that we can find, and
         * ignore any extent entries we find.
         */
        while (entry->bitmap || found_bitmap ||
               (!entry->bitmap && entry->bytes < min_bytes)) {
                struct rb_node *node = rb_next(&entry->offset_index);

                if (entry->bitmap && entry->bytes > bytes + empty_size) {
                        ret = btrfs_bitmap_cluster(block_group, entry, cluster,
                                                   offset, bytes + empty_size,
                                                   min_bytes);
                        if (!ret)
                                goto got_it;
                }

                if (!node) {
                        ret = -ENOSPC;
                        goto out;
                }
                entry = rb_entry(node, struct btrfs_free_space, offset_index);
        }

        /*
         * We already searched all the extent entries from the passed in offset
         * to the end and didn't find enough space for the cluster, and we also
         * didn't find any bitmaps that met our criteria, just go ahead and exit
         */
        if (found_bitmap) {
                ret = -ENOSPC;
                goto out;
        }

        cluster->points_to_bitmap = false;
        window_start = entry->offset;
        window_free = entry->bytes;
        last = entry;
        max_extent = entry->bytes;

        while (1) {
                /* our window is just right, let's fill it */
                if (window_free >= bytes + empty_size)
                        break;

                node = rb_next(&last->offset_index);
                if (!node) {
                        if (found_bitmap)
                                goto again;
                        ret = -ENOSPC;
                        goto out;
                }
                next = rb_entry(node, struct btrfs_free_space, offset_index);

                /*
                 * we found a bitmap, so if this search doesn't result in a
                 * cluster, we know to go and search again for the bitmaps and
                 * start looking for space there
                 */
                if (next->bitmap) {
                        if (!found_bitmap)
                                offset = next->offset;
                        found_bitmap = true;
                        last = next;
                        continue;
                }

                /*
                 * we haven't filled the empty size and the window is
                 * very large.  reset and try again
                 */
                if (next->offset - (last->offset + last->bytes) > 128 * 1024 ||
                    next->offset - window_start > (bytes + empty_size) * 2) {
                        entry = next;
                        window_start = entry->offset;
                        window_free = entry->bytes;
                        last = entry;
                        max_extent = entry->bytes;
                } else {
                        last = next;
                        window_free += next->bytes;
                        if (entry->bytes > max_extent)
                                max_extent = entry->bytes;
                }
        }

        cluster->window_start = entry->offset;

        /*
         * now we've found our entries, pull them out of the free space
         * cache and put them into the cluster rbtree
         *
         * The cluster includes an rbtree, but only uses the offset index
         * of each free space cache entry.
         */
        while (1) {
                node = rb_next(&entry->offset_index);
                if (entry->bitmap && node) {
                        entry = rb_entry(node, struct btrfs_free_space,
                                         offset_index);
                        continue;
                } else if (entry->bitmap && !node) {
                        break;
                }

                rb_erase(&entry->offset_index, &block_group->free_space_offset);
                ret = tree_insert_offset(&cluster->root, entry->offset,
                                         &entry->offset_index, 0);
                BUG_ON(ret);

                if (!node || entry == last)
                        break;

                entry = rb_entry(node, struct btrfs_free_space, offset_index);
        }

        cluster->max_size = max_extent;
got_it:
        ret = 0;
        atomic_inc(&block_group->count);
        list_add_tail(&cluster->block_group_list, &block_group->cluster_list);
        cluster->block_group = block_group;
out:
        spin_unlock(&cluster->lock);
        spin_unlock(&block_group->tree_lock);

        return ret;
}

/*
 * simple code to zero out a cluster
 */
void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
{
        spin_lock_init(&cluster->lock);
        spin_lock_init(&cluster->refill_lock);
        cluster->root.rb_node = NULL;
        cluster->max_size = 0;
        cluster->points_to_bitmap = false;
        INIT_LIST_HEAD(&cluster->block_group_list);
        cluster->block_group = NULL;
}