linux/drivers/md/persistent-data/dm-space-map-common.c
/*
 * Copyright (C) 2011 Red Hat, Inc.
 *
 * This file is released under the GPL.
 */

#include "dm-space-map-common.h"
#include "dm-transaction-manager.h"
#include "dm-btree-internal.h"
#include "dm-persistent-data-internal.h"

#include <linux/bitops.h>
#include <linux/device-mapper.h>

#define DM_MSG_PREFIX "space map common"

/*----------------------------------------------------------------*/

/*
 * Index validator.
 */
#define INDEX_CSUM_XOR 160478

static void index_prepare_for_write(struct dm_block_validator *v,
                                    struct dm_block *b,
                                    size_t block_size)
{
        struct disk_metadata_index *mi_le = dm_block_data(b);

        mi_le->blocknr = cpu_to_le64(dm_block_location(b));
        mi_le->csum = cpu_to_le32(dm_bm_checksum(&mi_le->padding,
                                                 block_size - sizeof(__le32),
                                                 INDEX_CSUM_XOR));
}

static int index_check(struct dm_block_validator *v,
                       struct dm_block *b,
                       size_t block_size)
{
        struct disk_metadata_index *mi_le = dm_block_data(b);
        __le32 csum_disk;

        if (dm_block_location(b) != le64_to_cpu(mi_le->blocknr)) {
                DMERR_LIMIT("index_check failed: blocknr %llu != wanted %llu",
                            le64_to_cpu(mi_le->blocknr), dm_block_location(b));
                return -ENOTBLK;
        }

        csum_disk = cpu_to_le32(dm_bm_checksum(&mi_le->padding,
                                               block_size - sizeof(__le32),
                                               INDEX_CSUM_XOR));
        if (csum_disk != mi_le->csum) {
                DMERR_LIMIT("index_check failed: csum %u != wanted %u",
                            le32_to_cpu(csum_disk), le32_to_cpu(mi_le->csum));
                return -EILSEQ;
        }

        return 0;
}

static struct dm_block_validator index_validator = {
        .name = "index",
        .prepare_for_write = index_prepare_for_write,
        .check = index_check
};

/*----------------------------------------------------------------*/

/*
 * Bitmap validator
 */
#define BITMAP_CSUM_XOR 240779

static void dm_bitmap_prepare_for_write(struct dm_block_validator *v,
                                        struct dm_block *b,
                                        size_t block_size)
{
        struct disk_bitmap_header *disk_header = dm_block_data(b);

        disk_header->blocknr = cpu_to_le64(dm_block_location(b));
        disk_header->csum = cpu_to_le32(dm_bm_checksum(&disk_header->not_used,
                                                       block_size - sizeof(__le32),
                                                       BITMAP_CSUM_XOR));
}

static int dm_bitmap_check(struct dm_block_validator *v,
                           struct dm_block *b,
                           size_t block_size)
{
        struct disk_bitmap_header *disk_header = dm_block_data(b);
        __le32 csum_disk;

        if (dm_block_location(b) != le64_to_cpu(disk_header->blocknr)) {
                DMERR_LIMIT("bitmap check failed: blocknr %llu != wanted %llu",
                            le64_to_cpu(disk_header->blocknr), dm_block_location(b));
                return -ENOTBLK;
        }

        csum_disk = cpu_to_le32(dm_bm_checksum(&disk_header->not_used,
                                               block_size - sizeof(__le32),
                                               BITMAP_CSUM_XOR));
        if (csum_disk != disk_header->csum) {
                DMERR_LIMIT("bitmap check failed: csum %u != wanted %u",
                            le32_to_cpu(csum_disk), le32_to_cpu(disk_header->csum));
                return -EILSEQ;
        }

        return 0;
}

static struct dm_block_validator dm_sm_bitmap_validator = {
        .name = "sm_bitmap",
        .prepare_for_write = dm_bitmap_prepare_for_write,
        .check = dm_bitmap_check,
};

/*----------------------------------------------------------------*/

#define ENTRIES_PER_WORD 32
#define ENTRIES_SHIFT   5

static void *dm_bitmap_data(struct dm_block *b)
{
        return dm_block_data(b) + sizeof(struct disk_bitmap_header);
}

#define WORD_MASK_HIGH 0xAAAAAAAAAAAAAAAAULL

static unsigned dm_bitmap_word_used(void *addr, unsigned b)
{
        __le64 *words_le = addr;
        __le64 *w_le = words_le + (b >> ENTRIES_SHIFT);

        uint64_t bits = le64_to_cpu(*w_le);
        uint64_t mask = (bits + WORD_MASK_HIGH + 1) & WORD_MASK_HIGH;

        return !(~bits & mask);
}
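
/*
 * Illustrative note (not from the original source): each 64-bit word packs
 * 32 two-bit entries, and WORD_MASK_HIGH selects the high bit of every
 * entry.  Adding WORD_MASK_HIGH + 1 seeds a carry that every non-zero
 * (in-use) entry propagates upwards; a free entry (00) swallows the carry
 * and ends up with its high bit set in the sum while clear in 'bits', so
 * ~bits & mask is non-zero exactly when the word still contains at least
 * one free entry.
 */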

static unsigned sm_lookup_bitmap(void *addr, unsigned b)
{
        __le64 *words_le = addr;
        __le64 *w_le = words_le + (b >> ENTRIES_SHIFT);
        unsigned hi, lo;

        b = (b & (ENTRIES_PER_WORD - 1)) << 1;
        hi = !!test_bit_le(b, (void *) w_le);
        lo = !!test_bit_le(b + 1, (void *) w_le);
        return (hi << 1) | lo;
}

static void sm_set_bitmap(void *addr, unsigned b, unsigned val)
{
        __le64 *words_le = addr;
        __le64 *w_le = words_le + (b >> ENTRIES_SHIFT);

        b = (b & (ENTRIES_PER_WORD - 1)) << 1;

        if (val & 2)
                __set_bit_le(b, (void *) w_le);
        else
                __clear_bit_le(b, (void *) w_le);

        if (val & 1)
                __set_bit_le(b + 1, (void *) w_le);
        else
                __clear_bit_le(b + 1, (void *) w_le);
}
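
/*
 * Illustrative sketch (not part of the driver): the two helpers above view
 * a bitmap block as an array of 2-bit counters, 32 per little-endian
 * 64-bit word.  Values 0-2 are literal reference counts; the value 3 means
 * "the real count lives in the overflow btree".  A round trip on a single
 * zeroed word might look like:
 *
 *      __le64 word = 0;
 *
 *      sm_set_bitmap(&word, 5, 2);     // entry 5 now encodes a count of 2
 *      sm_lookup_bitmap(&word, 5);     // returns 2
 *      dm_bitmap_word_used(&word, 0);  // returns 0: other entries still free
 */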

static int sm_find_free(void *addr, unsigned begin, unsigned end,
                        unsigned *result)
{
        while (begin < end) {
                if (!(begin & (ENTRIES_PER_WORD - 1)) &&
                    dm_bitmap_word_used(addr, begin)) {
                        begin += ENTRIES_PER_WORD;
                        continue;
                }

                if (!sm_lookup_bitmap(addr, begin)) {
                        *result = begin;
                        return 0;
                }

                begin++;
        }

        return -ENOSPC;
}
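
/*
 * Illustrative note: sm_find_free() scans entry by entry, but at each word
 * boundary it first asks dm_bitmap_word_used() whether all 32 entries of
 * that word are in use and, if so, skips the whole word.  A hypothetical
 * caller scanning one bitmap block would look like:
 *
 *      unsigned position;
 *      int r = sm_find_free(dm_bitmap_data(blk), 0,
 *                           ll->entries_per_block, &position);
 *      if (r == -ENOSPC)
 *              ;       // no free entry in this bitmap block
 */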

/*----------------------------------------------------------------*/

static int sm_ll_init(struct ll_disk *ll, struct dm_transaction_manager *tm)
{
        memset(ll, 0, sizeof(struct ll_disk));

        ll->tm = tm;

        ll->bitmap_info.tm = tm;
        ll->bitmap_info.levels = 1;

        /*
         * Because the new bitmap blocks are created via a shadow
         * operation, the old entry has already had its reference count
         * decremented and we don't need the btree to do any bookkeeping.
         */
        ll->bitmap_info.value_type.size = sizeof(struct disk_index_entry);
        ll->bitmap_info.value_type.inc = NULL;
        ll->bitmap_info.value_type.dec = NULL;
        ll->bitmap_info.value_type.equal = NULL;

        ll->ref_count_info.tm = tm;
        ll->ref_count_info.levels = 1;
        ll->ref_count_info.value_type.size = sizeof(uint32_t);
        ll->ref_count_info.value_type.inc = NULL;
        ll->ref_count_info.value_type.dec = NULL;
        ll->ref_count_info.value_type.equal = NULL;

        ll->block_size = dm_bm_block_size(dm_tm_get_bm(tm));

        if (ll->block_size > (1 << 30)) {
                DMERR("block size too big to hold bitmaps");
                return -EINVAL;
        }

        ll->entries_per_block = (ll->block_size - sizeof(struct disk_bitmap_header)) *
                ENTRIES_PER_BYTE;
        ll->nr_blocks = 0;
        ll->bitmap_root = 0;
        ll->ref_count_root = 0;
        ll->bitmap_index_changed = false;

        return 0;
}

int sm_ll_extend(struct ll_disk *ll, dm_block_t extra_blocks)
{
        int r;
        dm_block_t i, nr_blocks, nr_indexes;
        unsigned old_blocks, blocks;

        nr_blocks = ll->nr_blocks + extra_blocks;
        old_blocks = dm_sector_div_up(ll->nr_blocks, ll->entries_per_block);
        blocks = dm_sector_div_up(nr_blocks, ll->entries_per_block);

        nr_indexes = dm_sector_div_up(nr_blocks, ll->entries_per_block);
        if (nr_indexes > ll->max_entries(ll)) {
                DMERR("space map too large");
                return -EINVAL;
        }

        /*
         * We need to set this before the dm_tm_new_block() call below.
         */
        ll->nr_blocks = nr_blocks;
        for (i = old_blocks; i < blocks; i++) {
                struct dm_block *b;
                struct disk_index_entry idx;

                r = dm_tm_new_block(ll->tm, &dm_sm_bitmap_validator, &b);
                if (r < 0)
                        return r;

                idx.blocknr = cpu_to_le64(dm_block_location(b));

                dm_tm_unlock(ll->tm, b);

                idx.nr_free = cpu_to_le32(ll->entries_per_block);
                idx.none_free_before = 0;

                r = ll->save_ie(ll, i, &idx);
                if (r < 0)
                        return r;
        }

        return 0;
}
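
/*
 * Illustrative sketch (not part of the driver): a wrapper such as
 * dm-space-map-disk grows the map roughly like this, remembering the old
 * size itself if it needs it:
 *
 *      dm_block_t old_nr = ll->nr_blocks;
 *      int r = sm_ll_extend(ll, extra_blocks);
 *
 *      // on success, blocks [old_nr, ll->nr_blocks) exist with refcount 0
 *
 * Each new index entry starts with nr_free == entries_per_block and
 * none_free_before == 0, so the allocator treats the whole new bitmap
 * block as free.
 */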

int sm_ll_lookup_bitmap(struct ll_disk *ll, dm_block_t b, uint32_t *result)
{
        int r;
        dm_block_t index = b;
        struct disk_index_entry ie_disk;
        struct dm_block *blk;

        b = do_div(index, ll->entries_per_block);
        r = ll->load_ie(ll, index, &ie_disk);
        if (r < 0)
                return r;

        r = dm_tm_read_lock(ll->tm, le64_to_cpu(ie_disk.blocknr),
                            &dm_sm_bitmap_validator, &blk);
        if (r < 0)
                return r;

        *result = sm_lookup_bitmap(dm_bitmap_data(blk), b);

        dm_tm_unlock(ll->tm, blk);

        return 0;
}

static int sm_ll_lookup_big_ref_count(struct ll_disk *ll, dm_block_t b,
                                      uint32_t *result)
{
        __le32 le_rc;
        int r;

        r = dm_btree_lookup(&ll->ref_count_info, ll->ref_count_root, &b, &le_rc);
        if (r < 0)
                return r;

        *result = le32_to_cpu(le_rc);

        return r;
}

int sm_ll_lookup(struct ll_disk *ll, dm_block_t b, uint32_t *result)
{
        int r = sm_ll_lookup_bitmap(ll, b, result);

        if (r)
                return r;

        if (*result != 3)
                return r;

        return sm_ll_lookup_big_ref_count(ll, b, result);
}
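
/*
 * Illustrative sketch (not part of the driver): a lookup consults the
 * packed bitmap first and only falls back to the overflow btree when the
 * 2-bit value is 3:
 *
 *      uint32_t rc;
 *      int r = sm_ll_lookup(ll, b, &rc);
 *
 *      // on success rc is the full count; values above 2 were fetched
 *      // from the overflow btree rather than the bitmap itself
 */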

int sm_ll_find_free_block(struct ll_disk *ll, dm_block_t begin,
                          dm_block_t end, dm_block_t *result)
{
        int r;
        struct disk_index_entry ie_disk;
        dm_block_t i, index_begin = begin;
        dm_block_t index_end = dm_sector_div_up(end, ll->entries_per_block);

        /*
         * FIXME: Use shifts
         */
        begin = do_div(index_begin, ll->entries_per_block);
        end = do_div(end, ll->entries_per_block);
        if (end == 0)
                end = ll->entries_per_block;

        for (i = index_begin; i < index_end; i++, begin = 0) {
                struct dm_block *blk;
                unsigned position;
                uint32_t bit_end;

                r = ll->load_ie(ll, i, &ie_disk);
                if (r < 0)
                        return r;

                if (le32_to_cpu(ie_disk.nr_free) == 0)
                        continue;

                r = dm_tm_read_lock(ll->tm, le64_to_cpu(ie_disk.blocknr),
                                    &dm_sm_bitmap_validator, &blk);
                if (r < 0)
                        return r;

                bit_end = (i == index_end - 1) ?  end : ll->entries_per_block;

                r = sm_find_free(dm_bitmap_data(blk),
                                 max_t(unsigned, begin, le32_to_cpu(ie_disk.none_free_before)),
                                 bit_end, &position);
                if (r == -ENOSPC) {
                        /*
                         * This might happen because we started searching
                         * part way through the bitmap.
                         */
                        dm_tm_unlock(ll->tm, blk);
                        continue;
                }

                dm_tm_unlock(ll->tm, blk);

                *result = i * ll->entries_per_block + (dm_block_t) position;
                return 0;
        }

        return -ENOSPC;
}

int sm_ll_find_common_free_block(struct ll_disk *old_ll, struct ll_disk *new_ll,
                                 dm_block_t begin, dm_block_t end, dm_block_t *b)
{
        int r;
        uint32_t count;

        do {
                r = sm_ll_find_free_block(new_ll, begin, new_ll->nr_blocks, b);
                if (r)
                        break;

                /* double check this block wasn't used in the old transaction */
                if (*b >= old_ll->nr_blocks)
                        count = 0;
                else {
                        r = sm_ll_lookup(old_ll, *b, &count);
                        if (r)
                                break;

                        if (count)
                                begin = *b + 1;
                }
        } while (count);

        return r;
}
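
/*
 * Illustrative note: the loop above proposes blocks that are free in the
 * new (uncommitted) space map and rejects any that still carry a non-zero
 * count in the old (committed) one, bumping 'begin' past each rejection.
 * A hypothetical caller:
 *
 *      dm_block_t b;
 *      r = sm_ll_find_common_free_block(old_ll, new_ll, begin, end, &b);
 *
 *      // on success b is free in both transactions, so it is safe to
 *      // allocate before the current transaction commits
 */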

/*----------------------------------------------------------------*/

int sm_ll_insert(struct ll_disk *ll, dm_block_t b,
                 uint32_t ref_count, int32_t *nr_allocations)
{
        int r;
        uint32_t bit, old;
        struct dm_block *nb;
        dm_block_t index = b;
        struct disk_index_entry ie_disk;
        void *bm_le;
        int inc;

        bit = do_div(index, ll->entries_per_block);
        r = ll->load_ie(ll, index, &ie_disk);
        if (r < 0)
                return r;

        r = dm_tm_shadow_block(ll->tm, le64_to_cpu(ie_disk.blocknr),
                               &dm_sm_bitmap_validator, &nb, &inc);
        if (r < 0) {
                DMERR("dm_tm_shadow_block() failed");
                return r;
        }
        ie_disk.blocknr = cpu_to_le64(dm_block_location(nb));
        bm_le = dm_bitmap_data(nb);

        old = sm_lookup_bitmap(bm_le, bit);
        if (old > 2) {
                r = sm_ll_lookup_big_ref_count(ll, b, &old);
                if (r < 0) {
                        dm_tm_unlock(ll->tm, nb);
                        return r;
                }
        }

        if (r) {
                dm_tm_unlock(ll->tm, nb);
                return r;
        }

        if (ref_count <= 2) {
                sm_set_bitmap(bm_le, bit, ref_count);
                dm_tm_unlock(ll->tm, nb);

                if (old > 2) {
                        r = dm_btree_remove(&ll->ref_count_info,
                                            ll->ref_count_root,
                                            &b, &ll->ref_count_root);
                        if (r)
                                return r;
                }

        } else {
                __le32 le_rc = cpu_to_le32(ref_count);

                sm_set_bitmap(bm_le, bit, 3);
                dm_tm_unlock(ll->tm, nb);

                __dm_bless_for_disk(&le_rc);
                r = dm_btree_insert(&ll->ref_count_info, ll->ref_count_root,
                                    &b, &le_rc, &ll->ref_count_root);
                if (r < 0) {
                        DMERR("ref count insert failed");
                        return r;
                }
        }

        if (ref_count && !old) {
                *nr_allocations = 1;
                ll->nr_allocated++;
                le32_add_cpu(&ie_disk.nr_free, -1);
                if (le32_to_cpu(ie_disk.none_free_before) == bit)
                        ie_disk.none_free_before = cpu_to_le32(bit + 1);

        } else if (old && !ref_count) {
                *nr_allocations = -1;
                ll->nr_allocated--;
                le32_add_cpu(&ie_disk.nr_free, 1);
                ie_disk.none_free_before = cpu_to_le32(min(le32_to_cpu(ie_disk.none_free_before), bit));
        } else
                *nr_allocations = 0;

        return ll->save_ie(ll, index, &ie_disk);
}
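
/*
 * Illustrative sketch (not part of the driver): sm_ll_insert() stores an
 * exact reference count rather than applying a delta, so a wrapper that
 * wants "set this block's count to 1" would do:
 *
 *      int32_t nr_allocations;
 *      int r = sm_ll_insert(ll, b, 1, &nr_allocations);
 *
 *      // nr_allocations is +1 if the block went from free to used,
 *      // -1 if it went from used to free, and 0 otherwise
 */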

/*----------------------------------------------------------------*/

/*
 * Holds useful intermediate results for the range based inc and dec
 * operations.
 */
struct inc_context {
        struct disk_index_entry ie_disk;
        struct dm_block *bitmap_block;
        void *bitmap;

        struct dm_block *overflow_leaf;
};

static inline void init_inc_context(struct inc_context *ic)
{
        ic->bitmap_block = NULL;
        ic->bitmap = NULL;
        ic->overflow_leaf = NULL;
}

static inline void exit_inc_context(struct ll_disk *ll, struct inc_context *ic)
{
        if (ic->bitmap_block)
                dm_tm_unlock(ll->tm, ic->bitmap_block);
        if (ic->overflow_leaf)
                dm_tm_unlock(ll->tm, ic->overflow_leaf);
}

static inline void reset_inc_context(struct ll_disk *ll, struct inc_context *ic)
{
        exit_inc_context(ll, ic);
        init_inc_context(ic);
}

/*
 * Confirms a btree node contains a particular key at an index.
 */
static bool contains_key(struct btree_node *n, uint64_t key, int index)
{
        return index >= 0 &&
                index < le32_to_cpu(n->header.nr_entries) &&
                le64_to_cpu(n->keys[index]) == key;
}

static int __sm_ll_inc_overflow(struct ll_disk *ll, dm_block_t b, struct inc_context *ic)
{
        int r;
        int index;
        struct btree_node *n;
        __le32 *v_ptr;
        uint32_t rc;

        /*
         * bitmap_block needs to be unlocked because getting the
         * overflow_leaf may need to allocate, and thus use the space map.
         */
        reset_inc_context(ll, ic);

        r = btree_get_overwrite_leaf(&ll->ref_count_info, ll->ref_count_root,
                                     b, &index, &ll->ref_count_root, &ic->overflow_leaf);
        if (r < 0)
                return r;

        n = dm_block_data(ic->overflow_leaf);

        if (!contains_key(n, b, index)) {
                DMERR("overflow btree is missing an entry");
                return -EINVAL;
        }

        v_ptr = value_ptr(n, index);
        rc = le32_to_cpu(*v_ptr) + 1;
        *v_ptr = cpu_to_le32(rc);

        return 0;
}

static int sm_ll_inc_overflow(struct ll_disk *ll, dm_block_t b, struct inc_context *ic)
{
        int index;
        struct btree_node *n;
        __le32 *v_ptr;
        uint32_t rc;

        /*
         * Do we already have the correct overflow leaf?
         */
        if (ic->overflow_leaf) {
                n = dm_block_data(ic->overflow_leaf);
                index = lower_bound(n, b);
                if (contains_key(n, b, index)) {
                        v_ptr = value_ptr(n, index);
                        rc = le32_to_cpu(*v_ptr) + 1;
                        *v_ptr = cpu_to_le32(rc);

                        return 0;
                }
        }

        return __sm_ll_inc_overflow(ll, b, ic);
}

static inline int shadow_bitmap(struct ll_disk *ll, struct inc_context *ic)
{
        int r, inc;
        r = dm_tm_shadow_block(ll->tm, le64_to_cpu(ic->ie_disk.blocknr),
                               &dm_sm_bitmap_validator, &ic->bitmap_block, &inc);
        if (r < 0) {
                DMERR("dm_tm_shadow_block() failed");
                return r;
        }
        ic->ie_disk.blocknr = cpu_to_le64(dm_block_location(ic->bitmap_block));
        ic->bitmap = dm_bitmap_data(ic->bitmap_block);
        return 0;
}

/*
 * Once shadow_bitmap() has been called, which always happens at the start
 * of inc/dec, we can reopen the bitmap with a simple write lock, rather
 * than re-calling dm_tm_shadow_block().
 */
static inline int ensure_bitmap(struct ll_disk *ll, struct inc_context *ic)
{
        if (!ic->bitmap_block) {
                int r = dm_bm_write_lock(dm_tm_get_bm(ll->tm), le64_to_cpu(ic->ie_disk.blocknr),
                                         &dm_sm_bitmap_validator, &ic->bitmap_block);
                if (r) {
                        DMERR("unable to re-get write lock for bitmap");
                        return r;
                }
                ic->bitmap = dm_bitmap_data(ic->bitmap_block);
        }

        return 0;
}
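
/*
 * Illustrative note: within one pass over a bitmap the block is shadowed
 * exactly once (shadow_bitmap() above).  If it has to be dropped so the
 * overflow btree can allocate, the typical sequence is:
 *
 *      reset_inc_context(ll, ic);      // unlock before the btree call
 *      // ... dm_btree_insert()/remove() may allocate via the space map ...
 *      r = ensure_bitmap(ll, ic);      // re-take a plain write lock
 */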

/*
 * Loops round incrementing entries in a single bitmap.
 */
static inline int sm_ll_inc_bitmap(struct ll_disk *ll, dm_block_t b,
                                   uint32_t bit, uint32_t bit_end,
                                   int32_t *nr_allocations, dm_block_t *new_b,
                                   struct inc_context *ic)
{
        int r;
        __le32 le_rc;
        uint32_t old;

        for (; bit != bit_end; bit++, b++) {
                /*
                 * We only need to drop the bitmap if we need to find a new btree
                 * leaf for the overflow.  So if it was dropped last iteration,
                 * we now re-get it.
                 */
                r = ensure_bitmap(ll, ic);
                if (r)
                        return r;

                old = sm_lookup_bitmap(ic->bitmap, bit);
                switch (old) {
                case 0:
                        /* inc bitmap, adjust nr_allocated */
                        sm_set_bitmap(ic->bitmap, bit, 1);
                        (*nr_allocations)++;
                        ll->nr_allocated++;
                        le32_add_cpu(&ic->ie_disk.nr_free, -1);
                        if (le32_to_cpu(ic->ie_disk.none_free_before) == bit)
                                ic->ie_disk.none_free_before = cpu_to_le32(bit + 1);
                        break;

                case 1:
                        /* inc bitmap */
                        sm_set_bitmap(ic->bitmap, bit, 2);
                        break;

                case 2:
                        /* inc bitmap and insert into overflow */
                        sm_set_bitmap(ic->bitmap, bit, 3);
                        reset_inc_context(ll, ic);

                        le_rc = cpu_to_le32(3);
                        __dm_bless_for_disk(&le_rc);
                        r = dm_btree_insert(&ll->ref_count_info, ll->ref_count_root,
                                            &b, &le_rc, &ll->ref_count_root);
                        if (r < 0) {
                                DMERR("ref count insert failed");
                                return r;
                        }
                        break;

                default:
                        /*
                         * inc within the overflow tree only.
                         */
                        r = sm_ll_inc_overflow(ll, b, ic);
                        if (r < 0)
                                return r;
                }
        }

        *new_b = b;
        return 0;
}

/*
 * Finds a bitmap that contains entries in the block range, and increments
 * them.
 */
static int __sm_ll_inc(struct ll_disk *ll, dm_block_t b, dm_block_t e,
                       int32_t *nr_allocations, dm_block_t *new_b)
{
        int r;
        struct inc_context ic;
        uint32_t bit, bit_end;
        dm_block_t index = b;

        init_inc_context(&ic);

        bit = do_div(index, ll->entries_per_block);
        r = ll->load_ie(ll, index, &ic.ie_disk);
        if (r < 0)
                return r;

        r = shadow_bitmap(ll, &ic);
        if (r)
                return r;

        bit_end = min(bit + (e - b), (dm_block_t) ll->entries_per_block);
        r = sm_ll_inc_bitmap(ll, b, bit, bit_end, nr_allocations, new_b, &ic);

        exit_inc_context(ll, &ic);

        if (r)
                return r;

        return ll->save_ie(ll, index, &ic.ie_disk);
}

int sm_ll_inc(struct ll_disk *ll, dm_block_t b, dm_block_t e,
              int32_t *nr_allocations)
{
        *nr_allocations = 0;
        while (b != e) {
                int r = __sm_ll_inc(ll, b, e, nr_allocations, &b);
                if (r)
                        return r;
        }

        return 0;
}
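
/*
 * Illustrative sketch (not part of the driver): incrementing a run of
 * blocks [data_begin, data_end) in one call; the names are hypothetical:
 *
 *      int32_t nr_allocations;
 *      int r = sm_ll_inc(ll, data_begin, data_end, &nr_allocations);
 *
 *      // on success nr_allocations counts how many blocks went 0 -> 1
 */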

/*----------------------------------------------------------------*/

static int __sm_ll_del_overflow(struct ll_disk *ll, dm_block_t b,
                                struct inc_context *ic)
{
        reset_inc_context(ll, ic);
        return dm_btree_remove(&ll->ref_count_info, ll->ref_count_root,
                               &b, &ll->ref_count_root);
}

static int __sm_ll_dec_overflow(struct ll_disk *ll, dm_block_t b,
                                struct inc_context *ic, uint32_t *old_rc)
{
        int r;
        int index = -1;
        struct btree_node *n;
        __le32 *v_ptr;
        uint32_t rc;

        reset_inc_context(ll, ic);
        r = btree_get_overwrite_leaf(&ll->ref_count_info, ll->ref_count_root,
                                     b, &index, &ll->ref_count_root, &ic->overflow_leaf);
        if (r < 0)
                return r;

        n = dm_block_data(ic->overflow_leaf);

        if (!contains_key(n, b, index)) {
                DMERR("overflow btree is missing an entry");
                return -EINVAL;
        }

        v_ptr = value_ptr(n, index);
        rc = le32_to_cpu(*v_ptr);
        *old_rc = rc;

        if (rc == 3) {
                return __sm_ll_del_overflow(ll, b, ic);
        } else {
                rc--;
                *v_ptr = cpu_to_le32(rc);
                return 0;
        }
}

static int sm_ll_dec_overflow(struct ll_disk *ll, dm_block_t b,
                              struct inc_context *ic, uint32_t *old_rc)
{
        /*
         * Do we already have the correct overflow leaf?
         */
        if (ic->overflow_leaf) {
                int index;
                struct btree_node *n;
                __le32 *v_ptr;
                uint32_t rc;

                n = dm_block_data(ic->overflow_leaf);
                index = lower_bound(n, b);
                if (contains_key(n, b, index)) {
                        v_ptr = value_ptr(n, index);
                        rc = le32_to_cpu(*v_ptr);
                        *old_rc = rc;

                        if (rc > 3) {
                                rc--;
                                *v_ptr = cpu_to_le32(rc);
                                return 0;
                        } else {
                                return __sm_ll_del_overflow(ll, b, ic);
                        }

                }
        }

        return __sm_ll_dec_overflow(ll, b, ic, old_rc);
}

/*
 * Loops round decrementing entries in a single bitmap.
 */
static inline int sm_ll_dec_bitmap(struct ll_disk *ll, dm_block_t b,
                                   uint32_t bit, uint32_t bit_end,
                                   struct inc_context *ic,
                                   int32_t *nr_allocations, dm_block_t *new_b)
{
        int r;
        uint32_t old;

        for (; bit != bit_end; bit++, b++) {
                /*
                 * We only need to drop the bitmap if we need to find a new btree
                 * leaf for the overflow.  So if it was dropped last iteration,
                 * we now re-get it.
                 */
                r = ensure_bitmap(ll, ic);
                if (r)
                        return r;

                old = sm_lookup_bitmap(ic->bitmap, bit);
                switch (old) {
                case 0:
                        DMERR("unable to decrement block");
                        return -EINVAL;

                case 1:
                        /* dec bitmap */
                        sm_set_bitmap(ic->bitmap, bit, 0);
                        (*nr_allocations)--;
                        ll->nr_allocated--;
                        le32_add_cpu(&ic->ie_disk.nr_free, 1);
                        ic->ie_disk.none_free_before =
                                cpu_to_le32(min(le32_to_cpu(ic->ie_disk.none_free_before), bit));
                        break;

                case 2:
                        /* dec bitmap */
                        sm_set_bitmap(ic->bitmap, bit, 1);
                        break;

                case 3:
                        r = sm_ll_dec_overflow(ll, b, ic, &old);
                        if (r < 0)
                                return r;

                        if (old == 3) {
                                r = ensure_bitmap(ll, ic);
                                if (r)
                                        return r;

                                sm_set_bitmap(ic->bitmap, bit, 2);
                        }
                        break;
                }
        }

        *new_b = b;
        return 0;
}

static int __sm_ll_dec(struct ll_disk *ll, dm_block_t b, dm_block_t e,
                       int32_t *nr_allocations, dm_block_t *new_b)
{
        int r;
        uint32_t bit, bit_end;
        struct inc_context ic;
        dm_block_t index = b;

        init_inc_context(&ic);

        bit = do_div(index, ll->entries_per_block);
        r = ll->load_ie(ll, index, &ic.ie_disk);
        if (r < 0)
                return r;

        r = shadow_bitmap(ll, &ic);
        if (r)
                return r;

        bit_end = min(bit + (e - b), (dm_block_t) ll->entries_per_block);
        r = sm_ll_dec_bitmap(ll, b, bit, bit_end, &ic, nr_allocations, new_b);
        exit_inc_context(ll, &ic);

        if (r)
                return r;

        return ll->save_ie(ll, index, &ic.ie_disk);
}

int sm_ll_dec(struct ll_disk *ll, dm_block_t b, dm_block_t e,
              int32_t *nr_allocations)
{
        *nr_allocations = 0;
        while (b != e) {
                int r = __sm_ll_dec(ll, b, e, nr_allocations, &b);
                if (r)
                        return r;
        }

        return 0;
}
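
/*
 * Illustrative sketch (not part of the driver): the decrement path mirrors
 * sm_ll_inc(), with nr_allocations going negative as blocks drop to zero;
 * again the range names are hypothetical:
 *
 *      int32_t nr_allocations;
 *      int r = sm_ll_dec(ll, data_begin, data_end, &nr_allocations);
 *
 *      // e.g. freeing 16 fully released blocks yields nr_allocations == -16
 */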

/*----------------------------------------------------------------*/

int sm_ll_commit(struct ll_disk *ll)
{
        int r = 0;

        if (ll->bitmap_index_changed) {
                r = ll->commit(ll);
                if (!r)
                        ll->bitmap_index_changed = false;
        }

        return r;
}

/*----------------------------------------------------------------*/

static int metadata_ll_load_ie(struct ll_disk *ll, dm_block_t index,
                               struct disk_index_entry *ie)
{
        memcpy(ie, ll->mi_le.index + index, sizeof(*ie));
        return 0;
}

static int metadata_ll_save_ie(struct ll_disk *ll, dm_block_t index,
                               struct disk_index_entry *ie)
{
        ll->bitmap_index_changed = true;
        memcpy(ll->mi_le.index + index, ie, sizeof(*ie));
        return 0;
}

static int metadata_ll_init_index(struct ll_disk *ll)
{
        int r;
        struct dm_block *b;

        r = dm_tm_new_block(ll->tm, &index_validator, &b);
        if (r < 0)
                return r;

        ll->bitmap_root = dm_block_location(b);

        dm_tm_unlock(ll->tm, b);

        return 0;
}

static int metadata_ll_open(struct ll_disk *ll)
{
        int r;
        struct dm_block *block;

        r = dm_tm_read_lock(ll->tm, ll->bitmap_root,
                            &index_validator, &block);
        if (r)
                return r;

        memcpy(&ll->mi_le, dm_block_data(block), sizeof(ll->mi_le));
        dm_tm_unlock(ll->tm, block);

        return 0;
}

static dm_block_t metadata_ll_max_entries(struct ll_disk *ll)
{
        return MAX_METADATA_BITMAPS;
}

static int metadata_ll_commit(struct ll_disk *ll)
{
        int r, inc;
        struct dm_block *b;

        r = dm_tm_shadow_block(ll->tm, ll->bitmap_root, &index_validator, &b, &inc);
        if (r)
                return r;

        memcpy(dm_block_data(b), &ll->mi_le, sizeof(ll->mi_le));
        ll->bitmap_root = dm_block_location(b);

        dm_tm_unlock(ll->tm, b);

        return 0;
}

int sm_ll_new_metadata(struct ll_disk *ll, struct dm_transaction_manager *tm)
{
        int r;

        r = sm_ll_init(ll, tm);
        if (r < 0)
                return r;

        ll->load_ie = metadata_ll_load_ie;
        ll->save_ie = metadata_ll_save_ie;
        ll->init_index = metadata_ll_init_index;
        ll->open_index = metadata_ll_open;
        ll->max_entries = metadata_ll_max_entries;
        ll->commit = metadata_ll_commit;

        ll->nr_blocks = 0;
        ll->nr_allocated = 0;

        r = ll->init_index(ll);
        if (r < 0)
                return r;

        r = dm_btree_empty(&ll->ref_count_info, &ll->ref_count_root);
        if (r < 0)
                return r;

        return 0;
}

int sm_ll_open_metadata(struct ll_disk *ll, struct dm_transaction_manager *tm,
                        void *root_le, size_t len)
{
        int r;
        struct disk_sm_root smr;

        if (len < sizeof(struct disk_sm_root)) {
                DMERR("sm_metadata root too small");
                return -ENOMEM;
        }

        /*
         * We don't know the alignment of the root_le buffer, so need to
         * copy into a new structure.
         */
        memcpy(&smr, root_le, sizeof(smr));

        r = sm_ll_init(ll, tm);
        if (r < 0)
                return r;

        ll->load_ie = metadata_ll_load_ie;
        ll->save_ie = metadata_ll_save_ie;
        ll->init_index = metadata_ll_init_index;
        ll->open_index = metadata_ll_open;
        ll->max_entries = metadata_ll_max_entries;
        ll->commit = metadata_ll_commit;

        ll->nr_blocks = le64_to_cpu(smr.nr_blocks);
        ll->nr_allocated = le64_to_cpu(smr.nr_allocated);
        ll->bitmap_root = le64_to_cpu(smr.bitmap_root);
        ll->ref_count_root = le64_to_cpu(smr.ref_count_root);

        return ll->open_index(ll);
}

/*----------------------------------------------------------------*/

static inline int ie_cache_writeback(struct ll_disk *ll, struct ie_cache *iec)
{
        iec->dirty = false;
        __dm_bless_for_disk(iec->ie);
        return dm_btree_insert(&ll->bitmap_info, ll->bitmap_root,
                               &iec->index, &iec->ie, &ll->bitmap_root);
}

static inline unsigned hash_index(dm_block_t index)
{
        return dm_hash_block(index, IE_CACHE_MASK);
}
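
/*
 * Illustrative note: the disk space map keeps its index entries in a btree
 * rather than in a single metadata block, so it caches recently used
 * entries in a small direct-mapped, write-back cache (ll->ie_cache, sized
 * by IE_CACHE_SIZE/IE_CACHE_MASK declared alongside struct ll_disk).
 * hash_index() just picks the cache slot; a miss writes back the victim
 * slot if it is dirty before refilling it, as disk_ll_load_ie() below
 * shows.
 */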

static int disk_ll_load_ie(struct ll_disk *ll, dm_block_t index,
                           struct disk_index_entry *ie)
{
        int r;
        unsigned h = hash_index(index);
        struct ie_cache *iec = ll->ie_cache + h;

        if (iec->valid) {
                if (iec->index == index) {
                        memcpy(ie, &iec->ie, sizeof(*ie));
                        return 0;
                }

                if (iec->dirty) {
                        r = ie_cache_writeback(ll, iec);
                        if (r)
                                return r;
                }
        }

        r = dm_btree_lookup(&ll->bitmap_info, ll->bitmap_root, &index, ie);
        if (!r) {
                iec->valid = true;
                iec->dirty = false;
                iec->index = index;
                memcpy(&iec->ie, ie, sizeof(*ie));
        }

        return r;
}

static int disk_ll_save_ie(struct ll_disk *ll, dm_block_t index,
                           struct disk_index_entry *ie)
{
        int r;
        unsigned h = hash_index(index);
        struct ie_cache *iec = ll->ie_cache + h;

        ll->bitmap_index_changed = true;
        if (iec->valid) {
                if (iec->index == index) {
                        memcpy(&iec->ie, ie, sizeof(*ie));
                        iec->dirty = true;
                        return 0;
                }

                if (iec->dirty) {
                        r = ie_cache_writeback(ll, iec);
                        if (r)
                                return r;
                }
        }

        iec->valid = true;
        iec->dirty = true;
        iec->index = index;
        memcpy(&iec->ie, ie, sizeof(*ie));
        return 0;
}

static int disk_ll_init_index(struct ll_disk *ll)
{
        unsigned i;
        for (i = 0; i < IE_CACHE_SIZE; i++) {
                struct ie_cache *iec = ll->ie_cache + i;
                iec->valid = false;
                iec->dirty = false;
        }
        return dm_btree_empty(&ll->bitmap_info, &ll->bitmap_root);
}

static int disk_ll_open(struct ll_disk *ll)
{
        return 0;
}

static dm_block_t disk_ll_max_entries(struct ll_disk *ll)
{
        return -1ULL;
}

static int disk_ll_commit(struct ll_disk *ll)
{
        int r = 0;
        unsigned i;

        for (i = 0; i < IE_CACHE_SIZE; i++) {
                struct ie_cache *iec = ll->ie_cache + i;
                if (iec->valid && iec->dirty)
                        r = ie_cache_writeback(ll, iec);
        }

        return r;
}
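
/*
 * Illustrative note: disk_ll_commit() flushes every dirty cache slot back
 * into the bitmap btree before the transaction manager commits, returning
 * the last error if any writeback fails; sm_ll_commit() then leaves
 * bitmap_index_changed set so the flush is retried on the next commit.
 * A hypothetical caller setting up a fresh disk space map:
 *
 *      struct ll_disk ll;
 *      int r = sm_ll_new_disk(&ll, tm);
 *      // ... allocate blocks ...
 *      r = sm_ll_commit(&ll);  // invokes disk_ll_commit() via ll.commit
 */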

int sm_ll_new_disk(struct ll_disk *ll, struct dm_transaction_manager *tm)
{
        int r;

        r = sm_ll_init(ll, tm);
        if (r < 0)
                return r;

        ll->load_ie = disk_ll_load_ie;
        ll->save_ie = disk_ll_save_ie;
        ll->init_index = disk_ll_init_index;
        ll->open_index = disk_ll_open;
        ll->max_entries = disk_ll_max_entries;
        ll->commit = disk_ll_commit;

        ll->nr_blocks = 0;
        ll->nr_allocated = 0;

        r = ll->init_index(ll);
        if (r < 0)
                return r;

        r = dm_btree_empty(&ll->ref_count_info, &ll->ref_count_root);
        if (r < 0)
                return r;

        return 0;
}

int sm_ll_open_disk(struct ll_disk *ll, struct dm_transaction_manager *tm,
                    void *root_le, size_t len)
{
        int r;
        struct disk_sm_root *smr = root_le;

        if (len < sizeof(struct disk_sm_root)) {
                DMERR("sm_metadata root too small");
                return -ENOMEM;
        }

        r = sm_ll_init(ll, tm);
        if (r < 0)
                return r;

        ll->load_ie = disk_ll_load_ie;
        ll->save_ie = disk_ll_save_ie;
        ll->init_index = disk_ll_init_index;
        ll->open_index = disk_ll_open;
        ll->max_entries = disk_ll_max_entries;
        ll->commit = disk_ll_commit;

        ll->nr_blocks = le64_to_cpu(smr->nr_blocks);
        ll->nr_allocated = le64_to_cpu(smr->nr_allocated);
        ll->bitmap_root = le64_to_cpu(smr->bitmap_root);
        ll->ref_count_root = le64_to_cpu(smr->ref_count_root);

        return ll->open_index(ll);
}

/*----------------------------------------------------------------*/
1255