linux/drivers/md/persistent-data/dm-array.c
<<
>>
Prefs
   1/*
   2 * Copyright (C) 2012 Red Hat, Inc.
   3 *
   4 * This file is released under the GPL.
   5 */
   6
   7#include "dm-array.h"
   8#include "dm-space-map.h"
   9#include "dm-transaction-manager.h"
  10
  11#include <linux/export.h>
  12#include <linux/device-mapper.h>
  13
  14#define DM_MSG_PREFIX "array"
  15
  16/*----------------------------------------------------------------*/
  17
  18/*
  19 * The array is implemented as a fully populated btree, which points to
  20 * blocks that contain the packed values.  This is more space efficient
  21 * than just using a btree since we don't store 1 key per value.
  22 */
  23struct array_block {
  24        __le32 csum;
  25        __le32 max_entries;
  26        __le32 nr_entries;
  27        __le32 value_size;
  28        __le64 blocknr; /* Block this node is supposed to live in. */
  29} __packed;
  30
  31/*----------------------------------------------------------------*/
  32
  33/*
  34 * Validator methods.  As usual we calculate a checksum, and also write the
  35 * block location into the header (paranoia about ssds remapping areas by
  36 * mistake).
  37 */
  38#define CSUM_XOR 595846735
  39
  40static void array_block_prepare_for_write(struct dm_block_validator *v,
  41                                          struct dm_block *b,
  42                                          size_t size_of_block)
  43{
  44        struct array_block *bh_le = dm_block_data(b);
  45
  46        bh_le->blocknr = cpu_to_le64(dm_block_location(b));
  47        bh_le->csum = cpu_to_le32(dm_bm_checksum(&bh_le->max_entries,
  48                                                 size_of_block - sizeof(__le32),
  49                                                 CSUM_XOR));
  50}
  51
  52static int array_block_check(struct dm_block_validator *v,
  53                             struct dm_block *b,
  54                             size_t size_of_block)
  55{
  56        struct array_block *bh_le = dm_block_data(b);
  57        __le32 csum_disk;
  58
  59        if (dm_block_location(b) != le64_to_cpu(bh_le->blocknr)) {
  60                DMERR_LIMIT("array_block_check failed: blocknr %llu != wanted %llu",
  61                            (unsigned long long) le64_to_cpu(bh_le->blocknr),
  62                            (unsigned long long) dm_block_location(b));
  63                return -ENOTBLK;
  64        }
  65
  66        csum_disk = cpu_to_le32(dm_bm_checksum(&bh_le->max_entries,
  67                                               size_of_block - sizeof(__le32),
  68                                               CSUM_XOR));
  69        if (csum_disk != bh_le->csum) {
  70                DMERR_LIMIT("array_block_check failed: csum %u != wanted %u",
  71                            (unsigned) le32_to_cpu(csum_disk),
  72                            (unsigned) le32_to_cpu(bh_le->csum));
  73                return -EILSEQ;
  74        }
  75
  76        return 0;
  77}
  78
  79static struct dm_block_validator array_validator = {
  80        .name = "array",
  81        .prepare_for_write = array_block_prepare_for_write,
  82        .check = array_block_check
  83};
  84
  85/*----------------------------------------------------------------*/
  86
  87/*
  88 * Functions for manipulating the array blocks.
  89 */
  90
  91/*
  92 * Returns a pointer to a value within an array block.
  93 *
  94 * index - The index into _this_ specific block.
  95 */
  96static void *element_at(struct dm_array_info *info, struct array_block *ab,
  97                        unsigned index)
  98{
  99        unsigned char *entry = (unsigned char *) (ab + 1);
 100
 101        entry += index * info->value_type.size;
 102
 103        return entry;
 104}
 105
 106/*
 107 * Utility function that calls one of the value_type methods on every value
 108 * in an array block.
 109 */
 110static void on_entries(struct dm_array_info *info, struct array_block *ab,
 111                       void (*fn)(void *, const void *))
 112{
 113        unsigned i, nr_entries = le32_to_cpu(ab->nr_entries);
 114
 115        for (i = 0; i < nr_entries; i++)
 116                fn(info->value_type.context, element_at(info, ab, i));
 117}
 118
 119/*
 120 * Increment every value in an array block.
 121 */
 122static void inc_ablock_entries(struct dm_array_info *info, struct array_block *ab)
 123{
 124        struct dm_btree_value_type *vt = &info->value_type;
 125
 126        if (vt->inc)
 127                on_entries(info, ab, vt->inc);
 128}
 129
 130/*
 131 * Decrement every value in an array block.
 132 */
 133static void dec_ablock_entries(struct dm_array_info *info, struct array_block *ab)
 134{
 135        struct dm_btree_value_type *vt = &info->value_type;
 136
 137        if (vt->dec)
 138                on_entries(info, ab, vt->dec);
 139}
 140
 141/*
 142 * Each array block can hold this many values.
 143 */
 144static uint32_t calc_max_entries(size_t value_size, size_t size_of_block)
 145{
 146        return (size_of_block - sizeof(struct array_block)) / value_size;
 147}
 148
 149/*
 150 * Allocate a new array block.  The caller will need to unlock block.
 151 */
 152static int alloc_ablock(struct dm_array_info *info, size_t size_of_block,
 153                        uint32_t max_entries,
 154                        struct dm_block **block, struct array_block **ab)
 155{
 156        int r;
 157
 158        r = dm_tm_new_block(info->btree_info.tm, &array_validator, block);
 159        if (r)
 160                return r;
 161
 162        (*ab) = dm_block_data(*block);
 163        (*ab)->max_entries = cpu_to_le32(max_entries);
 164        (*ab)->nr_entries = cpu_to_le32(0);
 165        (*ab)->value_size = cpu_to_le32(info->value_type.size);
 166
 167        return 0;
 168}
 169
 170/*
 171 * Pad an array block out with a particular value.  Every instance will
 172 * cause an increment of the value_type.  new_nr must always be more than
 173 * the current number of entries.
 174 */
 175static void fill_ablock(struct dm_array_info *info, struct array_block *ab,
 176                        const void *value, unsigned new_nr)
 177{
 178        unsigned i;
 179        uint32_t nr_entries;
 180        struct dm_btree_value_type *vt = &info->value_type;
 181
 182        BUG_ON(new_nr > le32_to_cpu(ab->max_entries));
 183        BUG_ON(new_nr < le32_to_cpu(ab->nr_entries));
 184
 185        nr_entries = le32_to_cpu(ab->nr_entries);
 186        for (i = nr_entries; i < new_nr; i++) {
 187                if (vt->inc)
 188                        vt->inc(vt->context, value);
 189                memcpy(element_at(info, ab, i), value, vt->size);
 190        }
 191        ab->nr_entries = cpu_to_le32(new_nr);
 192}
 193
 194/*
 195 * Remove some entries from the back of an array block.  Every value
 196 * removed will be decremented.  new_nr must be <= the current number of
 197 * entries.
 198 */
 199static void trim_ablock(struct dm_array_info *info, struct array_block *ab,
 200                        unsigned new_nr)
 201{
 202        unsigned i;
 203        uint32_t nr_entries;
 204        struct dm_btree_value_type *vt = &info->value_type;
 205
 206        BUG_ON(new_nr > le32_to_cpu(ab->max_entries));
 207        BUG_ON(new_nr > le32_to_cpu(ab->nr_entries));
 208
 209        nr_entries = le32_to_cpu(ab->nr_entries);
 210        for (i = nr_entries; i > new_nr; i--)
 211                if (vt->dec)
 212                        vt->dec(vt->context, element_at(info, ab, i - 1));
 213        ab->nr_entries = cpu_to_le32(new_nr);
 214}
 215
 216/*
 217 * Read locks a block, and coerces it to an array block.  The caller must
 218 * unlock 'block' when finished.
 219 */
 220static int get_ablock(struct dm_array_info *info, dm_block_t b,
 221                      struct dm_block **block, struct array_block **ab)
 222{
 223        int r;
 224
 225        r = dm_tm_read_lock(info->btree_info.tm, b, &array_validator, block);
 226        if (r)
 227                return r;
 228
 229        *ab = dm_block_data(*block);
 230        return 0;
 231}
 232
 233/*
 234 * Unlocks an array block.
 235 */
 236static void unlock_ablock(struct dm_array_info *info, struct dm_block *block)
 237{
 238        dm_tm_unlock(info->btree_info.tm, block);
 239}
 240
 241/*----------------------------------------------------------------*/
 242
 243/*
 244 * Btree manipulation.
 245 */
 246
 247/*
 248 * Looks up an array block in the btree, and then read locks it.
 249 *
 250 * index is the index of the index of the array_block, (ie. the array index
 251 * / max_entries).
 252 */
 253static int lookup_ablock(struct dm_array_info *info, dm_block_t root,
 254                         unsigned index, struct dm_block **block,
 255                         struct array_block **ab)
 256{
 257        int r;
 258        uint64_t key = index;
 259        __le64 block_le;
 260
 261        r = dm_btree_lookup(&info->btree_info, root, &key, &block_le);
 262        if (r)
 263                return r;
 264
 265        return get_ablock(info, le64_to_cpu(block_le), block, ab);
 266}
 267
 268/*
 269 * Insert an array block into the btree.  The block is _not_ unlocked.
 270 */
 271static int insert_ablock(struct dm_array_info *info, uint64_t index,
 272                         struct dm_block *block, dm_block_t *root)
 273{
 274        __le64 block_le = cpu_to_le64(dm_block_location(block));
 275
 276        __dm_bless_for_disk(block_le);
 277        return dm_btree_insert(&info->btree_info, *root, &index, &block_le, root);
 278}
 279
 280/*----------------------------------------------------------------*/
 281
 282static int __shadow_ablock(struct dm_array_info *info, dm_block_t b,
 283                           struct dm_block **block, struct array_block **ab)
 284{
 285        int inc;
 286        int r = dm_tm_shadow_block(info->btree_info.tm, b,
 287                                   &array_validator, block, &inc);
 288        if (r)
 289                return r;
 290
 291        *ab = dm_block_data(*block);
 292        if (inc)
 293                inc_ablock_entries(info, *ab);
 294
 295        return 0;
 296}
 297
 298/*
 299 * The shadow op will often be a noop.  Only insert if it really
 300 * copied data.
 301 */
 302static int __reinsert_ablock(struct dm_array_info *info, unsigned index,
 303                             struct dm_block *block, dm_block_t b,
 304                             dm_block_t *root)
 305{
 306        int r = 0;
 307
 308        if (dm_block_location(block) != b) {
 309                /*
 310                 * dm_tm_shadow_block will have already decremented the old
 311                 * block, but it is still referenced by the btree.  We
 312                 * increment to stop the insert decrementing it below zero
 313                 * when overwriting the old value.
 314                 */
 315                dm_tm_inc(info->btree_info.tm, b);
 316                r = insert_ablock(info, index, block, root);
 317        }
 318
 319        return r;
 320}
 321
 322/*
 323 * Looks up an array block in the btree.  Then shadows it, and updates the
 324 * btree to point to this new shadow.  'root' is an input/output parameter
 325 * for both the current root block, and the new one.
 326 */
 327static int shadow_ablock(struct dm_array_info *info, dm_block_t *root,
 328                         unsigned index, struct dm_block **block,
 329                         struct array_block **ab)
 330{
 331        int r;
 332        uint64_t key = index;
 333        dm_block_t b;
 334        __le64 block_le;
 335
 336        r = dm_btree_lookup(&info->btree_info, *root, &key, &block_le);
 337        if (r)
 338                return r;
 339        b = le64_to_cpu(block_le);
 340
 341        r = __shadow_ablock(info, b, block, ab);
 342        if (r)
 343                return r;
 344
 345        return __reinsert_ablock(info, index, *block, b, root);
 346}
 347
 348/*
 349 * Allocate an new array block, and fill it with some values.
 350 */
 351static int insert_new_ablock(struct dm_array_info *info, size_t size_of_block,
 352                             uint32_t max_entries,
 353                             unsigned block_index, uint32_t nr,
 354                             const void *value, dm_block_t *root)
 355{
 356        int r;
 357        struct dm_block *block;
 358        struct array_block *ab;
 359
 360        r = alloc_ablock(info, size_of_block, max_entries, &block, &ab);
 361        if (r)
 362                return r;
 363
 364        fill_ablock(info, ab, value, nr);
 365        r = insert_ablock(info, block_index, block, root);
 366        unlock_ablock(info, block);
 367
 368        return r;
 369}
 370
 371static int insert_full_ablocks(struct dm_array_info *info, size_t size_of_block,
 372                               unsigned begin_block, unsigned end_block,
 373                               unsigned max_entries, const void *value,
 374                               dm_block_t *root)
 375{
 376        int r = 0;
 377
 378        for (; !r && begin_block != end_block; begin_block++)
 379                r = insert_new_ablock(info, size_of_block, max_entries, begin_block, max_entries, value, root);
 380
 381        return r;
 382}
 383
 384/*
 385 * There are a bunch of functions involved with resizing an array.  This
 386 * structure holds information that commonly needed by them.  Purely here
 387 * to reduce parameter count.
 388 */
 389struct resize {
 390        /*
 391         * Describes the array.
 392         */
 393        struct dm_array_info *info;
 394
 395        /*
 396         * The current root of the array.  This gets updated.
 397         */
 398        dm_block_t root;
 399
 400        /*
 401         * Metadata block size.  Used to calculate the nr entries in an
 402         * array block.
 403         */
 404        size_t size_of_block;
 405
 406        /*
 407         * Maximum nr entries in an array block.
 408         */
 409        unsigned max_entries;
 410
 411        /*
 412         * nr of completely full blocks in the array.
 413         *
 414         * 'old' refers to before the resize, 'new' after.
 415         */
 416        unsigned old_nr_full_blocks, new_nr_full_blocks;
 417
 418        /*
 419         * Number of entries in the final block.  0 iff only full blocks in
 420         * the array.
 421         */
 422        unsigned old_nr_entries_in_last_block, new_nr_entries_in_last_block;
 423
 424        /*
 425         * The default value used when growing the array.
 426         */
 427        const void *value;
 428};
 429
 430/*
 431 * Removes a consecutive set of array blocks from the btree.  The values
 432 * in block are decremented as a side effect of the btree remove.
 433 *
 434 * begin_index - the index of the first array block to remove.
 435 * end_index - the one-past-the-end value.  ie. this block is not removed.
 436 */
 437static int drop_blocks(struct resize *resize, unsigned begin_index,
 438                       unsigned end_index)
 439{
 440        int r;
 441
 442        while (begin_index != end_index) {
 443                uint64_t key = begin_index++;
 444                r = dm_btree_remove(&resize->info->btree_info, resize->root,
 445                                    &key, &resize->root);
 446                if (r)
 447                        return r;
 448        }
 449
 450        return 0;
 451}
 452
 453/*
 454 * Calculates how many blocks are needed for the array.
 455 */
 456static unsigned total_nr_blocks_needed(unsigned nr_full_blocks,
 457                                       unsigned nr_entries_in_last_block)
 458{
 459        return nr_full_blocks + (nr_entries_in_last_block ? 1 : 0);
 460}
 461
 462/*
 463 * Shrink an array.
 464 */
 465static int shrink(struct resize *resize)
 466{
 467        int r;
 468        unsigned begin, end;
 469        struct dm_block *block;
 470        struct array_block *ab;
 471
 472        /*
 473         * Lose some blocks from the back?
 474         */
 475        if (resize->new_nr_full_blocks < resize->old_nr_full_blocks) {
 476                begin = total_nr_blocks_needed(resize->new_nr_full_blocks,
 477                                               resize->new_nr_entries_in_last_block);
 478                end = total_nr_blocks_needed(resize->old_nr_full_blocks,
 479                                             resize->old_nr_entries_in_last_block);
 480
 481                r = drop_blocks(resize, begin, end);
 482                if (r)
 483                        return r;
 484        }
 485
 486        /*
 487         * Trim the new tail block
 488         */
 489        if (resize->new_nr_entries_in_last_block) {
 490                r = shadow_ablock(resize->info, &resize->root,
 491                                  resize->new_nr_full_blocks, &block, &ab);
 492                if (r)
 493                        return r;
 494
 495                trim_ablock(resize->info, ab, resize->new_nr_entries_in_last_block);
 496                unlock_ablock(resize->info, block);
 497        }
 498
 499        return 0;
 500}
 501
 502/*
 503 * Grow an array.
 504 */
 505static int grow_extend_tail_block(struct resize *resize, uint32_t new_nr_entries)
 506{
 507        int r;
 508        struct dm_block *block;
 509        struct array_block *ab;
 510
 511        r = shadow_ablock(resize->info, &resize->root,
 512                          resize->old_nr_full_blocks, &block, &ab);
 513        if (r)
 514                return r;
 515
 516        fill_ablock(resize->info, ab, resize->value, new_nr_entries);
 517        unlock_ablock(resize->info, block);
 518
 519        return r;
 520}
 521
 522static int grow_add_tail_block(struct resize *resize)
 523{
 524        return insert_new_ablock(resize->info, resize->size_of_block,
 525                                 resize->max_entries,
 526                                 resize->new_nr_full_blocks,
 527                                 resize->new_nr_entries_in_last_block,
 528                                 resize->value, &resize->root);
 529}
 530
 531static int grow_needs_more_blocks(struct resize *resize)
 532{
 533        int r;
 534        unsigned old_nr_blocks = resize->old_nr_full_blocks;
 535
 536        if (resize->old_nr_entries_in_last_block > 0) {
 537                old_nr_blocks++;
 538
 539                r = grow_extend_tail_block(resize, resize->max_entries);
 540                if (r)
 541                        return r;
 542        }
 543
 544        r = insert_full_ablocks(resize->info, resize->size_of_block,
 545                                old_nr_blocks,
 546                                resize->new_nr_full_blocks,
 547                                resize->max_entries, resize->value,
 548                                &resize->root);
 549        if (r)
 550                return r;
 551
 552        if (resize->new_nr_entries_in_last_block)
 553                r = grow_add_tail_block(resize);
 554
 555        return r;
 556}
 557
 558static int grow(struct resize *resize)
 559{
 560        if (resize->new_nr_full_blocks > resize->old_nr_full_blocks)
 561                return grow_needs_more_blocks(resize);
 562
 563        else if (resize->old_nr_entries_in_last_block)
 564                return grow_extend_tail_block(resize, resize->new_nr_entries_in_last_block);
 565
 566        else
 567                return grow_add_tail_block(resize);
 568}
 569
 570/*----------------------------------------------------------------*/
 571
 572/*
 573 * These are the value_type functions for the btree elements, which point
 574 * to array blocks.
 575 */
 576static void block_inc(void *context, const void *value)
 577{
 578        __le64 block_le;
 579        struct dm_array_info *info = context;
 580
 581        memcpy(&block_le, value, sizeof(block_le));
 582        dm_tm_inc(info->btree_info.tm, le64_to_cpu(block_le));
 583}
 584
 585static void block_dec(void *context, const void *value)
 586{
 587        int r;
 588        uint64_t b;
 589        __le64 block_le;
 590        uint32_t ref_count;
 591        struct dm_block *block;
 592        struct array_block *ab;
 593        struct dm_array_info *info = context;
 594
 595        memcpy(&block_le, value, sizeof(block_le));
 596        b = le64_to_cpu(block_le);
 597
 598        r = dm_tm_ref(info->btree_info.tm, b, &ref_count);
 599        if (r) {
 600                DMERR_LIMIT("couldn't get reference count for block %llu",
 601                            (unsigned long long) b);
 602                return;
 603        }
 604
 605        if (ref_count == 1) {
 606                /*
 607                 * We're about to drop the last reference to this ablock.
 608                 * So we need to decrement the ref count of the contents.
 609                 */
 610                r = get_ablock(info, b, &block, &ab);
 611                if (r) {
 612                        DMERR_LIMIT("couldn't get array block %llu",
 613                                    (unsigned long long) b);
 614                        return;
 615                }
 616
 617                dec_ablock_entries(info, ab);
 618                unlock_ablock(info, block);
 619        }
 620
 621        dm_tm_dec(info->btree_info.tm, b);
 622}
 623
 624static int block_equal(void *context, const void *value1, const void *value2)
 625{
 626        return !memcmp(value1, value2, sizeof(__le64));
 627}
 628
 629/*----------------------------------------------------------------*/
 630
 631void dm_array_info_init(struct dm_array_info *info,
 632                        struct dm_transaction_manager *tm,
 633                        struct dm_btree_value_type *vt)
 634{
 635        struct dm_btree_value_type *bvt = &info->btree_info.value_type;
 636
 637        memcpy(&info->value_type, vt, sizeof(info->value_type));
 638        info->btree_info.tm = tm;
 639        info->btree_info.levels = 1;
 640
 641        bvt->context = info;
 642        bvt->size = sizeof(__le64);
 643        bvt->inc = block_inc;
 644        bvt->dec = block_dec;
 645        bvt->equal = block_equal;
 646}
 647EXPORT_SYMBOL_GPL(dm_array_info_init);
 648
 649int dm_array_empty(struct dm_array_info *info, dm_block_t *root)
 650{
 651        return dm_btree_empty(&info->btree_info, root);
 652}
 653EXPORT_SYMBOL_GPL(dm_array_empty);
 654
 655static int array_resize(struct dm_array_info *info, dm_block_t root,
 656                        uint32_t old_size, uint32_t new_size,
 657                        const void *value, dm_block_t *new_root)
 658{
 659        int r;
 660        struct resize resize;
 661
 662        if (old_size == new_size) {
 663                *new_root = root;
 664                return 0;
 665        }
 666
 667        resize.info = info;
 668        resize.root = root;
 669        resize.size_of_block = dm_bm_block_size(dm_tm_get_bm(info->btree_info.tm));
 670        resize.max_entries = calc_max_entries(info->value_type.size,
 671                                              resize.size_of_block);
 672
 673        resize.old_nr_full_blocks = old_size / resize.max_entries;
 674        resize.old_nr_entries_in_last_block = old_size % resize.max_entries;
 675        resize.new_nr_full_blocks = new_size / resize.max_entries;
 676        resize.new_nr_entries_in_last_block = new_size % resize.max_entries;
 677        resize.value = value;
 678
 679        r = ((new_size > old_size) ? grow : shrink)(&resize);
 680        if (r)
 681                return r;
 682
 683        *new_root = resize.root;
 684        return 0;
 685}
 686
 687int dm_array_resize(struct dm_array_info *info, dm_block_t root,
 688                    uint32_t old_size, uint32_t new_size,
 689                    const void *value, dm_block_t *new_root)
 690                    __dm_written_to_disk(value)
 691{
 692        int r = array_resize(info, root, old_size, new_size, value, new_root);
 693        __dm_unbless_for_disk(value);
 694        return r;
 695}
 696EXPORT_SYMBOL_GPL(dm_array_resize);
 697
 698static int populate_ablock_with_values(struct dm_array_info *info, struct array_block *ab,
 699                                       value_fn fn, void *context, unsigned base, unsigned new_nr)
 700{
 701        int r;
 702        unsigned i;
 703        struct dm_btree_value_type *vt = &info->value_type;
 704
 705        BUG_ON(le32_to_cpu(ab->nr_entries));
 706        BUG_ON(new_nr > le32_to_cpu(ab->max_entries));
 707
 708        for (i = 0; i < new_nr; i++) {
 709                r = fn(base + i, element_at(info, ab, i), context);
 710                if (r)
 711                        return r;
 712
 713                if (vt->inc)
 714                        vt->inc(vt->context, element_at(info, ab, i));
 715        }
 716
 717        ab->nr_entries = cpu_to_le32(new_nr);
 718        return 0;
 719}
 720
 721int dm_array_new(struct dm_array_info *info, dm_block_t *root,
 722                 uint32_t size, value_fn fn, void *context)
 723{
 724        int r;
 725        struct dm_block *block;
 726        struct array_block *ab;
 727        unsigned block_index, end_block, size_of_block, max_entries;
 728
 729        r = dm_array_empty(info, root);
 730        if (r)
 731                return r;
 732
 733        size_of_block = dm_bm_block_size(dm_tm_get_bm(info->btree_info.tm));
 734        max_entries = calc_max_entries(info->value_type.size, size_of_block);
 735        end_block = dm_div_up(size, max_entries);
 736
 737        for (block_index = 0; block_index != end_block; block_index++) {
 738                r = alloc_ablock(info, size_of_block, max_entries, &block, &ab);
 739                if (r)
 740                        break;
 741
 742                r = populate_ablock_with_values(info, ab, fn, context,
 743                                                block_index * max_entries,
 744                                                min(max_entries, size));
 745                if (r) {
 746                        unlock_ablock(info, block);
 747                        break;
 748                }
 749
 750                r = insert_ablock(info, block_index, block, root);
 751                unlock_ablock(info, block);
 752                if (r)
 753                        break;
 754
 755                size -= max_entries;
 756        }
 757
 758        return r;
 759}
 760EXPORT_SYMBOL_GPL(dm_array_new);
 761
 762int dm_array_del(struct dm_array_info *info, dm_block_t root)
 763{
 764        return dm_btree_del(&info->btree_info, root);
 765}
 766EXPORT_SYMBOL_GPL(dm_array_del);
 767
 768int dm_array_get_value(struct dm_array_info *info, dm_block_t root,
 769                       uint32_t index, void *value_le)
 770{
 771        int r;
 772        struct dm_block *block;
 773        struct array_block *ab;
 774        size_t size_of_block;
 775        unsigned entry, max_entries;
 776
 777        size_of_block = dm_bm_block_size(dm_tm_get_bm(info->btree_info.tm));
 778        max_entries = calc_max_entries(info->value_type.size, size_of_block);
 779
 780        r = lookup_ablock(info, root, index / max_entries, &block, &ab);
 781        if (r)
 782                return r;
 783
 784        entry = index % max_entries;
 785        if (entry >= le32_to_cpu(ab->nr_entries))
 786                r = -ENODATA;
 787        else
 788                memcpy(value_le, element_at(info, ab, entry),
 789                       info->value_type.size);
 790
 791        unlock_ablock(info, block);
 792        return r;
 793}
 794EXPORT_SYMBOL_GPL(dm_array_get_value);
 795
 796static int array_set_value(struct dm_array_info *info, dm_block_t root,
 797                           uint32_t index, const void *value, dm_block_t *new_root)
 798{
 799        int r;
 800        struct dm_block *block;
 801        struct array_block *ab;
 802        size_t size_of_block;
 803        unsigned max_entries;
 804        unsigned entry;
 805        void *old_value;
 806        struct dm_btree_value_type *vt = &info->value_type;
 807
 808        size_of_block = dm_bm_block_size(dm_tm_get_bm(info->btree_info.tm));
 809        max_entries = calc_max_entries(info->value_type.size, size_of_block);
 810
 811        r = shadow_ablock(info, &root, index / max_entries, &block, &ab);
 812        if (r)
 813                return r;
 814        *new_root = root;
 815
 816        entry = index % max_entries;
 817        if (entry >= le32_to_cpu(ab->nr_entries)) {
 818                r = -ENODATA;
 819                goto out;
 820        }
 821
 822        old_value = element_at(info, ab, entry);
 823        if (vt->dec &&
 824            (!vt->equal || !vt->equal(vt->context, old_value, value))) {
 825                vt->dec(vt->context, old_value);
 826                if (vt->inc)
 827                        vt->inc(vt->context, value);
 828        }
 829
 830        memcpy(old_value, value, info->value_type.size);
 831
 832out:
 833        unlock_ablock(info, block);
 834        return r;
 835}
 836
 837int dm_array_set_value(struct dm_array_info *info, dm_block_t root,
 838                 uint32_t index, const void *value, dm_block_t *new_root)
 839                 __dm_written_to_disk(value)
 840{
 841        int r;
 842
 843        r = array_set_value(info, root, index, value, new_root);
 844        __dm_unbless_for_disk(value);
 845        return r;
 846}
 847EXPORT_SYMBOL_GPL(dm_array_set_value);
 848
 849struct walk_info {
 850        struct dm_array_info *info;
 851        int (*fn)(void *context, uint64_t key, void *leaf);
 852        void *context;
 853};
 854
 855static int walk_ablock(void *context, uint64_t *keys, void *leaf)
 856{
 857        struct walk_info *wi = context;
 858
 859        int r;
 860        unsigned i;
 861        __le64 block_le;
 862        unsigned nr_entries, max_entries;
 863        struct dm_block *block;
 864        struct array_block *ab;
 865
 866        memcpy(&block_le, leaf, sizeof(block_le));
 867        r = get_ablock(wi->info, le64_to_cpu(block_le), &block, &ab);
 868        if (r)
 869                return r;
 870
 871        max_entries = le32_to_cpu(ab->max_entries);
 872        nr_entries = le32_to_cpu(ab->nr_entries);
 873        for (i = 0; i < nr_entries; i++) {
 874                r = wi->fn(wi->context, keys[0] * max_entries + i,
 875                           element_at(wi->info, ab, i));
 876
 877                if (r)
 878                        break;
 879        }
 880
 881        unlock_ablock(wi->info, block);
 882        return r;
 883}
 884
 885int dm_array_walk(struct dm_array_info *info, dm_block_t root,
 886                  int (*fn)(void *, uint64_t key, void *leaf),
 887                  void *context)
 888{
 889        struct walk_info wi;
 890
 891        wi.info = info;
 892        wi.fn = fn;
 893        wi.context = context;
 894
 895        return dm_btree_walk(&info->btree_info, root, walk_ablock, &wi);
 896}
 897EXPORT_SYMBOL_GPL(dm_array_walk);
 898
 899/*----------------------------------------------------------------*/
 900
 901static int load_ablock(struct dm_array_cursor *c)
 902{
 903        int r;
 904        __le64 value_le;
 905        uint64_t key;
 906
 907        if (c->block)
 908                unlock_ablock(c->info, c->block);
 909
 910        c->block = NULL;
 911        c->ab = NULL;
 912        c->index = 0;
 913
 914        r = dm_btree_cursor_get_value(&c->cursor, &key, &value_le);
 915        if (r) {
 916                DMERR("dm_btree_cursor_get_value failed");
 917                dm_btree_cursor_end(&c->cursor);
 918
 919        } else {
 920                r = get_ablock(c->info, le64_to_cpu(value_le), &c->block, &c->ab);
 921                if (r) {
 922                        DMERR("get_ablock failed");
 923                        dm_btree_cursor_end(&c->cursor);
 924                }
 925        }
 926
 927        return r;
 928}
 929
 930int dm_array_cursor_begin(struct dm_array_info *info, dm_block_t root,
 931                          struct dm_array_cursor *c)
 932{
 933        int r;
 934
 935        memset(c, 0, sizeof(*c));
 936        c->info = info;
 937        r = dm_btree_cursor_begin(&info->btree_info, root, true, &c->cursor);
 938        if (r) {
 939                DMERR("couldn't create btree cursor");
 940                return r;
 941        }
 942
 943        return load_ablock(c);
 944}
 945EXPORT_SYMBOL_GPL(dm_array_cursor_begin);
 946
 947void dm_array_cursor_end(struct dm_array_cursor *c)
 948{
 949        if (c->block) {
 950                unlock_ablock(c->info, c->block);
 951                dm_btree_cursor_end(&c->cursor);
 952        }
 953}
 954EXPORT_SYMBOL_GPL(dm_array_cursor_end);
 955
 956int dm_array_cursor_next(struct dm_array_cursor *c)
 957{
 958        int r;
 959
 960        if (!c->block)
 961                return -ENODATA;
 962
 963        c->index++;
 964
 965        if (c->index >= le32_to_cpu(c->ab->nr_entries)) {
 966                r = dm_btree_cursor_next(&c->cursor);
 967                if (r)
 968                        return r;
 969
 970                r = load_ablock(c);
 971                if (r)
 972                        return r;
 973        }
 974
 975        return 0;
 976}
 977EXPORT_SYMBOL_GPL(dm_array_cursor_next);
 978
 979int dm_array_cursor_skip(struct dm_array_cursor *c, uint32_t count)
 980{
 981        int r;
 982
 983        do {
 984                uint32_t remaining = le32_to_cpu(c->ab->nr_entries) - c->index;
 985
 986                if (count < remaining) {
 987                        c->index += count;
 988                        return 0;
 989                }
 990
 991                count -= remaining;
 992                r = dm_array_cursor_next(c);
 993
 994        } while (!r);
 995
 996        return r;
 997}
 998EXPORT_SYMBOL_GPL(dm_array_cursor_skip);
 999
1000void dm_array_cursor_get_value(struct dm_array_cursor *c, void **value_le)
1001{
1002        *value_le = element_at(c->info, c->ab, c->index);
1003}
1004EXPORT_SYMBOL_GPL(dm_array_cursor_get_value);
1005
1006/*----------------------------------------------------------------*/
1007