linux/drivers/md/persistent-data/dm-space-map-metadata.c
<<
>>
Prefs
   1/*
   2 * Copyright (C) 2011 Red Hat, Inc.
   3 *
   4 * This file is released under the GPL.
   5 */
   6
   7#include "dm-space-map.h"
   8#include "dm-space-map-common.h"
   9#include "dm-space-map-metadata.h"
  10
  11#include <linux/list.h>
  12#include <linux/slab.h>
  13#include <linux/device-mapper.h>
  14
  15#define DM_MSG_PREFIX "space map metadata"
  16
  17/*----------------------------------------------------------------*/
  18
  19/*
  20 * An edge triggered threshold.
  21 */
  22struct threshold {
  23        bool threshold_set;
  24        bool value_set;
  25        dm_block_t threshold;
  26        dm_block_t current_value;
  27        dm_sm_threshold_fn fn;
  28        void *context;
  29};
  30
  31static void threshold_init(struct threshold *t)
  32{
  33        t->threshold_set = false;
  34        t->value_set = false;
  35}
  36
  37static void set_threshold(struct threshold *t, dm_block_t value,
  38                          dm_sm_threshold_fn fn, void *context)
  39{
  40        t->threshold_set = true;
  41        t->threshold = value;
  42        t->fn = fn;
  43        t->context = context;
  44}
  45
  46static bool below_threshold(struct threshold *t, dm_block_t value)
  47{
  48        return t->threshold_set && value <= t->threshold;
  49}
  50
  51static bool threshold_already_triggered(struct threshold *t)
  52{
  53        return t->value_set && below_threshold(t, t->current_value);
  54}
  55
  56static void check_threshold(struct threshold *t, dm_block_t value)
  57{
  58        if (below_threshold(t, value) &&
  59            !threshold_already_triggered(t))
  60                t->fn(t->context);
  61
  62        t->value_set = true;
  63        t->current_value = value;
  64}
  65
  66/*----------------------------------------------------------------*/
  67
  68/*
  69 * Space map interface.
  70 *
  71 * The low level disk format is written using the standard btree and
  72 * transaction manager.  This means that performing disk operations may
  73 * cause us to recurse into the space map in order to allocate new blocks.
  74 * For this reason we have a pool of pre-allocated blocks large enough to
  75 * service any metadata_ll_disk operation.
  76 */
  77
  78/*
  79 * FIXME: we should calculate this based on the size of the device.
  80 * Only the metadata space map needs this functionality.
  81 */
  82#define MAX_RECURSIVE_ALLOCATIONS 1024
  83
  84enum block_op_type {
  85        BOP_INC,
  86        BOP_DEC
  87};
  88
  89struct block_op {
  90        enum block_op_type type;
  91        dm_block_t block;
  92};
  93
  94struct bop_ring_buffer {
  95        unsigned begin;
  96        unsigned end;
  97        struct block_op bops[MAX_RECURSIVE_ALLOCATIONS + 1];
  98};
  99
 100static void brb_init(struct bop_ring_buffer *brb)
 101{
 102        brb->begin = 0;
 103        brb->end = 0;
 104}
 105
 106static bool brb_empty(struct bop_ring_buffer *brb)
 107{
 108        return brb->begin == brb->end;
 109}
 110
 111static unsigned brb_next(struct bop_ring_buffer *brb, unsigned old)
 112{
 113        unsigned r = old + 1;
 114        return (r >= (sizeof(brb->bops) / sizeof(*brb->bops))) ? 0 : r;
 115}
 116
 117static int brb_push(struct bop_ring_buffer *brb,
 118                    enum block_op_type type, dm_block_t b)
 119{
 120        struct block_op *bop;
 121        unsigned next = brb_next(brb, brb->end);
 122
 123        /*
 124         * We don't allow the last bop to be filled, this way we can
 125         * differentiate between full and empty.
 126         */
 127        if (next == brb->begin)
 128                return -ENOMEM;
 129
 130        bop = brb->bops + brb->end;
 131        bop->type = type;
 132        bop->block = b;
 133
 134        brb->end = next;
 135
 136        return 0;
 137}
 138
 139static int brb_pop(struct bop_ring_buffer *brb, struct block_op *result)
 140{
 141        struct block_op *bop;
 142
 143        if (brb_empty(brb))
 144                return -ENODATA;
 145
 146        bop = brb->bops + brb->begin;
 147        result->type = bop->type;
 148        result->block = bop->block;
 149
 150        brb->begin = brb_next(brb, brb->begin);
 151
 152        return 0;
 153}
 154
 155/*----------------------------------------------------------------*/
 156
 157struct sm_metadata {
 158        struct dm_space_map sm;
 159
 160        struct ll_disk ll;
 161        struct ll_disk old_ll;
 162
 163        dm_block_t begin;
 164
 165        unsigned recursion_count;
 166        unsigned allocated_this_transaction;
 167        struct bop_ring_buffer uncommitted;
 168
 169        struct threshold threshold;
 170};
 171
 172static int add_bop(struct sm_metadata *smm, enum block_op_type type, dm_block_t b)
 173{
 174        int r = brb_push(&smm->uncommitted, type, b);
 175
 176        if (r) {
 177                DMERR("too many recursive allocations");
 178                return -ENOMEM;
 179        }
 180
 181        return 0;
 182}
 183
 184static int commit_bop(struct sm_metadata *smm, struct block_op *op)
 185{
 186        int r = 0;
 187        enum allocation_event ev;
 188
 189        switch (op->type) {
 190        case BOP_INC:
 191                r = sm_ll_inc(&smm->ll, op->block, &ev);
 192                break;
 193
 194        case BOP_DEC:
 195                r = sm_ll_dec(&smm->ll, op->block, &ev);
 196                break;
 197        }
 198
 199        return r;
 200}
 201
 202static void in(struct sm_metadata *smm)
 203{
 204        smm->recursion_count++;
 205}
 206
 207static int out(struct sm_metadata *smm)
 208{
 209        int r = 0;
 210
 211        /*
 212         * If we're not recursing then very bad things are happening.
 213         */
 214        if (!smm->recursion_count) {
 215                DMERR("lost track of recursion depth");
 216                return -ENOMEM;
 217        }
 218
 219        if (smm->recursion_count == 1) {
 220                while (!brb_empty(&smm->uncommitted)) {
 221                        struct block_op bop;
 222
 223                        r = brb_pop(&smm->uncommitted, &bop);
 224                        if (r) {
 225                                DMERR("bug in bop ring buffer");
 226                                break;
 227                        }
 228
 229                        r = commit_bop(smm, &bop);
 230                        if (r)
 231                                break;
 232                }
 233        }
 234
 235        smm->recursion_count--;
 236
 237        return r;
 238}
 239
 240/*
 241 * When using the out() function above, we often want to combine an error
 242 * code for the operation run in the recursive context with that from
 243 * out().
 244 */
 245static int combine_errors(int r1, int r2)
 246{
 247        return r1 ? r1 : r2;
 248}
 249
 250static int recursing(struct sm_metadata *smm)
 251{
 252        return smm->recursion_count;
 253}
 254
 255static void sm_metadata_destroy(struct dm_space_map *sm)
 256{
 257        struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
 258
 259        kfree(smm);
 260}
 261
 262static int sm_metadata_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count)
 263{
 264        struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
 265
 266        *count = smm->ll.nr_blocks;
 267
 268        return 0;
 269}
 270
 271static int sm_metadata_get_nr_free(struct dm_space_map *sm, dm_block_t *count)
 272{
 273        struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
 274
 275        *count = smm->old_ll.nr_blocks - smm->old_ll.nr_allocated -
 276                 smm->allocated_this_transaction;
 277
 278        return 0;
 279}
 280
 281static int sm_metadata_get_count(struct dm_space_map *sm, dm_block_t b,
 282                                 uint32_t *result)
 283{
 284        int r;
 285        unsigned i;
 286        struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
 287        unsigned adjustment = 0;
 288
 289        /*
 290         * We may have some uncommitted adjustments to add.  This list
 291         * should always be really short.
 292         */
 293        for (i = smm->uncommitted.begin;
 294             i != smm->uncommitted.end;
 295             i = brb_next(&smm->uncommitted, i)) {
 296                struct block_op *op = smm->uncommitted.bops + i;
 297
 298                if (op->block != b)
 299                        continue;
 300
 301                switch (op->type) {
 302                case BOP_INC:
 303                        adjustment++;
 304                        break;
 305
 306                case BOP_DEC:
 307                        adjustment--;
 308                        break;
 309                }
 310        }
 311
 312        r = sm_ll_lookup(&smm->ll, b, result);
 313        if (r)
 314                return r;
 315
 316        *result += adjustment;
 317
 318        return 0;
 319}
 320
 321static int sm_metadata_count_is_more_than_one(struct dm_space_map *sm,
 322                                              dm_block_t b, int *result)
 323{
 324        int r, adjustment = 0;
 325        unsigned i;
 326        struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
 327        uint32_t rc;
 328
 329        /*
 330         * We may have some uncommitted adjustments to add.  This list
 331         * should always be really short.
 332         */
 333        for (i = smm->uncommitted.begin;
 334             i != smm->uncommitted.end;
 335             i = brb_next(&smm->uncommitted, i)) {
 336
 337                struct block_op *op = smm->uncommitted.bops + i;
 338
 339                if (op->block != b)
 340                        continue;
 341
 342                switch (op->type) {
 343                case BOP_INC:
 344                        adjustment++;
 345                        break;
 346
 347                case BOP_DEC:
 348                        adjustment--;
 349                        break;
 350                }
 351        }
 352
 353        if (adjustment > 1) {
 354                *result = 1;
 355                return 0;
 356        }
 357
 358        r = sm_ll_lookup_bitmap(&smm->ll, b, &rc);
 359        if (r)
 360                return r;
 361
 362        if (rc == 3)
 363                /*
 364                 * We err on the side of caution, and always return true.
 365                 */
 366                *result = 1;
 367        else
 368                *result = rc + adjustment > 1;
 369
 370        return 0;
 371}
 372
 373static int sm_metadata_set_count(struct dm_space_map *sm, dm_block_t b,
 374                                 uint32_t count)
 375{
 376        int r, r2;
 377        enum allocation_event ev;
 378        struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
 379
 380        if (smm->recursion_count) {
 381                DMERR("cannot recurse set_count()");
 382                return -EINVAL;
 383        }
 384
 385        in(smm);
 386        r = sm_ll_insert(&smm->ll, b, count, &ev);
 387        r2 = out(smm);
 388
 389        return combine_errors(r, r2);
 390}
 391
 392static int sm_metadata_inc_block(struct dm_space_map *sm, dm_block_t b)
 393{
 394        int r, r2 = 0;
 395        enum allocation_event ev;
 396        struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
 397
 398        if (recursing(smm))
 399                r = add_bop(smm, BOP_INC, b);
 400        else {
 401                in(smm);
 402                r = sm_ll_inc(&smm->ll, b, &ev);
 403                r2 = out(smm);
 404        }
 405
 406        return combine_errors(r, r2);
 407}
 408
 409static int sm_metadata_dec_block(struct dm_space_map *sm, dm_block_t b)
 410{
 411        int r, r2 = 0;
 412        enum allocation_event ev;
 413        struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
 414
 415        if (recursing(smm))
 416                r = add_bop(smm, BOP_DEC, b);
 417        else {
 418                in(smm);
 419                r = sm_ll_dec(&smm->ll, b, &ev);
 420                r2 = out(smm);
 421        }
 422
 423        return combine_errors(r, r2);
 424}
 425
 426static int sm_metadata_new_block_(struct dm_space_map *sm, dm_block_t *b)
 427{
 428        int r, r2 = 0;
 429        enum allocation_event ev;
 430        struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
 431
 432        r = sm_ll_find_free_block(&smm->old_ll, smm->begin, smm->old_ll.nr_blocks, b);
 433        if (r)
 434                return r;
 435
 436        smm->begin = *b + 1;
 437
 438        if (recursing(smm))
 439                r = add_bop(smm, BOP_INC, *b);
 440        else {
 441                in(smm);
 442                r = sm_ll_inc(&smm->ll, *b, &ev);
 443                r2 = out(smm);
 444        }
 445
 446        if (!r)
 447                smm->allocated_this_transaction++;
 448
 449        return combine_errors(r, r2);
 450}
 451
 452static int sm_metadata_new_block(struct dm_space_map *sm, dm_block_t *b)
 453{
 454        dm_block_t count;
 455        struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
 456
 457        int r = sm_metadata_new_block_(sm, b);
 458        if (r) {
 459                DMERR_LIMIT("unable to allocate new metadata block");
 460                return r;
 461        }
 462
 463        r = sm_metadata_get_nr_free(sm, &count);
 464        if (r) {
 465                DMERR_LIMIT("couldn't get free block count");
 466                return r;
 467        }
 468
 469        check_threshold(&smm->threshold, count);
 470
 471        return r;
 472}
 473
 474static int sm_metadata_commit(struct dm_space_map *sm)
 475{
 476        int r;
 477        struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
 478
 479        r = sm_ll_commit(&smm->ll);
 480        if (r)
 481                return r;
 482
 483        memcpy(&smm->old_ll, &smm->ll, sizeof(smm->old_ll));
 484        smm->begin = 0;
 485        smm->allocated_this_transaction = 0;
 486
 487        return 0;
 488}
 489
 490static int sm_metadata_register_threshold_callback(struct dm_space_map *sm,
 491                                                   dm_block_t threshold,
 492                                                   dm_sm_threshold_fn fn,
 493                                                   void *context)
 494{
 495        struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
 496
 497        set_threshold(&smm->threshold, threshold, fn, context);
 498
 499        return 0;
 500}
 501
 502static int sm_metadata_root_size(struct dm_space_map *sm, size_t *result)
 503{
 504        *result = sizeof(struct disk_sm_root);
 505
 506        return 0;
 507}
 508
 509static int sm_metadata_copy_root(struct dm_space_map *sm, void *where_le, size_t max)
 510{
 511        struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
 512        struct disk_sm_root root_le;
 513
 514        root_le.nr_blocks = cpu_to_le64(smm->ll.nr_blocks);
 515        root_le.nr_allocated = cpu_to_le64(smm->ll.nr_allocated);
 516        root_le.bitmap_root = cpu_to_le64(smm->ll.bitmap_root);
 517        root_le.ref_count_root = cpu_to_le64(smm->ll.ref_count_root);
 518
 519        if (max < sizeof(root_le))
 520                return -ENOSPC;
 521
 522        memcpy(where_le, &root_le, sizeof(root_le));
 523
 524        return 0;
 525}
 526
 527static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks);
 528
 529static struct dm_space_map ops = {
 530        .destroy = sm_metadata_destroy,
 531        .extend = sm_metadata_extend,
 532        .get_nr_blocks = sm_metadata_get_nr_blocks,
 533        .get_nr_free = sm_metadata_get_nr_free,
 534        .get_count = sm_metadata_get_count,
 535        .count_is_more_than_one = sm_metadata_count_is_more_than_one,
 536        .set_count = sm_metadata_set_count,
 537        .inc_block = sm_metadata_inc_block,
 538        .dec_block = sm_metadata_dec_block,
 539        .new_block = sm_metadata_new_block,
 540        .commit = sm_metadata_commit,
 541        .root_size = sm_metadata_root_size,
 542        .copy_root = sm_metadata_copy_root,
 543        .register_threshold_callback = sm_metadata_register_threshold_callback
 544};
 545
 546/*----------------------------------------------------------------*/
 547
 548/*
 549 * When a new space map is created that manages its own space.  We use
 550 * this tiny bootstrap allocator.
 551 */
 552static void sm_bootstrap_destroy(struct dm_space_map *sm)
 553{
 554}
 555
 556static int sm_bootstrap_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
 557{
 558        DMERR("bootstrap doesn't support extend");
 559
 560        return -EINVAL;
 561}
 562
 563static int sm_bootstrap_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count)
 564{
 565        struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
 566
 567        return smm->ll.nr_blocks;
 568}
 569
 570static int sm_bootstrap_get_nr_free(struct dm_space_map *sm, dm_block_t *count)
 571{
 572        struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
 573
 574        *count = smm->ll.nr_blocks - smm->begin;
 575
 576        return 0;
 577}
 578
 579static int sm_bootstrap_get_count(struct dm_space_map *sm, dm_block_t b,
 580                                  uint32_t *result)
 581{
 582        struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
 583
 584        return b < smm->begin ? 1 : 0;
 585}
 586
 587static int sm_bootstrap_count_is_more_than_one(struct dm_space_map *sm,
 588                                               dm_block_t b, int *result)
 589{
 590        *result = 0;
 591
 592        return 0;
 593}
 594
 595static int sm_bootstrap_set_count(struct dm_space_map *sm, dm_block_t b,
 596                                  uint32_t count)
 597{
 598        DMERR("bootstrap doesn't support set_count");
 599
 600        return -EINVAL;
 601}
 602
 603static int sm_bootstrap_new_block(struct dm_space_map *sm, dm_block_t *b)
 604{
 605        struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
 606
 607        /*
 608         * We know the entire device is unused.
 609         */
 610        if (smm->begin == smm->ll.nr_blocks)
 611                return -ENOSPC;
 612
 613        *b = smm->begin++;
 614
 615        return 0;
 616}
 617
 618static int sm_bootstrap_inc_block(struct dm_space_map *sm, dm_block_t b)
 619{
 620        struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
 621
 622        return add_bop(smm, BOP_INC, b);
 623}
 624
 625static int sm_bootstrap_dec_block(struct dm_space_map *sm, dm_block_t b)
 626{
 627        struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
 628
 629        return add_bop(smm, BOP_DEC, b);
 630}
 631
 632static int sm_bootstrap_commit(struct dm_space_map *sm)
 633{
 634        return 0;
 635}
 636
 637static int sm_bootstrap_root_size(struct dm_space_map *sm, size_t *result)
 638{
 639        DMERR("bootstrap doesn't support root_size");
 640
 641        return -EINVAL;
 642}
 643
 644static int sm_bootstrap_copy_root(struct dm_space_map *sm, void *where,
 645                                  size_t max)
 646{
 647        DMERR("bootstrap doesn't support copy_root");
 648
 649        return -EINVAL;
 650}
 651
 652static struct dm_space_map bootstrap_ops = {
 653        .destroy = sm_bootstrap_destroy,
 654        .extend = sm_bootstrap_extend,
 655        .get_nr_blocks = sm_bootstrap_get_nr_blocks,
 656        .get_nr_free = sm_bootstrap_get_nr_free,
 657        .get_count = sm_bootstrap_get_count,
 658        .count_is_more_than_one = sm_bootstrap_count_is_more_than_one,
 659        .set_count = sm_bootstrap_set_count,
 660        .inc_block = sm_bootstrap_inc_block,
 661        .dec_block = sm_bootstrap_dec_block,
 662        .new_block = sm_bootstrap_new_block,
 663        .commit = sm_bootstrap_commit,
 664        .root_size = sm_bootstrap_root_size,
 665        .copy_root = sm_bootstrap_copy_root,
 666        .register_threshold_callback = NULL
 667};
 668
 669/*----------------------------------------------------------------*/
 670
 671static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
 672{
 673        int r, i;
 674        enum allocation_event ev;
 675        struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
 676        dm_block_t old_len = smm->ll.nr_blocks;
 677
 678        /*
 679         * Flick into a mode where all blocks get allocated in the new area.
 680         */
 681        smm->begin = old_len;
 682        memcpy(sm, &bootstrap_ops, sizeof(*sm));
 683
 684        /*
 685         * Extend.
 686         */
 687        r = sm_ll_extend(&smm->ll, extra_blocks);
 688        if (r)
 689                goto out;
 690
 691        /*
 692         * We repeatedly increment then commit until the commit doesn't
 693         * allocate any new blocks.
 694         */
 695        do {
 696                for (i = old_len; !r && i < smm->begin; i++) {
 697                        r = sm_ll_inc(&smm->ll, i, &ev);
 698                        if (r)
 699                                goto out;
 700                }
 701                old_len = smm->begin;
 702
 703                r = sm_ll_commit(&smm->ll);
 704                if (r)
 705                        goto out;
 706
 707        } while (old_len != smm->begin);
 708
 709out:
 710        /*
 711         * Switch back to normal behaviour.
 712         */
 713        memcpy(sm, &ops, sizeof(*sm));
 714        return r;
 715}
 716
 717/*----------------------------------------------------------------*/
 718
 719struct dm_space_map *dm_sm_metadata_init(void)
 720{
 721        struct sm_metadata *smm;
 722
 723        smm = kmalloc(sizeof(*smm), GFP_KERNEL);
 724        if (!smm)
 725                return ERR_PTR(-ENOMEM);
 726
 727        memcpy(&smm->sm, &ops, sizeof(smm->sm));
 728
 729        return &smm->sm;
 730}
 731
 732int dm_sm_metadata_create(struct dm_space_map *sm,
 733                          struct dm_transaction_manager *tm,
 734                          dm_block_t nr_blocks,
 735                          dm_block_t superblock)
 736{
 737        int r;
 738        dm_block_t i;
 739        enum allocation_event ev;
 740        struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
 741
 742        smm->begin = superblock + 1;
 743        smm->recursion_count = 0;
 744        smm->allocated_this_transaction = 0;
 745        brb_init(&smm->uncommitted);
 746        threshold_init(&smm->threshold);
 747
 748        memcpy(&smm->sm, &bootstrap_ops, sizeof(smm->sm));
 749
 750        r = sm_ll_new_metadata(&smm->ll, tm);
 751        if (r)
 752                return r;
 753
 754        if (nr_blocks > DM_SM_METADATA_MAX_BLOCKS)
 755                nr_blocks = DM_SM_METADATA_MAX_BLOCKS;
 756        r = sm_ll_extend(&smm->ll, nr_blocks);
 757        if (r)
 758                return r;
 759
 760        memcpy(&smm->sm, &ops, sizeof(smm->sm));
 761
 762        /*
 763         * Now we need to update the newly created data structures with the
 764         * allocated blocks that they were built from.
 765         */
 766        for (i = superblock; !r && i < smm->begin; i++)
 767                r = sm_ll_inc(&smm->ll, i, &ev);
 768
 769        if (r)
 770                return r;
 771
 772        return sm_metadata_commit(sm);
 773}
 774
 775int dm_sm_metadata_open(struct dm_space_map *sm,
 776                        struct dm_transaction_manager *tm,
 777                        void *root_le, size_t len)
 778{
 779        int r;
 780        struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
 781
 782        r = sm_ll_open_metadata(&smm->ll, tm, root_le, len);
 783        if (r)
 784                return r;
 785
 786        smm->begin = 0;
 787        smm->recursion_count = 0;
 788        smm->allocated_this_transaction = 0;
 789        brb_init(&smm->uncommitted);
 790        threshold_init(&smm->threshold);
 791
 792        memcpy(&smm->old_ll, &smm->ll, sizeof(smm->old_ll));
 793        return 0;
 794}
 795