linux/drivers/md/dm-cache-metadata.c
<<
>>
Prefs
   1/*
   2 * Copyright (C) 2012 Red Hat, Inc.
   3 *
   4 * This file is released under the GPL.
   5 */
   6
   7#include "dm-cache-metadata.h"
   8
   9#include "persistent-data/dm-array.h"
  10#include "persistent-data/dm-bitset.h"
  11#include "persistent-data/dm-space-map.h"
  12#include "persistent-data/dm-space-map-disk.h"
  13#include "persistent-data/dm-transaction-manager.h"
  14
  15#include <linux/device-mapper.h>
  16
  17/*----------------------------------------------------------------*/
  18
  19#define DM_MSG_PREFIX   "cache metadata"
  20
  21#define CACHE_SUPERBLOCK_MAGIC 06142003
  22#define CACHE_SUPERBLOCK_LOCATION 0
  23#define CACHE_VERSION 1
  24#define CACHE_METADATA_CACHE_SIZE 64
  25
  26/*
  27 *  3 for btree insert +
  28 *  2 for btree lookup used within space map
  29 */
  30#define CACHE_MAX_CONCURRENT_LOCKS 5
  31#define SPACE_MAP_ROOT_SIZE 128
  32
  33enum superblock_flag_bits {
  34        /* for spotting crashes that would invalidate the dirty bitset */
  35        CLEAN_SHUTDOWN,
  36};
  37
  38/*
  39 * Each mapping from cache block -> origin block carries a set of flags.
  40 */
  41enum mapping_bits {
  42        /*
  43         * A valid mapping.  Because we're using an array we clear this
  44         * flag for an non existant mapping.
  45         */
  46        M_VALID = 1,
  47
  48        /*
  49         * The data on the cache is different from that on the origin.
  50         */
  51        M_DIRTY = 2
  52};
  53
  54struct cache_disk_superblock {
  55        __le32 csum;
  56        __le32 flags;
  57        __le64 blocknr;
  58
  59        __u8 uuid[16];
  60        __le64 magic;
  61        __le32 version;
  62
  63        __u8 policy_name[CACHE_POLICY_NAME_SIZE];
  64        __le32 policy_hint_size;
  65
  66        __u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE];
  67        __le64 mapping_root;
  68        __le64 hint_root;
  69
  70        __le64 discard_root;
  71        __le64 discard_block_size;
  72        __le64 discard_nr_blocks;
  73
  74        __le32 data_block_size;
  75        __le32 metadata_block_size;
  76        __le32 cache_blocks;
  77
  78        __le32 compat_flags;
  79        __le32 compat_ro_flags;
  80        __le32 incompat_flags;
  81
  82        __le32 read_hits;
  83        __le32 read_misses;
  84        __le32 write_hits;
  85        __le32 write_misses;
  86
  87        __le32 policy_version[CACHE_POLICY_VERSION_SIZE];
  88} __packed;
  89
  90struct dm_cache_metadata {
  91        struct block_device *bdev;
  92        struct dm_block_manager *bm;
  93        struct dm_space_map *metadata_sm;
  94        struct dm_transaction_manager *tm;
  95
  96        struct dm_array_info info;
  97        struct dm_array_info hint_info;
  98        struct dm_disk_bitset discard_info;
  99
 100        struct rw_semaphore root_lock;
 101        dm_block_t root;
 102        dm_block_t hint_root;
 103        dm_block_t discard_root;
 104
 105        sector_t discard_block_size;
 106        dm_dblock_t discard_nr_blocks;
 107
 108        sector_t data_block_size;
 109        dm_cblock_t cache_blocks;
 110        bool changed:1;
 111        bool clean_when_opened:1;
 112
 113        char policy_name[CACHE_POLICY_NAME_SIZE];
 114        unsigned policy_version[CACHE_POLICY_VERSION_SIZE];
 115        size_t policy_hint_size;
 116        struct dm_cache_statistics stats;
 117};
 118
 119/*-------------------------------------------------------------------
 120 * superblock validator
 121 *-----------------------------------------------------------------*/
 122
 123#define SUPERBLOCK_CSUM_XOR 9031977
 124
 125static void sb_prepare_for_write(struct dm_block_validator *v,
 126                                 struct dm_block *b,
 127                                 size_t sb_block_size)
 128{
 129        struct cache_disk_superblock *disk_super = dm_block_data(b);
 130
 131        disk_super->blocknr = cpu_to_le64(dm_block_location(b));
 132        disk_super->csum = cpu_to_le32(dm_bm_checksum(&disk_super->flags,
 133                                                      sb_block_size - sizeof(__le32),
 134                                                      SUPERBLOCK_CSUM_XOR));
 135}
 136
 137static int sb_check(struct dm_block_validator *v,
 138                    struct dm_block *b,
 139                    size_t sb_block_size)
 140{
 141        struct cache_disk_superblock *disk_super = dm_block_data(b);
 142        __le32 csum_le;
 143
 144        if (dm_block_location(b) != le64_to_cpu(disk_super->blocknr)) {
 145                DMERR("sb_check failed: blocknr %llu: wanted %llu",
 146                      le64_to_cpu(disk_super->blocknr),
 147                      (unsigned long long)dm_block_location(b));
 148                return -ENOTBLK;
 149        }
 150
 151        if (le64_to_cpu(disk_super->magic) != CACHE_SUPERBLOCK_MAGIC) {
 152                DMERR("sb_check failed: magic %llu: wanted %llu",
 153                      le64_to_cpu(disk_super->magic),
 154                      (unsigned long long)CACHE_SUPERBLOCK_MAGIC);
 155                return -EILSEQ;
 156        }
 157
 158        csum_le = cpu_to_le32(dm_bm_checksum(&disk_super->flags,
 159                                             sb_block_size - sizeof(__le32),
 160                                             SUPERBLOCK_CSUM_XOR));
 161        if (csum_le != disk_super->csum) {
 162                DMERR("sb_check failed: csum %u: wanted %u",
 163                      le32_to_cpu(csum_le), le32_to_cpu(disk_super->csum));
 164                return -EILSEQ;
 165        }
 166
 167        return 0;
 168}
 169
 170static struct dm_block_validator sb_validator = {
 171        .name = "superblock",
 172        .prepare_for_write = sb_prepare_for_write,
 173        .check = sb_check
 174};
 175
 176/*----------------------------------------------------------------*/
 177
 178static int superblock_read_lock(struct dm_cache_metadata *cmd,
 179                                struct dm_block **sblock)
 180{
 181        return dm_bm_read_lock(cmd->bm, CACHE_SUPERBLOCK_LOCATION,
 182                               &sb_validator, sblock);
 183}
 184
 185static int superblock_lock_zero(struct dm_cache_metadata *cmd,
 186                                struct dm_block **sblock)
 187{
 188        return dm_bm_write_lock_zero(cmd->bm, CACHE_SUPERBLOCK_LOCATION,
 189                                     &sb_validator, sblock);
 190}
 191
 192static int superblock_lock(struct dm_cache_metadata *cmd,
 193                           struct dm_block **sblock)
 194{
 195        return dm_bm_write_lock(cmd->bm, CACHE_SUPERBLOCK_LOCATION,
 196                                &sb_validator, sblock);
 197}
 198
 199/*----------------------------------------------------------------*/
 200
 201static int __superblock_all_zeroes(struct dm_block_manager *bm, int *result)
 202{
 203        int r;
 204        unsigned i;
 205        struct dm_block *b;
 206        __le64 *data_le, zero = cpu_to_le64(0);
 207        unsigned sb_block_size = dm_bm_block_size(bm) / sizeof(__le64);
 208
 209        /*
 210         * We can't use a validator here - it may be all zeroes.
 211         */
 212        r = dm_bm_read_lock(bm, CACHE_SUPERBLOCK_LOCATION, NULL, &b);
 213        if (r)
 214                return r;
 215
 216        data_le = dm_block_data(b);
 217        *result = 1;
 218        for (i = 0; i < sb_block_size; i++) {
 219                if (data_le[i] != zero) {
 220                        *result = 0;
 221                        break;
 222                }
 223        }
 224
 225        return dm_bm_unlock(b);
 226}
 227
 228static void __setup_mapping_info(struct dm_cache_metadata *cmd)
 229{
 230        struct dm_btree_value_type vt;
 231
 232        vt.context = NULL;
 233        vt.size = sizeof(__le64);
 234        vt.inc = NULL;
 235        vt.dec = NULL;
 236        vt.equal = NULL;
 237        dm_array_info_init(&cmd->info, cmd->tm, &vt);
 238
 239        if (cmd->policy_hint_size) {
 240                vt.size = sizeof(__le32);
 241                dm_array_info_init(&cmd->hint_info, cmd->tm, &vt);
 242        }
 243}
 244
 245static int __write_initial_superblock(struct dm_cache_metadata *cmd)
 246{
 247        int r;
 248        struct dm_block *sblock;
 249        size_t metadata_len;
 250        struct cache_disk_superblock *disk_super;
 251        sector_t bdev_size = i_size_read(cmd->bdev->bd_inode) >> SECTOR_SHIFT;
 252
 253        /* FIXME: see if we can lose the max sectors limit */
 254        if (bdev_size > DM_CACHE_METADATA_MAX_SECTORS)
 255                bdev_size = DM_CACHE_METADATA_MAX_SECTORS;
 256
 257        r = dm_sm_root_size(cmd->metadata_sm, &metadata_len);
 258        if (r < 0)
 259                return r;
 260
 261        r = dm_tm_pre_commit(cmd->tm);
 262        if (r < 0)
 263                return r;
 264
 265        r = superblock_lock_zero(cmd, &sblock);
 266        if (r)
 267                return r;
 268
 269        disk_super = dm_block_data(sblock);
 270        disk_super->flags = 0;
 271        memset(disk_super->uuid, 0, sizeof(disk_super->uuid));
 272        disk_super->magic = cpu_to_le64(CACHE_SUPERBLOCK_MAGIC);
 273        disk_super->version = cpu_to_le32(CACHE_VERSION);
 274        memset(disk_super->policy_name, 0, sizeof(disk_super->policy_name));
 275        memset(disk_super->policy_version, 0, sizeof(disk_super->policy_version));
 276        disk_super->policy_hint_size = 0;
 277
 278        r = dm_sm_copy_root(cmd->metadata_sm, &disk_super->metadata_space_map_root,
 279                            metadata_len);
 280        if (r < 0)
 281                goto bad_locked;
 282
 283        disk_super->mapping_root = cpu_to_le64(cmd->root);
 284        disk_super->hint_root = cpu_to_le64(cmd->hint_root);
 285        disk_super->discard_root = cpu_to_le64(cmd->discard_root);
 286        disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size);
 287        disk_super->discard_nr_blocks = cpu_to_le64(from_dblock(cmd->discard_nr_blocks));
 288        disk_super->metadata_block_size = cpu_to_le32(DM_CACHE_METADATA_BLOCK_SIZE >> SECTOR_SHIFT);
 289        disk_super->data_block_size = cpu_to_le32(cmd->data_block_size);
 290        disk_super->cache_blocks = cpu_to_le32(0);
 291
 292        disk_super->read_hits = cpu_to_le32(0);
 293        disk_super->read_misses = cpu_to_le32(0);
 294        disk_super->write_hits = cpu_to_le32(0);
 295        disk_super->write_misses = cpu_to_le32(0);
 296
 297        return dm_tm_commit(cmd->tm, sblock);
 298
 299bad_locked:
 300        dm_bm_unlock(sblock);
 301        return r;
 302}
 303
 304static int __format_metadata(struct dm_cache_metadata *cmd)
 305{
 306        int r;
 307
 308        r = dm_tm_create_with_sm(cmd->bm, CACHE_SUPERBLOCK_LOCATION,
 309                                 &cmd->tm, &cmd->metadata_sm);
 310        if (r < 0) {
 311                DMERR("tm_create_with_sm failed");
 312                return r;
 313        }
 314
 315        __setup_mapping_info(cmd);
 316
 317        r = dm_array_empty(&cmd->info, &cmd->root);
 318        if (r < 0)
 319                goto bad;
 320
 321        dm_disk_bitset_init(cmd->tm, &cmd->discard_info);
 322
 323        r = dm_bitset_empty(&cmd->discard_info, &cmd->discard_root);
 324        if (r < 0)
 325                goto bad;
 326
 327        cmd->discard_block_size = 0;
 328        cmd->discard_nr_blocks = 0;
 329
 330        r = __write_initial_superblock(cmd);
 331        if (r)
 332                goto bad;
 333
 334        cmd->clean_when_opened = true;
 335        return 0;
 336
 337bad:
 338        dm_tm_destroy(cmd->tm);
 339        dm_sm_destroy(cmd->metadata_sm);
 340
 341        return r;
 342}
 343
 344static int __check_incompat_features(struct cache_disk_superblock *disk_super,
 345                                     struct dm_cache_metadata *cmd)
 346{
 347        uint32_t features;
 348
 349        features = le32_to_cpu(disk_super->incompat_flags) & ~DM_CACHE_FEATURE_INCOMPAT_SUPP;
 350        if (features) {
 351                DMERR("could not access metadata due to unsupported optional features (%lx).",
 352                      (unsigned long)features);
 353                return -EINVAL;
 354        }
 355
 356        /*
 357         * Check for read-only metadata to skip the following RDWR checks.
 358         */
 359        if (get_disk_ro(cmd->bdev->bd_disk))
 360                return 0;
 361
 362        features = le32_to_cpu(disk_super->compat_ro_flags) & ~DM_CACHE_FEATURE_COMPAT_RO_SUPP;
 363        if (features) {
 364                DMERR("could not access metadata RDWR due to unsupported optional features (%lx).",
 365                      (unsigned long)features);
 366                return -EINVAL;
 367        }
 368
 369        return 0;
 370}
 371
 372static int __open_metadata(struct dm_cache_metadata *cmd)
 373{
 374        int r;
 375        struct dm_block *sblock;
 376        struct cache_disk_superblock *disk_super;
 377        unsigned long sb_flags;
 378
 379        r = superblock_read_lock(cmd, &sblock);
 380        if (r < 0) {
 381                DMERR("couldn't read lock superblock");
 382                return r;
 383        }
 384
 385        disk_super = dm_block_data(sblock);
 386
 387        r = __check_incompat_features(disk_super, cmd);
 388        if (r < 0)
 389                goto bad;
 390
 391        r = dm_tm_open_with_sm(cmd->bm, CACHE_SUPERBLOCK_LOCATION,
 392                               disk_super->metadata_space_map_root,
 393                               sizeof(disk_super->metadata_space_map_root),
 394                               &cmd->tm, &cmd->metadata_sm);
 395        if (r < 0) {
 396                DMERR("tm_open_with_sm failed");
 397                goto bad;
 398        }
 399
 400        __setup_mapping_info(cmd);
 401        dm_disk_bitset_init(cmd->tm, &cmd->discard_info);
 402        sb_flags = le32_to_cpu(disk_super->flags);
 403        cmd->clean_when_opened = test_bit(CLEAN_SHUTDOWN, &sb_flags);
 404        return dm_bm_unlock(sblock);
 405
 406bad:
 407        dm_bm_unlock(sblock);
 408        return r;
 409}
 410
 411static int __open_or_format_metadata(struct dm_cache_metadata *cmd,
 412                                     bool format_device)
 413{
 414        int r, unformatted;
 415
 416        r = __superblock_all_zeroes(cmd->bm, &unformatted);
 417        if (r)
 418                return r;
 419
 420        if (unformatted)
 421                return format_device ? __format_metadata(cmd) : -EPERM;
 422
 423        return __open_metadata(cmd);
 424}
 425
 426static int __create_persistent_data_objects(struct dm_cache_metadata *cmd,
 427                                            bool may_format_device)
 428{
 429        int r;
 430        cmd->bm = dm_block_manager_create(cmd->bdev, DM_CACHE_METADATA_BLOCK_SIZE,
 431                                          CACHE_METADATA_CACHE_SIZE,
 432                                          CACHE_MAX_CONCURRENT_LOCKS);
 433        if (IS_ERR(cmd->bm)) {
 434                DMERR("could not create block manager");
 435                return PTR_ERR(cmd->bm);
 436        }
 437
 438        r = __open_or_format_metadata(cmd, may_format_device);
 439        if (r)
 440                dm_block_manager_destroy(cmd->bm);
 441
 442        return r;
 443}
 444
 445static void __destroy_persistent_data_objects(struct dm_cache_metadata *cmd)
 446{
 447        dm_sm_destroy(cmd->metadata_sm);
 448        dm_tm_destroy(cmd->tm);
 449        dm_block_manager_destroy(cmd->bm);
 450}
 451
 452typedef unsigned long (*flags_mutator)(unsigned long);
 453
 454static void update_flags(struct cache_disk_superblock *disk_super,
 455                         flags_mutator mutator)
 456{
 457        uint32_t sb_flags = mutator(le32_to_cpu(disk_super->flags));
 458        disk_super->flags = cpu_to_le32(sb_flags);
 459}
 460
 461static unsigned long set_clean_shutdown(unsigned long flags)
 462{
 463        set_bit(CLEAN_SHUTDOWN, &flags);
 464        return flags;
 465}
 466
 467static unsigned long clear_clean_shutdown(unsigned long flags)
 468{
 469        clear_bit(CLEAN_SHUTDOWN, &flags);
 470        return flags;
 471}
 472
 473static void read_superblock_fields(struct dm_cache_metadata *cmd,
 474                                   struct cache_disk_superblock *disk_super)
 475{
 476        cmd->root = le64_to_cpu(disk_super->mapping_root);
 477        cmd->hint_root = le64_to_cpu(disk_super->hint_root);
 478        cmd->discard_root = le64_to_cpu(disk_super->discard_root);
 479        cmd->discard_block_size = le64_to_cpu(disk_super->discard_block_size);
 480        cmd->discard_nr_blocks = to_dblock(le64_to_cpu(disk_super->discard_nr_blocks));
 481        cmd->data_block_size = le32_to_cpu(disk_super->data_block_size);
 482        cmd->cache_blocks = to_cblock(le32_to_cpu(disk_super->cache_blocks));
 483        strncpy(cmd->policy_name, disk_super->policy_name, sizeof(cmd->policy_name));
 484        cmd->policy_version[0] = le32_to_cpu(disk_super->policy_version[0]);
 485        cmd->policy_version[1] = le32_to_cpu(disk_super->policy_version[1]);
 486        cmd->policy_version[2] = le32_to_cpu(disk_super->policy_version[2]);
 487        cmd->policy_hint_size = le32_to_cpu(disk_super->policy_hint_size);
 488
 489        cmd->stats.read_hits = le32_to_cpu(disk_super->read_hits);
 490        cmd->stats.read_misses = le32_to_cpu(disk_super->read_misses);
 491        cmd->stats.write_hits = le32_to_cpu(disk_super->write_hits);
 492        cmd->stats.write_misses = le32_to_cpu(disk_super->write_misses);
 493
 494        cmd->changed = false;
 495}
 496
 497/*
 498 * The mutator updates the superblock flags.
 499 */
 500static int __begin_transaction_flags(struct dm_cache_metadata *cmd,
 501                                     flags_mutator mutator)
 502{
 503        int r;
 504        struct cache_disk_superblock *disk_super;
 505        struct dm_block *sblock;
 506
 507        r = superblock_lock(cmd, &sblock);
 508        if (r)
 509                return r;
 510
 511        disk_super = dm_block_data(sblock);
 512        update_flags(disk_super, mutator);
 513        read_superblock_fields(cmd, disk_super);
 514
 515        return dm_bm_flush_and_unlock(cmd->bm, sblock);
 516}
 517
 518static int __begin_transaction(struct dm_cache_metadata *cmd)
 519{
 520        int r;
 521        struct cache_disk_superblock *disk_super;
 522        struct dm_block *sblock;
 523
 524        /*
 525         * We re-read the superblock every time.  Shouldn't need to do this
 526         * really.
 527         */
 528        r = superblock_read_lock(cmd, &sblock);
 529        if (r)
 530                return r;
 531
 532        disk_super = dm_block_data(sblock);
 533        read_superblock_fields(cmd, disk_super);
 534        dm_bm_unlock(sblock);
 535
 536        return 0;
 537}
 538
 539static int __commit_transaction(struct dm_cache_metadata *cmd,
 540                                flags_mutator mutator)
 541{
 542        int r;
 543        size_t metadata_len;
 544        struct cache_disk_superblock *disk_super;
 545        struct dm_block *sblock;
 546
 547        /*
 548         * We need to know if the cache_disk_superblock exceeds a 512-byte sector.
 549         */
 550        BUILD_BUG_ON(sizeof(struct cache_disk_superblock) > 512);
 551
 552        r = dm_bitset_flush(&cmd->discard_info, cmd->discard_root,
 553                            &cmd->discard_root);
 554        if (r)
 555                return r;
 556
 557        r = dm_tm_pre_commit(cmd->tm);
 558        if (r < 0)
 559                return r;
 560
 561        r = dm_sm_root_size(cmd->metadata_sm, &metadata_len);
 562        if (r < 0)
 563                return r;
 564
 565        r = superblock_lock(cmd, &sblock);
 566        if (r)
 567                return r;
 568
 569        disk_super = dm_block_data(sblock);
 570
 571        if (mutator)
 572                update_flags(disk_super, mutator);
 573
 574        disk_super->mapping_root = cpu_to_le64(cmd->root);
 575        disk_super->hint_root = cpu_to_le64(cmd->hint_root);
 576        disk_super->discard_root = cpu_to_le64(cmd->discard_root);
 577        disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size);
 578        disk_super->discard_nr_blocks = cpu_to_le64(from_dblock(cmd->discard_nr_blocks));
 579        disk_super->cache_blocks = cpu_to_le32(from_cblock(cmd->cache_blocks));
 580        strncpy(disk_super->policy_name, cmd->policy_name, sizeof(disk_super->policy_name));
 581        disk_super->policy_version[0] = cpu_to_le32(cmd->policy_version[0]);
 582        disk_super->policy_version[1] = cpu_to_le32(cmd->policy_version[1]);
 583        disk_super->policy_version[2] = cpu_to_le32(cmd->policy_version[2]);
 584
 585        disk_super->read_hits = cpu_to_le32(cmd->stats.read_hits);
 586        disk_super->read_misses = cpu_to_le32(cmd->stats.read_misses);
 587        disk_super->write_hits = cpu_to_le32(cmd->stats.write_hits);
 588        disk_super->write_misses = cpu_to_le32(cmd->stats.write_misses);
 589
 590        r = dm_sm_copy_root(cmd->metadata_sm, &disk_super->metadata_space_map_root,
 591                            metadata_len);
 592        if (r < 0) {
 593                dm_bm_unlock(sblock);
 594                return r;
 595        }
 596
 597        return dm_tm_commit(cmd->tm, sblock);
 598}
 599
 600/*----------------------------------------------------------------*/
 601
 602/*
 603 * The mappings are held in a dm-array that has 64-bit values stored in
 604 * little-endian format.  The index is the cblock, the high 48bits of the
 605 * value are the oblock and the low 16 bit the flags.
 606 */
 607#define FLAGS_MASK ((1 << 16) - 1)
 608
 609static __le64 pack_value(dm_oblock_t block, unsigned flags)
 610{
 611        uint64_t value = from_oblock(block);
 612        value <<= 16;
 613        value = value | (flags & FLAGS_MASK);
 614        return cpu_to_le64(value);
 615}
 616
 617static void unpack_value(__le64 value_le, dm_oblock_t *block, unsigned *flags)
 618{
 619        uint64_t value = le64_to_cpu(value_le);
 620        uint64_t b = value >> 16;
 621        *block = to_oblock(b);
 622        *flags = value & FLAGS_MASK;
 623}
 624
 625/*----------------------------------------------------------------*/
 626
 627struct dm_cache_metadata *dm_cache_metadata_open(struct block_device *bdev,
 628                                                 sector_t data_block_size,
 629                                                 bool may_format_device,
 630                                                 size_t policy_hint_size)
 631{
 632        int r;
 633        struct dm_cache_metadata *cmd;
 634
 635        cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
 636        if (!cmd) {
 637                DMERR("could not allocate metadata struct");
 638                return NULL;
 639        }
 640
 641        init_rwsem(&cmd->root_lock);
 642        cmd->bdev = bdev;
 643        cmd->data_block_size = data_block_size;
 644        cmd->cache_blocks = 0;
 645        cmd->policy_hint_size = policy_hint_size;
 646        cmd->changed = true;
 647
 648        r = __create_persistent_data_objects(cmd, may_format_device);
 649        if (r) {
 650                kfree(cmd);
 651                return ERR_PTR(r);
 652        }
 653
 654        r = __begin_transaction_flags(cmd, clear_clean_shutdown);
 655        if (r < 0) {
 656                dm_cache_metadata_close(cmd);
 657                return ERR_PTR(r);
 658        }
 659
 660        return cmd;
 661}
 662
 663void dm_cache_metadata_close(struct dm_cache_metadata *cmd)
 664{
 665        __destroy_persistent_data_objects(cmd);
 666        kfree(cmd);
 667}
 668
 669int dm_cache_resize(struct dm_cache_metadata *cmd, dm_cblock_t new_cache_size)
 670{
 671        int r;
 672        __le64 null_mapping = pack_value(0, 0);
 673
 674        down_write(&cmd->root_lock);
 675        __dm_bless_for_disk(&null_mapping);
 676        r = dm_array_resize(&cmd->info, cmd->root, from_cblock(cmd->cache_blocks),
 677                            from_cblock(new_cache_size),
 678                            &null_mapping, &cmd->root);
 679        if (!r)
 680                cmd->cache_blocks = new_cache_size;
 681        cmd->changed = true;
 682        up_write(&cmd->root_lock);
 683
 684        return r;
 685}
 686
 687int dm_cache_discard_bitset_resize(struct dm_cache_metadata *cmd,
 688                                   sector_t discard_block_size,
 689                                   dm_dblock_t new_nr_entries)
 690{
 691        int r;
 692
 693        down_write(&cmd->root_lock);
 694        r = dm_bitset_resize(&cmd->discard_info,
 695                             cmd->discard_root,
 696                             from_dblock(cmd->discard_nr_blocks),
 697                             from_dblock(new_nr_entries),
 698                             false, &cmd->discard_root);
 699        if (!r) {
 700                cmd->discard_block_size = discard_block_size;
 701                cmd->discard_nr_blocks = new_nr_entries;
 702        }
 703
 704        cmd->changed = true;
 705        up_write(&cmd->root_lock);
 706
 707        return r;
 708}
 709
 710static int __set_discard(struct dm_cache_metadata *cmd, dm_dblock_t b)
 711{
 712        return dm_bitset_set_bit(&cmd->discard_info, cmd->discard_root,
 713                                 from_dblock(b), &cmd->discard_root);
 714}
 715
 716static int __clear_discard(struct dm_cache_metadata *cmd, dm_dblock_t b)
 717{
 718        return dm_bitset_clear_bit(&cmd->discard_info, cmd->discard_root,
 719                                   from_dblock(b), &cmd->discard_root);
 720}
 721
 722static int __is_discarded(struct dm_cache_metadata *cmd, dm_dblock_t b,
 723                          bool *is_discarded)
 724{
 725        return dm_bitset_test_bit(&cmd->discard_info, cmd->discard_root,
 726                                  from_dblock(b), &cmd->discard_root,
 727                                  is_discarded);
 728}
 729
 730static int __discard(struct dm_cache_metadata *cmd,
 731                     dm_dblock_t dblock, bool discard)
 732{
 733        int r;
 734
 735        r = (discard ? __set_discard : __clear_discard)(cmd, dblock);
 736        if (r)
 737                return r;
 738
 739        cmd->changed = true;
 740        return 0;
 741}
 742
 743int dm_cache_set_discard(struct dm_cache_metadata *cmd,
 744                         dm_dblock_t dblock, bool discard)
 745{
 746        int r;
 747
 748        down_write(&cmd->root_lock);
 749        r = __discard(cmd, dblock, discard);
 750        up_write(&cmd->root_lock);
 751
 752        return r;
 753}
 754
 755static int __load_discards(struct dm_cache_metadata *cmd,
 756                           load_discard_fn fn, void *context)
 757{
 758        int r = 0;
 759        dm_block_t b;
 760        bool discard;
 761
 762        for (b = 0; b < from_dblock(cmd->discard_nr_blocks); b++) {
 763                dm_dblock_t dblock = to_dblock(b);
 764
 765                if (cmd->clean_when_opened) {
 766                        r = __is_discarded(cmd, dblock, &discard);
 767                        if (r)
 768                                return r;
 769                } else
 770                        discard = false;
 771
 772                r = fn(context, cmd->discard_block_size, dblock, discard);
 773                if (r)
 774                        break;
 775        }
 776
 777        return r;
 778}
 779
 780int dm_cache_load_discards(struct dm_cache_metadata *cmd,
 781                           load_discard_fn fn, void *context)
 782{
 783        int r;
 784
 785        down_read(&cmd->root_lock);
 786        r = __load_discards(cmd, fn, context);
 787        up_read(&cmd->root_lock);
 788
 789        return r;
 790}
 791
 792dm_cblock_t dm_cache_size(struct dm_cache_metadata *cmd)
 793{
 794        dm_cblock_t r;
 795
 796        down_read(&cmd->root_lock);
 797        r = cmd->cache_blocks;
 798        up_read(&cmd->root_lock);
 799
 800        return r;
 801}
 802
 803static int __remove(struct dm_cache_metadata *cmd, dm_cblock_t cblock)
 804{
 805        int r;
 806        __le64 value = pack_value(0, 0);
 807
 808        __dm_bless_for_disk(&value);
 809        r = dm_array_set_value(&cmd->info, cmd->root, from_cblock(cblock),
 810                               &value, &cmd->root);
 811        if (r)
 812                return r;
 813
 814        cmd->changed = true;
 815        return 0;
 816}
 817
 818int dm_cache_remove_mapping(struct dm_cache_metadata *cmd, dm_cblock_t cblock)
 819{
 820        int r;
 821
 822        down_write(&cmd->root_lock);
 823        r = __remove(cmd, cblock);
 824        up_write(&cmd->root_lock);
 825
 826        return r;
 827}
 828
 829static int __insert(struct dm_cache_metadata *cmd,
 830                    dm_cblock_t cblock, dm_oblock_t oblock)
 831{
 832        int r;
 833        __le64 value = pack_value(oblock, M_VALID);
 834        __dm_bless_for_disk(&value);
 835
 836        r = dm_array_set_value(&cmd->info, cmd->root, from_cblock(cblock),
 837                               &value, &cmd->root);
 838        if (r)
 839                return r;
 840
 841        cmd->changed = true;
 842        return 0;
 843}
 844
 845int dm_cache_insert_mapping(struct dm_cache_metadata *cmd,
 846                            dm_cblock_t cblock, dm_oblock_t oblock)
 847{
 848        int r;
 849
 850        down_write(&cmd->root_lock);
 851        r = __insert(cmd, cblock, oblock);
 852        up_write(&cmd->root_lock);
 853
 854        return r;
 855}
 856
 857struct thunk {
 858        load_mapping_fn fn;
 859        void *context;
 860
 861        struct dm_cache_metadata *cmd;
 862        bool respect_dirty_flags;
 863        bool hints_valid;
 864};
 865
 866static bool policy_unchanged(struct dm_cache_metadata *cmd,
 867                             struct dm_cache_policy *policy)
 868{
 869        const char *policy_name = dm_cache_policy_get_name(policy);
 870        const unsigned *policy_version = dm_cache_policy_get_version(policy);
 871        size_t policy_hint_size = dm_cache_policy_get_hint_size(policy);
 872
 873        /*
 874         * Ensure policy names match.
 875         */
 876        if (strncmp(cmd->policy_name, policy_name, sizeof(cmd->policy_name)))
 877                return false;
 878
 879        /*
 880         * Ensure policy major versions match.
 881         */
 882        if (cmd->policy_version[0] != policy_version[0])
 883                return false;
 884
 885        /*
 886         * Ensure policy hint sizes match.
 887         */
 888        if (cmd->policy_hint_size != policy_hint_size)
 889                return false;
 890
 891        return true;
 892}
 893
 894static bool hints_array_initialized(struct dm_cache_metadata *cmd)
 895{
 896        return cmd->hint_root && cmd->policy_hint_size;
 897}
 898
 899static bool hints_array_available(struct dm_cache_metadata *cmd,
 900                                  struct dm_cache_policy *policy)
 901{
 902        return cmd->clean_when_opened && policy_unchanged(cmd, policy) &&
 903                hints_array_initialized(cmd);
 904}
 905
 906static int __load_mapping(void *context, uint64_t cblock, void *leaf)
 907{
 908        int r = 0;
 909        bool dirty;
 910        __le64 value;
 911        __le32 hint_value = 0;
 912        dm_oblock_t oblock;
 913        unsigned flags;
 914        struct thunk *thunk = context;
 915        struct dm_cache_metadata *cmd = thunk->cmd;
 916
 917        memcpy(&value, leaf, sizeof(value));
 918        unpack_value(value, &oblock, &flags);
 919
 920        if (flags & M_VALID) {
 921                if (thunk->hints_valid) {
 922                        r = dm_array_get_value(&cmd->hint_info, cmd->hint_root,
 923                                               cblock, &hint_value);
 924                        if (r && r != -ENODATA)
 925                                return r;
 926                }
 927
 928                dirty = thunk->respect_dirty_flags ? (flags & M_DIRTY) : true;
 929                r = thunk->fn(thunk->context, oblock, to_cblock(cblock),
 930                              dirty, le32_to_cpu(hint_value), thunk->hints_valid);
 931        }
 932
 933        return r;
 934}
 935
 936static int __load_mappings(struct dm_cache_metadata *cmd,
 937                           struct dm_cache_policy *policy,
 938                           load_mapping_fn fn, void *context)
 939{
 940        struct thunk thunk;
 941
 942        thunk.fn = fn;
 943        thunk.context = context;
 944
 945        thunk.cmd = cmd;
 946        thunk.respect_dirty_flags = cmd->clean_when_opened;
 947        thunk.hints_valid = hints_array_available(cmd, policy);
 948
 949        return dm_array_walk(&cmd->info, cmd->root, __load_mapping, &thunk);
 950}
 951
 952int dm_cache_load_mappings(struct dm_cache_metadata *cmd,
 953                           struct dm_cache_policy *policy,
 954                           load_mapping_fn fn, void *context)
 955{
 956        int r;
 957
 958        down_read(&cmd->root_lock);
 959        r = __load_mappings(cmd, policy, fn, context);
 960        up_read(&cmd->root_lock);
 961
 962        return r;
 963}
 964
 965static int __dump_mapping(void *context, uint64_t cblock, void *leaf)
 966{
 967        int r = 0;
 968        __le64 value;
 969        dm_oblock_t oblock;
 970        unsigned flags;
 971
 972        memcpy(&value, leaf, sizeof(value));
 973        unpack_value(value, &oblock, &flags);
 974
 975        return r;
 976}
 977
 978static int __dump_mappings(struct dm_cache_metadata *cmd)
 979{
 980        return dm_array_walk(&cmd->info, cmd->root, __dump_mapping, NULL);
 981}
 982
 983void dm_cache_dump(struct dm_cache_metadata *cmd)
 984{
 985        down_read(&cmd->root_lock);
 986        __dump_mappings(cmd);
 987        up_read(&cmd->root_lock);
 988}
 989
 990int dm_cache_changed_this_transaction(struct dm_cache_metadata *cmd)
 991{
 992        int r;
 993
 994        down_read(&cmd->root_lock);
 995        r = cmd->changed;
 996        up_read(&cmd->root_lock);
 997
 998        return r;
 999}
1000
1001static int __dirty(struct dm_cache_metadata *cmd, dm_cblock_t cblock, bool dirty)
1002{
1003        int r;
1004        unsigned flags;
1005        dm_oblock_t oblock;
1006        __le64 value;
1007
1008        r = dm_array_get_value(&cmd->info, cmd->root, from_cblock(cblock), &value);
1009        if (r)
1010                return r;
1011
1012        unpack_value(value, &oblock, &flags);
1013
1014        if (((flags & M_DIRTY) && dirty) || (!(flags & M_DIRTY) && !dirty))
1015                /* nothing to be done */
1016                return 0;
1017
1018        value = pack_value(oblock, (flags & ~M_DIRTY) | (dirty ? M_DIRTY : 0));
1019        __dm_bless_for_disk(&value);
1020
1021        r = dm_array_set_value(&cmd->info, cmd->root, from_cblock(cblock),
1022                               &value, &cmd->root);
1023        if (r)
1024                return r;
1025
1026        cmd->changed = true;
1027        return 0;
1028
1029}
1030
1031int dm_cache_set_dirty(struct dm_cache_metadata *cmd,
1032                       dm_cblock_t cblock, bool dirty)
1033{
1034        int r;
1035
1036        down_write(&cmd->root_lock);
1037        r = __dirty(cmd, cblock, dirty);
1038        up_write(&cmd->root_lock);
1039
1040        return r;
1041}
1042
1043void dm_cache_metadata_get_stats(struct dm_cache_metadata *cmd,
1044                                 struct dm_cache_statistics *stats)
1045{
1046        down_read(&cmd->root_lock);
1047        *stats = cmd->stats;
1048        up_read(&cmd->root_lock);
1049}
1050
1051void dm_cache_metadata_set_stats(struct dm_cache_metadata *cmd,
1052                                 struct dm_cache_statistics *stats)
1053{
1054        down_write(&cmd->root_lock);
1055        cmd->stats = *stats;
1056        up_write(&cmd->root_lock);
1057}
1058
1059int dm_cache_commit(struct dm_cache_metadata *cmd, bool clean_shutdown)
1060{
1061        int r;
1062        flags_mutator mutator = (clean_shutdown ? set_clean_shutdown :
1063                                 clear_clean_shutdown);
1064
1065        down_write(&cmd->root_lock);
1066        r = __commit_transaction(cmd, mutator);
1067        if (r)
1068                goto out;
1069
1070        r = __begin_transaction(cmd);
1071
1072out:
1073        up_write(&cmd->root_lock);
1074        return r;
1075}
1076
1077int dm_cache_get_free_metadata_block_count(struct dm_cache_metadata *cmd,
1078                                           dm_block_t *result)
1079{
1080        int r = -EINVAL;
1081
1082        down_read(&cmd->root_lock);
1083        r = dm_sm_get_nr_free(cmd->metadata_sm, result);
1084        up_read(&cmd->root_lock);
1085
1086        return r;
1087}
1088
1089int dm_cache_get_metadata_dev_size(struct dm_cache_metadata *cmd,
1090                                   dm_block_t *result)
1091{
1092        int r = -EINVAL;
1093
1094        down_read(&cmd->root_lock);
1095        r = dm_sm_get_nr_blocks(cmd->metadata_sm, result);
1096        up_read(&cmd->root_lock);
1097
1098        return r;
1099}
1100
1101/*----------------------------------------------------------------*/
1102
1103static int begin_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *policy)
1104{
1105        int r;
1106        __le32 value;
1107        size_t hint_size;
1108        const char *policy_name = dm_cache_policy_get_name(policy);
1109        const unsigned *policy_version = dm_cache_policy_get_version(policy);
1110
1111        if (!policy_name[0] ||
1112            (strlen(policy_name) > sizeof(cmd->policy_name) - 1))
1113                return -EINVAL;
1114
1115        if (!policy_unchanged(cmd, policy)) {
1116                strncpy(cmd->policy_name, policy_name, sizeof(cmd->policy_name));
1117                memcpy(cmd->policy_version, policy_version, sizeof(cmd->policy_version));
1118
1119                hint_size = dm_cache_policy_get_hint_size(policy);
1120                if (!hint_size)
1121                        return 0; /* short-circuit hints initialization */
1122                cmd->policy_hint_size = hint_size;
1123
1124                if (cmd->hint_root) {
1125                        r = dm_array_del(&cmd->hint_info, cmd->hint_root);
1126                        if (r)
1127                                return r;
1128                }
1129
1130                r = dm_array_empty(&cmd->hint_info, &cmd->hint_root);
1131                if (r)
1132                        return r;
1133
1134                value = cpu_to_le32(0);
1135                __dm_bless_for_disk(&value);
1136                r = dm_array_resize(&cmd->hint_info, cmd->hint_root, 0,
1137                                    from_cblock(cmd->cache_blocks),
1138                                    &value, &cmd->hint_root);
1139                if (r)
1140                        return r;
1141        }
1142
1143        return 0;
1144}
1145
1146int dm_cache_begin_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *policy)
1147{
1148        int r;
1149
1150        down_write(&cmd->root_lock);
1151        r = begin_hints(cmd, policy);
1152        up_write(&cmd->root_lock);
1153
1154        return r;
1155}
1156
1157static int save_hint(struct dm_cache_metadata *cmd, dm_cblock_t cblock,
1158                     uint32_t hint)
1159{
1160        int r;
1161        __le32 value = cpu_to_le32(hint);
1162        __dm_bless_for_disk(&value);
1163
1164        r = dm_array_set_value(&cmd->hint_info, cmd->hint_root,
1165                               from_cblock(cblock), &value, &cmd->hint_root);
1166        cmd->changed = true;
1167
1168        return r;
1169}
1170
1171int dm_cache_save_hint(struct dm_cache_metadata *cmd, dm_cblock_t cblock,
1172                       uint32_t hint)
1173{
1174        int r;
1175
1176        if (!hints_array_initialized(cmd))
1177                return 0;
1178
1179        down_write(&cmd->root_lock);
1180        r = save_hint(cmd, cblock, hint);
1181        up_write(&cmd->root_lock);
1182
1183        return r;
1184}
1185