linux/drivers/md/dm-cache-metadata.c
<<
>>
Prefs
   1/*
   2 * Copyright (C) 2012 Red Hat, Inc.
   3 *
   4 * This file is released under the GPL.
   5 */
   6
   7#include "dm-cache-metadata.h"
   8
   9#include "persistent-data/dm-array.h"
  10#include "persistent-data/dm-bitset.h"
  11#include "persistent-data/dm-space-map.h"
  12#include "persistent-data/dm-space-map-disk.h"
  13#include "persistent-data/dm-transaction-manager.h"
  14
  15#include <linux/device-mapper.h>
  16
  17/*----------------------------------------------------------------*/
  18
  19#define DM_MSG_PREFIX   "cache metadata"
  20
  21#define CACHE_SUPERBLOCK_MAGIC 06142003
  22#define CACHE_SUPERBLOCK_LOCATION 0
  23
  24/*
  25 * defines a range of metadata versions that this module can handle.
  26 */
  27#define MIN_CACHE_VERSION 1
  28#define MAX_CACHE_VERSION 2
  29
  30/*
  31 *  3 for btree insert +
  32 *  2 for btree lookup used within space map
  33 */
  34#define CACHE_MAX_CONCURRENT_LOCKS 5
  35#define SPACE_MAP_ROOT_SIZE 128
  36
  37enum superblock_flag_bits {
  38        /* for spotting crashes that would invalidate the dirty bitset */
  39        CLEAN_SHUTDOWN,
  40        /* metadata must be checked using the tools */
  41        NEEDS_CHECK,
  42};
  43
  44/*
  45 * Each mapping from cache block -> origin block carries a set of flags.
  46 */
  47enum mapping_bits {
  48        /*
  49         * A valid mapping.  Because we're using an array we clear this
  50         * flag for an non existant mapping.
  51         */
  52        M_VALID = 1,
  53
  54        /*
  55         * The data on the cache is different from that on the origin.
  56         * This flag is only used by metadata format 1.
  57         */
  58        M_DIRTY = 2
  59};
  60
  61struct cache_disk_superblock {
  62        __le32 csum;
  63        __le32 flags;
  64        __le64 blocknr;
  65
  66        __u8 uuid[16];
  67        __le64 magic;
  68        __le32 version;
  69
  70        __u8 policy_name[CACHE_POLICY_NAME_SIZE];
  71        __le32 policy_hint_size;
  72
  73        __u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE];
  74        __le64 mapping_root;
  75        __le64 hint_root;
  76
  77        __le64 discard_root;
  78        __le64 discard_block_size;
  79        __le64 discard_nr_blocks;
  80
  81        __le32 data_block_size;
  82        __le32 metadata_block_size;
  83        __le32 cache_blocks;
  84
  85        __le32 compat_flags;
  86        __le32 compat_ro_flags;
  87        __le32 incompat_flags;
  88
  89        __le32 read_hits;
  90        __le32 read_misses;
  91        __le32 write_hits;
  92        __le32 write_misses;
  93
  94        __le32 policy_version[CACHE_POLICY_VERSION_SIZE];
  95
  96        /*
  97         * Metadata format 2 fields.
  98         */
  99        __le64 dirty_root;
 100} __packed;
 101
 102struct dm_cache_metadata {
 103        atomic_t ref_count;
 104        struct list_head list;
 105
 106        unsigned version;
 107        struct block_device *bdev;
 108        struct dm_block_manager *bm;
 109        struct dm_space_map *metadata_sm;
 110        struct dm_transaction_manager *tm;
 111
 112        struct dm_array_info info;
 113        struct dm_array_info hint_info;
 114        struct dm_disk_bitset discard_info;
 115
 116        struct rw_semaphore root_lock;
 117        unsigned long flags;
 118        dm_block_t root;
 119        dm_block_t hint_root;
 120        dm_block_t discard_root;
 121
 122        sector_t discard_block_size;
 123        dm_dblock_t discard_nr_blocks;
 124
 125        sector_t data_block_size;
 126        dm_cblock_t cache_blocks;
 127        bool changed:1;
 128        bool clean_when_opened:1;
 129
 130        char policy_name[CACHE_POLICY_NAME_SIZE];
 131        unsigned policy_version[CACHE_POLICY_VERSION_SIZE];
 132        size_t policy_hint_size;
 133        struct dm_cache_statistics stats;
 134
 135        /*
 136         * Reading the space map root can fail, so we read it into this
 137         * buffer before the superblock is locked and updated.
 138         */
 139        __u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE];
 140
 141        /*
 142         * Set if a transaction has to be aborted but the attempt to roll
 143         * back to the previous (good) transaction failed.  The only
 144         * metadata operation permissible in this state is the closing of
 145         * the device.
 146         */
 147        bool fail_io:1;
 148
 149        /*
 150         * Metadata format 2 fields.
 151         */
 152        dm_block_t dirty_root;
 153        struct dm_disk_bitset dirty_info;
 154
 155        /*
 156         * These structures are used when loading metadata.  They're too
 157         * big to put on the stack.
 158         */
 159        struct dm_array_cursor mapping_cursor;
 160        struct dm_array_cursor hint_cursor;
 161        struct dm_bitset_cursor dirty_cursor;
 162};
 163
 164/*-------------------------------------------------------------------
 165 * superblock validator
 166 *-----------------------------------------------------------------*/
 167
 168#define SUPERBLOCK_CSUM_XOR 9031977
 169
 170static void sb_prepare_for_write(struct dm_block_validator *v,
 171                                 struct dm_block *b,
 172                                 size_t sb_block_size)
 173{
 174        struct cache_disk_superblock *disk_super = dm_block_data(b);
 175
 176        disk_super->blocknr = cpu_to_le64(dm_block_location(b));
 177        disk_super->csum = cpu_to_le32(dm_bm_checksum(&disk_super->flags,
 178                                                      sb_block_size - sizeof(__le32),
 179                                                      SUPERBLOCK_CSUM_XOR));
 180}
 181
 182static int check_metadata_version(struct cache_disk_superblock *disk_super)
 183{
 184        uint32_t metadata_version = le32_to_cpu(disk_super->version);
 185
 186        if (metadata_version < MIN_CACHE_VERSION || metadata_version > MAX_CACHE_VERSION) {
 187                DMERR("Cache metadata version %u found, but only versions between %u and %u supported.",
 188                      metadata_version, MIN_CACHE_VERSION, MAX_CACHE_VERSION);
 189                return -EINVAL;
 190        }
 191
 192        return 0;
 193}
 194
 195static int sb_check(struct dm_block_validator *v,
 196                    struct dm_block *b,
 197                    size_t sb_block_size)
 198{
 199        struct cache_disk_superblock *disk_super = dm_block_data(b);
 200        __le32 csum_le;
 201
 202        if (dm_block_location(b) != le64_to_cpu(disk_super->blocknr)) {
 203                DMERR("sb_check failed: blocknr %llu: wanted %llu",
 204                      le64_to_cpu(disk_super->blocknr),
 205                      (unsigned long long)dm_block_location(b));
 206                return -ENOTBLK;
 207        }
 208
 209        if (le64_to_cpu(disk_super->magic) != CACHE_SUPERBLOCK_MAGIC) {
 210                DMERR("sb_check failed: magic %llu: wanted %llu",
 211                      le64_to_cpu(disk_super->magic),
 212                      (unsigned long long)CACHE_SUPERBLOCK_MAGIC);
 213                return -EILSEQ;
 214        }
 215
 216        csum_le = cpu_to_le32(dm_bm_checksum(&disk_super->flags,
 217                                             sb_block_size - sizeof(__le32),
 218                                             SUPERBLOCK_CSUM_XOR));
 219        if (csum_le != disk_super->csum) {
 220                DMERR("sb_check failed: csum %u: wanted %u",
 221                      le32_to_cpu(csum_le), le32_to_cpu(disk_super->csum));
 222                return -EILSEQ;
 223        }
 224
 225        return check_metadata_version(disk_super);
 226}
 227
 228static struct dm_block_validator sb_validator = {
 229        .name = "superblock",
 230        .prepare_for_write = sb_prepare_for_write,
 231        .check = sb_check
 232};
 233
 234/*----------------------------------------------------------------*/
 235
 236static int superblock_read_lock(struct dm_cache_metadata *cmd,
 237                                struct dm_block **sblock)
 238{
 239        return dm_bm_read_lock(cmd->bm, CACHE_SUPERBLOCK_LOCATION,
 240                               &sb_validator, sblock);
 241}
 242
 243static int superblock_lock_zero(struct dm_cache_metadata *cmd,
 244                                struct dm_block **sblock)
 245{
 246        return dm_bm_write_lock_zero(cmd->bm, CACHE_SUPERBLOCK_LOCATION,
 247                                     &sb_validator, sblock);
 248}
 249
 250static int superblock_lock(struct dm_cache_metadata *cmd,
 251                           struct dm_block **sblock)
 252{
 253        return dm_bm_write_lock(cmd->bm, CACHE_SUPERBLOCK_LOCATION,
 254                                &sb_validator, sblock);
 255}
 256
 257/*----------------------------------------------------------------*/
 258
 259static int __superblock_all_zeroes(struct dm_block_manager *bm, bool *result)
 260{
 261        int r;
 262        unsigned i;
 263        struct dm_block *b;
 264        __le64 *data_le, zero = cpu_to_le64(0);
 265        unsigned sb_block_size = dm_bm_block_size(bm) / sizeof(__le64);
 266
 267        /*
 268         * We can't use a validator here - it may be all zeroes.
 269         */
 270        r = dm_bm_read_lock(bm, CACHE_SUPERBLOCK_LOCATION, NULL, &b);
 271        if (r)
 272                return r;
 273
 274        data_le = dm_block_data(b);
 275        *result = true;
 276        for (i = 0; i < sb_block_size; i++) {
 277                if (data_le[i] != zero) {
 278                        *result = false;
 279                        break;
 280                }
 281        }
 282
 283        dm_bm_unlock(b);
 284
 285        return 0;
 286}
 287
 288static void __setup_mapping_info(struct dm_cache_metadata *cmd)
 289{
 290        struct dm_btree_value_type vt;
 291
 292        vt.context = NULL;
 293        vt.size = sizeof(__le64);
 294        vt.inc = NULL;
 295        vt.dec = NULL;
 296        vt.equal = NULL;
 297        dm_array_info_init(&cmd->info, cmd->tm, &vt);
 298
 299        if (cmd->policy_hint_size) {
 300                vt.size = sizeof(__le32);
 301                dm_array_info_init(&cmd->hint_info, cmd->tm, &vt);
 302        }
 303}
 304
 305static int __save_sm_root(struct dm_cache_metadata *cmd)
 306{
 307        int r;
 308        size_t metadata_len;
 309
 310        r = dm_sm_root_size(cmd->metadata_sm, &metadata_len);
 311        if (r < 0)
 312                return r;
 313
 314        return dm_sm_copy_root(cmd->metadata_sm, &cmd->metadata_space_map_root,
 315                               metadata_len);
 316}
 317
 318static void __copy_sm_root(struct dm_cache_metadata *cmd,
 319                           struct cache_disk_superblock *disk_super)
 320{
 321        memcpy(&disk_super->metadata_space_map_root,
 322               &cmd->metadata_space_map_root,
 323               sizeof(cmd->metadata_space_map_root));
 324}
 325
 326static bool separate_dirty_bits(struct dm_cache_metadata *cmd)
 327{
 328        return cmd->version >= 2;
 329}
 330
 331static int __write_initial_superblock(struct dm_cache_metadata *cmd)
 332{
 333        int r;
 334        struct dm_block *sblock;
 335        struct cache_disk_superblock *disk_super;
 336        sector_t bdev_size = i_size_read(cmd->bdev->bd_inode) >> SECTOR_SHIFT;
 337
 338        /* FIXME: see if we can lose the max sectors limit */
 339        if (bdev_size > DM_CACHE_METADATA_MAX_SECTORS)
 340                bdev_size = DM_CACHE_METADATA_MAX_SECTORS;
 341
 342        r = dm_tm_pre_commit(cmd->tm);
 343        if (r < 0)
 344                return r;
 345
 346        /*
 347         * dm_sm_copy_root() can fail.  So we need to do it before we start
 348         * updating the superblock.
 349         */
 350        r = __save_sm_root(cmd);
 351        if (r)
 352                return r;
 353
 354        r = superblock_lock_zero(cmd, &sblock);
 355        if (r)
 356                return r;
 357
 358        disk_super = dm_block_data(sblock);
 359        disk_super->flags = 0;
 360        memset(disk_super->uuid, 0, sizeof(disk_super->uuid));
 361        disk_super->magic = cpu_to_le64(CACHE_SUPERBLOCK_MAGIC);
 362        disk_super->version = cpu_to_le32(cmd->version);
 363        memset(disk_super->policy_name, 0, sizeof(disk_super->policy_name));
 364        memset(disk_super->policy_version, 0, sizeof(disk_super->policy_version));
 365        disk_super->policy_hint_size = cpu_to_le32(0);
 366
 367        __copy_sm_root(cmd, disk_super);
 368
 369        disk_super->mapping_root = cpu_to_le64(cmd->root);
 370        disk_super->hint_root = cpu_to_le64(cmd->hint_root);
 371        disk_super->discard_root = cpu_to_le64(cmd->discard_root);
 372        disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size);
 373        disk_super->discard_nr_blocks = cpu_to_le64(from_dblock(cmd->discard_nr_blocks));
 374        disk_super->metadata_block_size = cpu_to_le32(DM_CACHE_METADATA_BLOCK_SIZE);
 375        disk_super->data_block_size = cpu_to_le32(cmd->data_block_size);
 376        disk_super->cache_blocks = cpu_to_le32(0);
 377
 378        disk_super->read_hits = cpu_to_le32(0);
 379        disk_super->read_misses = cpu_to_le32(0);
 380        disk_super->write_hits = cpu_to_le32(0);
 381        disk_super->write_misses = cpu_to_le32(0);
 382
 383        if (separate_dirty_bits(cmd))
 384                disk_super->dirty_root = cpu_to_le64(cmd->dirty_root);
 385
 386        return dm_tm_commit(cmd->tm, sblock);
 387}
 388
 389static int __format_metadata(struct dm_cache_metadata *cmd)
 390{
 391        int r;
 392
 393        r = dm_tm_create_with_sm(cmd->bm, CACHE_SUPERBLOCK_LOCATION,
 394                                 &cmd->tm, &cmd->metadata_sm);
 395        if (r < 0) {
 396                DMERR("tm_create_with_sm failed");
 397                return r;
 398        }
 399
 400        __setup_mapping_info(cmd);
 401
 402        r = dm_array_empty(&cmd->info, &cmd->root);
 403        if (r < 0)
 404                goto bad;
 405
 406        if (separate_dirty_bits(cmd)) {
 407                dm_disk_bitset_init(cmd->tm, &cmd->dirty_info);
 408                r = dm_bitset_empty(&cmd->dirty_info, &cmd->dirty_root);
 409                if (r < 0)
 410                        goto bad;
 411        }
 412
 413        dm_disk_bitset_init(cmd->tm, &cmd->discard_info);
 414        r = dm_bitset_empty(&cmd->discard_info, &cmd->discard_root);
 415        if (r < 0)
 416                goto bad;
 417
 418        cmd->discard_block_size = 0;
 419        cmd->discard_nr_blocks = 0;
 420
 421        r = __write_initial_superblock(cmd);
 422        if (r)
 423                goto bad;
 424
 425        cmd->clean_when_opened = true;
 426        return 0;
 427
 428bad:
 429        dm_tm_destroy(cmd->tm);
 430        dm_sm_destroy(cmd->metadata_sm);
 431
 432        return r;
 433}
 434
 435static int __check_incompat_features(struct cache_disk_superblock *disk_super,
 436                                     struct dm_cache_metadata *cmd)
 437{
 438        uint32_t incompat_flags, features;
 439
 440        incompat_flags = le32_to_cpu(disk_super->incompat_flags);
 441        features = incompat_flags & ~DM_CACHE_FEATURE_INCOMPAT_SUPP;
 442        if (features) {
 443                DMERR("could not access metadata due to unsupported optional features (%lx).",
 444                      (unsigned long)features);
 445                return -EINVAL;
 446        }
 447
 448        /*
 449         * Check for read-only metadata to skip the following RDWR checks.
 450         */
 451        if (get_disk_ro(cmd->bdev->bd_disk))
 452                return 0;
 453
 454        features = le32_to_cpu(disk_super->compat_ro_flags) & ~DM_CACHE_FEATURE_COMPAT_RO_SUPP;
 455        if (features) {
 456                DMERR("could not access metadata RDWR due to unsupported optional features (%lx).",
 457                      (unsigned long)features);
 458                return -EINVAL;
 459        }
 460
 461        return 0;
 462}
 463
 464static int __open_metadata(struct dm_cache_metadata *cmd)
 465{
 466        int r;
 467        struct dm_block *sblock;
 468        struct cache_disk_superblock *disk_super;
 469        unsigned long sb_flags;
 470
 471        r = superblock_read_lock(cmd, &sblock);
 472        if (r < 0) {
 473                DMERR("couldn't read lock superblock");
 474                return r;
 475        }
 476
 477        disk_super = dm_block_data(sblock);
 478
 479        /* Verify the data block size hasn't changed */
 480        if (le32_to_cpu(disk_super->data_block_size) != cmd->data_block_size) {
 481                DMERR("changing the data block size (from %u to %llu) is not supported",
 482                      le32_to_cpu(disk_super->data_block_size),
 483                      (unsigned long long)cmd->data_block_size);
 484                r = -EINVAL;
 485                goto bad;
 486        }
 487
 488        r = __check_incompat_features(disk_super, cmd);
 489        if (r < 0)
 490                goto bad;
 491
 492        r = dm_tm_open_with_sm(cmd->bm, CACHE_SUPERBLOCK_LOCATION,
 493                               disk_super->metadata_space_map_root,
 494                               sizeof(disk_super->metadata_space_map_root),
 495                               &cmd->tm, &cmd->metadata_sm);
 496        if (r < 0) {
 497                DMERR("tm_open_with_sm failed");
 498                goto bad;
 499        }
 500
 501        __setup_mapping_info(cmd);
 502        dm_disk_bitset_init(cmd->tm, &cmd->dirty_info);
 503        dm_disk_bitset_init(cmd->tm, &cmd->discard_info);
 504        sb_flags = le32_to_cpu(disk_super->flags);
 505        cmd->clean_when_opened = test_bit(CLEAN_SHUTDOWN, &sb_flags);
 506        dm_bm_unlock(sblock);
 507
 508        return 0;
 509
 510bad:
 511        dm_bm_unlock(sblock);
 512        return r;
 513}
 514
 515static int __open_or_format_metadata(struct dm_cache_metadata *cmd,
 516                                     bool format_device)
 517{
 518        int r;
 519        bool unformatted = false;
 520
 521        r = __superblock_all_zeroes(cmd->bm, &unformatted);
 522        if (r)
 523                return r;
 524
 525        if (unformatted)
 526                return format_device ? __format_metadata(cmd) : -EPERM;
 527
 528        return __open_metadata(cmd);
 529}
 530
 531static int __create_persistent_data_objects(struct dm_cache_metadata *cmd,
 532                                            bool may_format_device)
 533{
 534        int r;
 535        cmd->bm = dm_block_manager_create(cmd->bdev, DM_CACHE_METADATA_BLOCK_SIZE << SECTOR_SHIFT,
 536                                          CACHE_MAX_CONCURRENT_LOCKS);
 537        if (IS_ERR(cmd->bm)) {
 538                DMERR("could not create block manager");
 539                return PTR_ERR(cmd->bm);
 540        }
 541
 542        r = __open_or_format_metadata(cmd, may_format_device);
 543        if (r)
 544                dm_block_manager_destroy(cmd->bm);
 545
 546        return r;
 547}
 548
 549static void __destroy_persistent_data_objects(struct dm_cache_metadata *cmd)
 550{
 551        dm_sm_destroy(cmd->metadata_sm);
 552        dm_tm_destroy(cmd->tm);
 553        dm_block_manager_destroy(cmd->bm);
 554}
 555
 556typedef unsigned long (*flags_mutator)(unsigned long);
 557
 558static void update_flags(struct cache_disk_superblock *disk_super,
 559                         flags_mutator mutator)
 560{
 561        uint32_t sb_flags = mutator(le32_to_cpu(disk_super->flags));
 562        disk_super->flags = cpu_to_le32(sb_flags);
 563}
 564
 565static unsigned long set_clean_shutdown(unsigned long flags)
 566{
 567        set_bit(CLEAN_SHUTDOWN, &flags);
 568        return flags;
 569}
 570
 571static unsigned long clear_clean_shutdown(unsigned long flags)
 572{
 573        clear_bit(CLEAN_SHUTDOWN, &flags);
 574        return flags;
 575}
 576
 577static void read_superblock_fields(struct dm_cache_metadata *cmd,
 578                                   struct cache_disk_superblock *disk_super)
 579{
 580        cmd->version = le32_to_cpu(disk_super->version);
 581        cmd->flags = le32_to_cpu(disk_super->flags);
 582        cmd->root = le64_to_cpu(disk_super->mapping_root);
 583        cmd->hint_root = le64_to_cpu(disk_super->hint_root);
 584        cmd->discard_root = le64_to_cpu(disk_super->discard_root);
 585        cmd->discard_block_size = le64_to_cpu(disk_super->discard_block_size);
 586        cmd->discard_nr_blocks = to_dblock(le64_to_cpu(disk_super->discard_nr_blocks));
 587        cmd->data_block_size = le32_to_cpu(disk_super->data_block_size);
 588        cmd->cache_blocks = to_cblock(le32_to_cpu(disk_super->cache_blocks));
 589        strncpy(cmd->policy_name, disk_super->policy_name, sizeof(cmd->policy_name));
 590        cmd->policy_version[0] = le32_to_cpu(disk_super->policy_version[0]);
 591        cmd->policy_version[1] = le32_to_cpu(disk_super->policy_version[1]);
 592        cmd->policy_version[2] = le32_to_cpu(disk_super->policy_version[2]);
 593        cmd->policy_hint_size = le32_to_cpu(disk_super->policy_hint_size);
 594
 595        cmd->stats.read_hits = le32_to_cpu(disk_super->read_hits);
 596        cmd->stats.read_misses = le32_to_cpu(disk_super->read_misses);
 597        cmd->stats.write_hits = le32_to_cpu(disk_super->write_hits);
 598        cmd->stats.write_misses = le32_to_cpu(disk_super->write_misses);
 599
 600        if (separate_dirty_bits(cmd))
 601                cmd->dirty_root = le64_to_cpu(disk_super->dirty_root);
 602
 603        cmd->changed = false;
 604}
 605
 606/*
 607 * The mutator updates the superblock flags.
 608 */
 609static int __begin_transaction_flags(struct dm_cache_metadata *cmd,
 610                                     flags_mutator mutator)
 611{
 612        int r;
 613        struct cache_disk_superblock *disk_super;
 614        struct dm_block *sblock;
 615
 616        r = superblock_lock(cmd, &sblock);
 617        if (r)
 618                return r;
 619
 620        disk_super = dm_block_data(sblock);
 621        update_flags(disk_super, mutator);
 622        read_superblock_fields(cmd, disk_super);
 623        dm_bm_unlock(sblock);
 624
 625        return dm_bm_flush(cmd->bm);
 626}
 627
 628static int __begin_transaction(struct dm_cache_metadata *cmd)
 629{
 630        int r;
 631        struct cache_disk_superblock *disk_super;
 632        struct dm_block *sblock;
 633
 634        /*
 635         * We re-read the superblock every time.  Shouldn't need to do this
 636         * really.
 637         */
 638        r = superblock_read_lock(cmd, &sblock);
 639        if (r)
 640                return r;
 641
 642        disk_super = dm_block_data(sblock);
 643        read_superblock_fields(cmd, disk_super);
 644        dm_bm_unlock(sblock);
 645
 646        return 0;
 647}
 648
 649static int __commit_transaction(struct dm_cache_metadata *cmd,
 650                                flags_mutator mutator)
 651{
 652        int r;
 653        struct cache_disk_superblock *disk_super;
 654        struct dm_block *sblock;
 655
 656        /*
 657         * We need to know if the cache_disk_superblock exceeds a 512-byte sector.
 658         */
 659        BUILD_BUG_ON(sizeof(struct cache_disk_superblock) > 512);
 660
 661        if (separate_dirty_bits(cmd)) {
 662                r = dm_bitset_flush(&cmd->dirty_info, cmd->dirty_root,
 663                                    &cmd->dirty_root);
 664                if (r)
 665                        return r;
 666        }
 667
 668        r = dm_bitset_flush(&cmd->discard_info, cmd->discard_root,
 669                            &cmd->discard_root);
 670        if (r)
 671                return r;
 672
 673        r = dm_tm_pre_commit(cmd->tm);
 674        if (r < 0)
 675                return r;
 676
 677        r = __save_sm_root(cmd);
 678        if (r)
 679                return r;
 680
 681        r = superblock_lock(cmd, &sblock);
 682        if (r)
 683                return r;
 684
 685        disk_super = dm_block_data(sblock);
 686
 687        disk_super->flags = cpu_to_le32(cmd->flags);
 688        if (mutator)
 689                update_flags(disk_super, mutator);
 690
 691        disk_super->mapping_root = cpu_to_le64(cmd->root);
 692        if (separate_dirty_bits(cmd))
 693                disk_super->dirty_root = cpu_to_le64(cmd->dirty_root);
 694        disk_super->hint_root = cpu_to_le64(cmd->hint_root);
 695        disk_super->discard_root = cpu_to_le64(cmd->discard_root);
 696        disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size);
 697        disk_super->discard_nr_blocks = cpu_to_le64(from_dblock(cmd->discard_nr_blocks));
 698        disk_super->cache_blocks = cpu_to_le32(from_cblock(cmd->cache_blocks));
 699        strncpy(disk_super->policy_name, cmd->policy_name, sizeof(disk_super->policy_name));
 700        disk_super->policy_version[0] = cpu_to_le32(cmd->policy_version[0]);
 701        disk_super->policy_version[1] = cpu_to_le32(cmd->policy_version[1]);
 702        disk_super->policy_version[2] = cpu_to_le32(cmd->policy_version[2]);
 703        disk_super->policy_hint_size = cpu_to_le32(cmd->policy_hint_size);
 704
 705        disk_super->read_hits = cpu_to_le32(cmd->stats.read_hits);
 706        disk_super->read_misses = cpu_to_le32(cmd->stats.read_misses);
 707        disk_super->write_hits = cpu_to_le32(cmd->stats.write_hits);
 708        disk_super->write_misses = cpu_to_le32(cmd->stats.write_misses);
 709        __copy_sm_root(cmd, disk_super);
 710
 711        return dm_tm_commit(cmd->tm, sblock);
 712}
 713
 714/*----------------------------------------------------------------*/
 715
 716/*
 717 * The mappings are held in a dm-array that has 64-bit values stored in
 718 * little-endian format.  The index is the cblock, the high 48bits of the
 719 * value are the oblock and the low 16 bit the flags.
 720 */
 721#define FLAGS_MASK ((1 << 16) - 1)
 722
 723static __le64 pack_value(dm_oblock_t block, unsigned flags)
 724{
 725        uint64_t value = from_oblock(block);
 726        value <<= 16;
 727        value = value | (flags & FLAGS_MASK);
 728        return cpu_to_le64(value);
 729}
 730
 731static void unpack_value(__le64 value_le, dm_oblock_t *block, unsigned *flags)
 732{
 733        uint64_t value = le64_to_cpu(value_le);
 734        uint64_t b = value >> 16;
 735        *block = to_oblock(b);
 736        *flags = value & FLAGS_MASK;
 737}
 738
 739/*----------------------------------------------------------------*/
 740
 741static struct dm_cache_metadata *metadata_open(struct block_device *bdev,
 742                                               sector_t data_block_size,
 743                                               bool may_format_device,
 744                                               size_t policy_hint_size,
 745                                               unsigned metadata_version)
 746{
 747        int r;
 748        struct dm_cache_metadata *cmd;
 749
 750        cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
 751        if (!cmd) {
 752                DMERR("could not allocate metadata struct");
 753                return ERR_PTR(-ENOMEM);
 754        }
 755
 756        cmd->version = metadata_version;
 757        atomic_set(&cmd->ref_count, 1);
 758        init_rwsem(&cmd->root_lock);
 759        cmd->bdev = bdev;
 760        cmd->data_block_size = data_block_size;
 761        cmd->cache_blocks = 0;
 762        cmd->policy_hint_size = policy_hint_size;
 763        cmd->changed = true;
 764        cmd->fail_io = false;
 765
 766        r = __create_persistent_data_objects(cmd, may_format_device);
 767        if (r) {
 768                kfree(cmd);
 769                return ERR_PTR(r);
 770        }
 771
 772        r = __begin_transaction_flags(cmd, clear_clean_shutdown);
 773        if (r < 0) {
 774                dm_cache_metadata_close(cmd);
 775                return ERR_PTR(r);
 776        }
 777
 778        return cmd;
 779}
 780
 781/*
 782 * We keep a little list of ref counted metadata objects to prevent two
 783 * different target instances creating separate bufio instances.  This is
 784 * an issue if a table is reloaded before the suspend.
 785 */
 786static DEFINE_MUTEX(table_lock);
 787static LIST_HEAD(table);
 788
 789static struct dm_cache_metadata *lookup(struct block_device *bdev)
 790{
 791        struct dm_cache_metadata *cmd;
 792
 793        list_for_each_entry(cmd, &table, list)
 794                if (cmd->bdev == bdev) {
 795                        atomic_inc(&cmd->ref_count);
 796                        return cmd;
 797                }
 798
 799        return NULL;
 800}
 801
 802static struct dm_cache_metadata *lookup_or_open(struct block_device *bdev,
 803                                                sector_t data_block_size,
 804                                                bool may_format_device,
 805                                                size_t policy_hint_size,
 806                                                unsigned metadata_version)
 807{
 808        struct dm_cache_metadata *cmd, *cmd2;
 809
 810        mutex_lock(&table_lock);
 811        cmd = lookup(bdev);
 812        mutex_unlock(&table_lock);
 813
 814        if (cmd)
 815                return cmd;
 816
 817        cmd = metadata_open(bdev, data_block_size, may_format_device,
 818                            policy_hint_size, metadata_version);
 819        if (!IS_ERR(cmd)) {
 820                mutex_lock(&table_lock);
 821                cmd2 = lookup(bdev);
 822                if (cmd2) {
 823                        mutex_unlock(&table_lock);
 824                        __destroy_persistent_data_objects(cmd);
 825                        kfree(cmd);
 826                        return cmd2;
 827                }
 828                list_add(&cmd->list, &table);
 829                mutex_unlock(&table_lock);
 830        }
 831
 832        return cmd;
 833}
 834
 835static bool same_params(struct dm_cache_metadata *cmd, sector_t data_block_size)
 836{
 837        if (cmd->data_block_size != data_block_size) {
 838                DMERR("data_block_size (%llu) different from that in metadata (%llu)",
 839                      (unsigned long long) data_block_size,
 840                      (unsigned long long) cmd->data_block_size);
 841                return false;
 842        }
 843
 844        return true;
 845}
 846
 847struct dm_cache_metadata *dm_cache_metadata_open(struct block_device *bdev,
 848                                                 sector_t data_block_size,
 849                                                 bool may_format_device,
 850                                                 size_t policy_hint_size,
 851                                                 unsigned metadata_version)
 852{
 853        struct dm_cache_metadata *cmd = lookup_or_open(bdev, data_block_size, may_format_device,
 854                                                       policy_hint_size, metadata_version);
 855
 856        if (!IS_ERR(cmd) && !same_params(cmd, data_block_size)) {
 857                dm_cache_metadata_close(cmd);
 858                return ERR_PTR(-EINVAL);
 859        }
 860
 861        return cmd;
 862}
 863
 864void dm_cache_metadata_close(struct dm_cache_metadata *cmd)
 865{
 866        if (atomic_dec_and_test(&cmd->ref_count)) {
 867                mutex_lock(&table_lock);
 868                list_del(&cmd->list);
 869                mutex_unlock(&table_lock);
 870
 871                if (!cmd->fail_io)
 872                        __destroy_persistent_data_objects(cmd);
 873                kfree(cmd);
 874        }
 875}
 876
 877/*
 878 * Checks that the given cache block is either unmapped or clean.
 879 */
 880static int block_clean_combined_dirty(struct dm_cache_metadata *cmd, dm_cblock_t b,
 881                                      bool *result)
 882{
 883        int r;
 884        __le64 value;
 885        dm_oblock_t ob;
 886        unsigned flags;
 887
 888        r = dm_array_get_value(&cmd->info, cmd->root, from_cblock(b), &value);
 889        if (r)
 890                return r;
 891
 892        unpack_value(value, &ob, &flags);
 893        *result = !((flags & M_VALID) && (flags & M_DIRTY));
 894
 895        return 0;
 896}
 897
 898static int blocks_are_clean_combined_dirty(struct dm_cache_metadata *cmd,
 899                                           dm_cblock_t begin, dm_cblock_t end,
 900                                           bool *result)
 901{
 902        int r;
 903        *result = true;
 904
 905        while (begin != end) {
 906                r = block_clean_combined_dirty(cmd, begin, result);
 907                if (r) {
 908                        DMERR("block_clean_combined_dirty failed");
 909                        return r;
 910                }
 911
 912                if (!*result) {
 913                        DMERR("cache block %llu is dirty",
 914                              (unsigned long long) from_cblock(begin));
 915                        return 0;
 916                }
 917
 918                begin = to_cblock(from_cblock(begin) + 1);
 919        }
 920
 921        return 0;
 922}
 923
 924static int blocks_are_clean_separate_dirty(struct dm_cache_metadata *cmd,
 925                                           dm_cblock_t begin, dm_cblock_t end,
 926                                           bool *result)
 927{
 928        int r;
 929        bool dirty_flag;
 930        *result = true;
 931
 932        if (from_cblock(cmd->cache_blocks) == 0)
 933                /* Nothing to do */
 934                return 0;
 935
 936        r = dm_bitset_cursor_begin(&cmd->dirty_info, cmd->dirty_root,
 937                                   from_cblock(cmd->cache_blocks), &cmd->dirty_cursor);
 938        if (r) {
 939                DMERR("%s: dm_bitset_cursor_begin for dirty failed", __func__);
 940                return r;
 941        }
 942
 943        r = dm_bitset_cursor_skip(&cmd->dirty_cursor, from_cblock(begin));
 944        if (r) {
 945                DMERR("%s: dm_bitset_cursor_skip for dirty failed", __func__);
 946                dm_bitset_cursor_end(&cmd->dirty_cursor);
 947                return r;
 948        }
 949
 950        while (begin != end) {
 951                /*
 952                 * We assume that unmapped blocks have their dirty bit
 953                 * cleared.
 954                 */
 955                dirty_flag = dm_bitset_cursor_get_value(&cmd->dirty_cursor);
 956                if (dirty_flag) {
 957                        DMERR("%s: cache block %llu is dirty", __func__,
 958                              (unsigned long long) from_cblock(begin));
 959                        dm_bitset_cursor_end(&cmd->dirty_cursor);
 960                        *result = false;
 961                        return 0;
 962                }
 963
 964                begin = to_cblock(from_cblock(begin) + 1);
 965                if (begin == end)
 966                        break;
 967
 968                r = dm_bitset_cursor_next(&cmd->dirty_cursor);
 969                if (r) {
 970                        DMERR("%s: dm_bitset_cursor_next for dirty failed", __func__);
 971                        dm_bitset_cursor_end(&cmd->dirty_cursor);
 972                        return r;
 973                }
 974        }
 975
 976        dm_bitset_cursor_end(&cmd->dirty_cursor);
 977
 978        return 0;
 979}
 980
 981static int blocks_are_unmapped_or_clean(struct dm_cache_metadata *cmd,
 982                                        dm_cblock_t begin, dm_cblock_t end,
 983                                        bool *result)
 984{
 985        if (separate_dirty_bits(cmd))
 986                return blocks_are_clean_separate_dirty(cmd, begin, end, result);
 987        else
 988                return blocks_are_clean_combined_dirty(cmd, begin, end, result);
 989}
 990
 991static bool cmd_write_lock(struct dm_cache_metadata *cmd)
 992{
 993        down_write(&cmd->root_lock);
 994        if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) {
 995                up_write(&cmd->root_lock);
 996                return false;
 997        }
 998        return true;
 999}
1000
/*
 * Take the write lock via cmd_write_lock(); if that is refused, make the
 * *enclosing function* return -EINVAL.  For functions returning int.
 */
#define WRITE_LOCK(cmd)					\
	do {						\
		if (!cmd_write_lock((cmd)))		\
			return -EINVAL;			\
	} while (0)

/*
 * As WRITE_LOCK(), but for enclosing functions that return void.
 */
#define WRITE_LOCK_VOID(cmd)				\
	do {						\
		if (!cmd_write_lock((cmd)))		\
			return;				\
	} while (0)

/*
 * Release the write lock taken by WRITE_LOCK() / WRITE_LOCK_VOID().
 */
#define WRITE_UNLOCK(cmd) \
	up_write(&(cmd)->root_lock)
1015
1016static bool cmd_read_lock(struct dm_cache_metadata *cmd)
1017{
1018        down_read(&cmd->root_lock);
1019        if (cmd->fail_io) {
1020                up_read(&cmd->root_lock);
1021                return false;
1022        }
1023        return true;
1024}
1025
/*
 * Take the read lock via cmd_read_lock(); if that is refused, make the
 * *enclosing function* return -EINVAL.  For functions returning int.
 */
#define READ_LOCK(cmd)					\
	do {						\
		if (!cmd_read_lock((cmd)))		\
			return -EINVAL;			\
	} while (0)

/*
 * As READ_LOCK(), but for enclosing functions that return void.
 */
#define READ_LOCK_VOID(cmd)				\
	do {						\
		if (!cmd_read_lock((cmd)))		\
			return;				\
	} while (0)

/*
 * Release the read lock taken by READ_LOCK() / READ_LOCK_VOID().
 */
#define READ_UNLOCK(cmd) \
	up_read(&(cmd)->root_lock)
1040
1041int dm_cache_resize(struct dm_cache_metadata *cmd, dm_cblock_t new_cache_size)
1042{
1043        int r;
1044        bool clean;
1045        __le64 null_mapping = pack_value(0, 0);
1046
1047        WRITE_LOCK(cmd);
1048        __dm_bless_for_disk(&null_mapping);
1049
1050        if (from_cblock(new_cache_size) < from_cblock(cmd->cache_blocks)) {
1051                r = blocks_are_unmapped_or_clean(cmd, new_cache_size, cmd->cache_blocks, &clean);
1052                if (r) {
1053                        __dm_unbless_for_disk(&null_mapping);
1054                        goto out;
1055                }
1056
1057                if (!clean) {
1058                        DMERR("unable to shrink cache due to dirty blocks");
1059                        r = -EINVAL;
1060                        __dm_unbless_for_disk(&null_mapping);
1061                        goto out;
1062                }
1063        }
1064
1065        r = dm_array_resize(&cmd->info, cmd->root, from_cblock(cmd->cache_blocks),
1066                            from_cblock(new_cache_size),
1067                            &null_mapping, &cmd->root);
1068        if (r)
1069                goto out;
1070
1071        if (separate_dirty_bits(cmd)) {
1072                r = dm_bitset_resize(&cmd->dirty_info, cmd->dirty_root,
1073                                     from_cblock(cmd->cache_blocks), from_cblock(new_cache_size),
1074                                     false, &cmd->dirty_root);
1075                if (r)
1076                        goto out;
1077        }
1078
1079        cmd->cache_blocks = new_cache_size;
1080        cmd->changed = true;
1081
1082out:
1083        WRITE_UNLOCK(cmd);
1084
1085        return r;
1086}
1087
1088int dm_cache_discard_bitset_resize(struct dm_cache_metadata *cmd,
1089                                   sector_t discard_block_size,
1090                                   dm_dblock_t new_nr_entries)
1091{
1092        int r;
1093
1094        WRITE_LOCK(cmd);
1095        r = dm_bitset_resize(&cmd->discard_info,
1096                             cmd->discard_root,
1097                             from_dblock(cmd->discard_nr_blocks),
1098                             from_dblock(new_nr_entries),
1099                             false, &cmd->discard_root);
1100        if (!r) {
1101                cmd->discard_block_size = discard_block_size;
1102                cmd->discard_nr_blocks = new_nr_entries;
1103        }
1104
1105        cmd->changed = true;
1106        WRITE_UNLOCK(cmd);
1107
1108        return r;
1109}
1110
1111static int __set_discard(struct dm_cache_metadata *cmd, dm_dblock_t b)
1112{
1113        return dm_bitset_set_bit(&cmd->discard_info, cmd->discard_root,
1114                                 from_dblock(b), &cmd->discard_root);
1115}
1116
1117static int __clear_discard(struct dm_cache_metadata *cmd, dm_dblock_t b)
1118{
1119        return dm_bitset_clear_bit(&cmd->discard_info, cmd->discard_root,
1120                                   from_dblock(b), &cmd->discard_root);
1121}
1122
1123static int __discard(struct dm_cache_metadata *cmd,
1124                     dm_dblock_t dblock, bool discard)
1125{
1126        int r;
1127
1128        r = (discard ? __set_discard : __clear_discard)(cmd, dblock);
1129        if (r)
1130                return r;
1131
1132        cmd->changed = true;
1133        return 0;
1134}
1135
1136int dm_cache_set_discard(struct dm_cache_metadata *cmd,
1137                         dm_dblock_t dblock, bool discard)
1138{
1139        int r;
1140
1141        WRITE_LOCK(cmd);
1142        r = __discard(cmd, dblock, discard);
1143        WRITE_UNLOCK(cmd);
1144
1145        return r;
1146}
1147
1148static int __load_discards(struct dm_cache_metadata *cmd,
1149                           load_discard_fn fn, void *context)
1150{
1151        int r = 0;
1152        uint32_t b;
1153        struct dm_bitset_cursor c;
1154
1155        if (from_dblock(cmd->discard_nr_blocks) == 0)
1156                /* nothing to do */
1157                return 0;
1158
1159        if (cmd->clean_when_opened) {
1160                r = dm_bitset_flush(&cmd->discard_info, cmd->discard_root, &cmd->discard_root);
1161                if (r)
1162                        return r;
1163
1164                r = dm_bitset_cursor_begin(&cmd->discard_info, cmd->discard_root,
1165                                           from_dblock(cmd->discard_nr_blocks), &c);
1166                if (r)
1167                        return r;
1168
1169                for (b = 0; ; b++) {
1170                        r = fn(context, cmd->discard_block_size, to_dblock(b),
1171                               dm_bitset_cursor_get_value(&c));
1172                        if (r)
1173                                break;
1174
1175                        if (b >= (from_dblock(cmd->discard_nr_blocks) - 1))
1176                                break;
1177
1178                        r = dm_bitset_cursor_next(&c);
1179                        if (r)
1180                                break;
1181                }
1182
1183                dm_bitset_cursor_end(&c);
1184
1185        } else {
1186                for (b = 0; b < from_dblock(cmd->discard_nr_blocks); b++) {
1187                        r = fn(context, cmd->discard_block_size, to_dblock(b), false);
1188                        if (r)
1189                                return r;
1190                }
1191        }
1192
1193        return r;
1194}
1195
1196int dm_cache_load_discards(struct dm_cache_metadata *cmd,
1197                           load_discard_fn fn, void *context)
1198{
1199        int r;
1200
1201        READ_LOCK(cmd);
1202        r = __load_discards(cmd, fn, context);
1203        READ_UNLOCK(cmd);
1204
1205        return r;
1206}
1207
1208int dm_cache_size(struct dm_cache_metadata *cmd, dm_cblock_t *result)
1209{
1210        READ_LOCK(cmd);
1211        *result = cmd->cache_blocks;
1212        READ_UNLOCK(cmd);
1213
1214        return 0;
1215}
1216
1217static int __remove(struct dm_cache_metadata *cmd, dm_cblock_t cblock)
1218{
1219        int r;
1220        __le64 value = pack_value(0, 0);
1221
1222        __dm_bless_for_disk(&value);
1223        r = dm_array_set_value(&cmd->info, cmd->root, from_cblock(cblock),
1224                               &value, &cmd->root);
1225        if (r)
1226                return r;
1227
1228        cmd->changed = true;
1229        return 0;
1230}
1231
1232int dm_cache_remove_mapping(struct dm_cache_metadata *cmd, dm_cblock_t cblock)
1233{
1234        int r;
1235
1236        WRITE_LOCK(cmd);
1237        r = __remove(cmd, cblock);
1238        WRITE_UNLOCK(cmd);
1239
1240        return r;
1241}
1242
1243static int __insert(struct dm_cache_metadata *cmd,
1244                    dm_cblock_t cblock, dm_oblock_t oblock)
1245{
1246        int r;
1247        __le64 value = pack_value(oblock, M_VALID);
1248        __dm_bless_for_disk(&value);
1249
1250        r = dm_array_set_value(&cmd->info, cmd->root, from_cblock(cblock),
1251                               &value, &cmd->root);
1252        if (r)
1253                return r;
1254
1255        cmd->changed = true;
1256        return 0;
1257}
1258
1259int dm_cache_insert_mapping(struct dm_cache_metadata *cmd,
1260                            dm_cblock_t cblock, dm_oblock_t oblock)
1261{
1262        int r;
1263
1264        WRITE_LOCK(cmd);
1265        r = __insert(cmd, cblock, oblock);
1266        WRITE_UNLOCK(cmd);
1267
1268        return r;
1269}
1270
1271struct thunk {
1272        load_mapping_fn fn;
1273        void *context;
1274
1275        struct dm_cache_metadata *cmd;
1276        bool respect_dirty_flags;
1277        bool hints_valid;
1278};
1279
1280static bool policy_unchanged(struct dm_cache_metadata *cmd,
1281                             struct dm_cache_policy *policy)
1282{
1283        const char *policy_name = dm_cache_policy_get_name(policy);
1284        const unsigned *policy_version = dm_cache_policy_get_version(policy);
1285        size_t policy_hint_size = dm_cache_policy_get_hint_size(policy);
1286
1287        /*
1288         * Ensure policy names match.
1289         */
1290        if (strncmp(cmd->policy_name, policy_name, sizeof(cmd->policy_name)))
1291                return false;
1292
1293        /*
1294         * Ensure policy major versions match.
1295         */
1296        if (cmd->policy_version[0] != policy_version[0])
1297                return false;
1298
1299        /*
1300         * Ensure policy hint sizes match.
1301         */
1302        if (cmd->policy_hint_size != policy_hint_size)
1303                return false;
1304
1305        return true;
1306}
1307
1308static bool hints_array_initialized(struct dm_cache_metadata *cmd)
1309{
1310        return cmd->hint_root && cmd->policy_hint_size;
1311}
1312
1313static bool hints_array_available(struct dm_cache_metadata *cmd,
1314                                  struct dm_cache_policy *policy)
1315{
1316        return cmd->clean_when_opened && policy_unchanged(cmd, policy) &&
1317                hints_array_initialized(cmd);
1318}
1319
1320static int __load_mapping_v1(struct dm_cache_metadata *cmd,
1321                             uint64_t cb, bool hints_valid,
1322                             struct dm_array_cursor *mapping_cursor,
1323                             struct dm_array_cursor *hint_cursor,
1324                             load_mapping_fn fn, void *context)
1325{
1326        int r = 0;
1327
1328        __le64 mapping;
1329        __le32 hint = 0;
1330
1331        __le64 *mapping_value_le;
1332        __le32 *hint_value_le;
1333
1334        dm_oblock_t oblock;
1335        unsigned flags;
1336        bool dirty = true;
1337
1338        dm_array_cursor_get_value(mapping_cursor, (void **) &mapping_value_le);
1339        memcpy(&mapping, mapping_value_le, sizeof(mapping));
1340        unpack_value(mapping, &oblock, &flags);
1341
1342        if (flags & M_VALID) {
1343                if (hints_valid) {
1344                        dm_array_cursor_get_value(hint_cursor, (void **) &hint_value_le);
1345                        memcpy(&hint, hint_value_le, sizeof(hint));
1346                }
1347                if (cmd->clean_when_opened)
1348                        dirty = flags & M_DIRTY;
1349
1350                r = fn(context, oblock, to_cblock(cb), dirty,
1351                       le32_to_cpu(hint), hints_valid);
1352                if (r) {
1353                        DMERR("policy couldn't load cache block %llu",
1354                              (unsigned long long) from_cblock(to_cblock(cb)));
1355                }
1356        }
1357
1358        return r;
1359}
1360
1361static int __load_mapping_v2(struct dm_cache_metadata *cmd,
1362                             uint64_t cb, bool hints_valid,
1363                             struct dm_array_cursor *mapping_cursor,
1364                             struct dm_array_cursor *hint_cursor,
1365                             struct dm_bitset_cursor *dirty_cursor,
1366                             load_mapping_fn fn, void *context)
1367{
1368        int r = 0;
1369
1370        __le64 mapping;
1371        __le32 hint = 0;
1372
1373        __le64 *mapping_value_le;
1374        __le32 *hint_value_le;
1375
1376        dm_oblock_t oblock;
1377        unsigned flags;
1378        bool dirty = true;
1379
1380        dm_array_cursor_get_value(mapping_cursor, (void **) &mapping_value_le);
1381        memcpy(&mapping, mapping_value_le, sizeof(mapping));
1382        unpack_value(mapping, &oblock, &flags);
1383
1384        if (flags & M_VALID) {
1385                if (hints_valid) {
1386                        dm_array_cursor_get_value(hint_cursor, (void **) &hint_value_le);
1387                        memcpy(&hint, hint_value_le, sizeof(hint));
1388                }
1389                if (cmd->clean_when_opened)
1390                        dirty = dm_bitset_cursor_get_value(dirty_cursor);
1391
1392                r = fn(context, oblock, to_cblock(cb), dirty,
1393                       le32_to_cpu(hint), hints_valid);
1394                if (r) {
1395                        DMERR("policy couldn't load cache block %llu",
1396                              (unsigned long long) from_cblock(to_cblock(cb)));
1397                }
1398        }
1399
1400        return r;
1401}
1402
1403static int __load_mappings(struct dm_cache_metadata *cmd,
1404                           struct dm_cache_policy *policy,
1405                           load_mapping_fn fn, void *context)
1406{
1407        int r;
1408        uint64_t cb;
1409
1410        bool hints_valid = hints_array_available(cmd, policy);
1411
1412        if (from_cblock(cmd->cache_blocks) == 0)
1413                /* Nothing to do */
1414                return 0;
1415
1416        r = dm_array_cursor_begin(&cmd->info, cmd->root, &cmd->mapping_cursor);
1417        if (r)
1418                return r;
1419
1420        if (hints_valid) {
1421                r = dm_array_cursor_begin(&cmd->hint_info, cmd->hint_root, &cmd->hint_cursor);
1422                if (r) {
1423                        dm_array_cursor_end(&cmd->mapping_cursor);
1424                        return r;
1425                }
1426        }
1427
1428        if (separate_dirty_bits(cmd)) {
1429                r = dm_bitset_cursor_begin(&cmd->dirty_info, cmd->dirty_root,
1430                                           from_cblock(cmd->cache_blocks),
1431                                           &cmd->dirty_cursor);
1432                if (r) {
1433                        dm_array_cursor_end(&cmd->hint_cursor);
1434                        dm_array_cursor_end(&cmd->mapping_cursor);
1435                        return r;
1436                }
1437        }
1438
1439        for (cb = 0; ; cb++) {
1440                if (separate_dirty_bits(cmd))
1441                        r = __load_mapping_v2(cmd, cb, hints_valid,
1442                                              &cmd->mapping_cursor,
1443                                              &cmd->hint_cursor,
1444                                              &cmd->dirty_cursor,
1445                                              fn, context);
1446                else
1447                        r = __load_mapping_v1(cmd, cb, hints_valid,
1448                                              &cmd->mapping_cursor, &cmd->hint_cursor,
1449                                              fn, context);
1450                if (r)
1451                        goto out;
1452
1453                /*
1454                 * We need to break out before we move the cursors.
1455                 */
1456                if (cb >= (from_cblock(cmd->cache_blocks) - 1))
1457                        break;
1458
1459                r = dm_array_cursor_next(&cmd->mapping_cursor);
1460                if (r) {
1461                        DMERR("dm_array_cursor_next for mapping failed");
1462                        goto out;
1463                }
1464
1465                if (hints_valid) {
1466                        r = dm_array_cursor_next(&cmd->hint_cursor);
1467                        if (r) {
1468                                dm_array_cursor_end(&cmd->hint_cursor);
1469                                hints_valid = false;
1470                        }
1471                }
1472
1473                if (separate_dirty_bits(cmd)) {
1474                        r = dm_bitset_cursor_next(&cmd->dirty_cursor);
1475                        if (r) {
1476                                DMERR("dm_bitset_cursor_next for dirty failed");
1477                                goto out;
1478                        }
1479                }
1480        }
1481out:
1482        dm_array_cursor_end(&cmd->mapping_cursor);
1483        if (hints_valid)
1484                dm_array_cursor_end(&cmd->hint_cursor);
1485
1486        if (separate_dirty_bits(cmd))
1487                dm_bitset_cursor_end(&cmd->dirty_cursor);
1488
1489        return r;
1490}
1491
1492int dm_cache_load_mappings(struct dm_cache_metadata *cmd,
1493                           struct dm_cache_policy *policy,
1494                           load_mapping_fn fn, void *context)
1495{
1496        int r;
1497
1498        READ_LOCK(cmd);
1499        r = __load_mappings(cmd, policy, fn, context);
1500        READ_UNLOCK(cmd);
1501
1502        return r;
1503}
1504
1505static int __dump_mapping(void *context, uint64_t cblock, void *leaf)
1506{
1507        int r = 0;
1508        __le64 value;
1509        dm_oblock_t oblock;
1510        unsigned flags;
1511
1512        memcpy(&value, leaf, sizeof(value));
1513        unpack_value(value, &oblock, &flags);
1514
1515        return r;
1516}
1517
1518static int __dump_mappings(struct dm_cache_metadata *cmd)
1519{
1520        return dm_array_walk(&cmd->info, cmd->root, __dump_mapping, NULL);
1521}
1522
/*
 * Walk the mapping array under the read lock.  Does nothing if the read
 * lock cannot be taken; the result of the walk is ignored.
 */
void dm_cache_dump(struct dm_cache_metadata *cmd)
{
	READ_LOCK_VOID(cmd);

	__dump_mappings(cmd);

	READ_UNLOCK(cmd);
}
1529
1530int dm_cache_changed_this_transaction(struct dm_cache_metadata *cmd)
1531{
1532        int r;
1533
1534        READ_LOCK(cmd);
1535        r = cmd->changed;
1536        READ_UNLOCK(cmd);
1537
1538        return r;
1539}
1540
1541static int __dirty(struct dm_cache_metadata *cmd, dm_cblock_t cblock, bool dirty)
1542{
1543        int r;
1544        unsigned flags;
1545        dm_oblock_t oblock;
1546        __le64 value;
1547
1548        r = dm_array_get_value(&cmd->info, cmd->root, from_cblock(cblock), &value);
1549        if (r)
1550                return r;
1551
1552        unpack_value(value, &oblock, &flags);
1553
1554        if (((flags & M_DIRTY) && dirty) || (!(flags & M_DIRTY) && !dirty))
1555                /* nothing to be done */
1556                return 0;
1557
1558        value = pack_value(oblock, (flags & ~M_DIRTY) | (dirty ? M_DIRTY : 0));
1559        __dm_bless_for_disk(&value);
1560
1561        r = dm_array_set_value(&cmd->info, cmd->root, from_cblock(cblock),
1562                               &value, &cmd->root);
1563        if (r)
1564                return r;
1565
1566        cmd->changed = true;
1567        return 0;
1568
1569}
1570
/*
 * Apply the first @nr_bits bits of @bits to the M_DIRTY flags of cache
 * blocks 0..nr_bits-1, stopping at the first failure.
 */
static int __set_dirty_bits_v1(struct dm_cache_metadata *cmd, unsigned nr_bits, unsigned long *bits)
{
	int r = 0;
	unsigned i;

	for (i = 0; i < nr_bits && !r; i++)
		r = __dirty(cmd, to_cblock(i), test_bit(i, bits));

	return r;
}
1583
1584static int is_dirty_callback(uint32_t index, bool *value, void *context)
1585{
1586        unsigned long *bits = context;
1587        *value = test_bit(index, bits);
1588        return 0;
1589}
1590
1591static int __set_dirty_bits_v2(struct dm_cache_metadata *cmd, unsigned nr_bits, unsigned long *bits)
1592{
1593        int r = 0;
1594
1595        /* nr_bits is really just a sanity check */
1596        if (nr_bits != from_cblock(cmd->cache_blocks)) {
1597                DMERR("dirty bitset is wrong size");
1598                return -EINVAL;
1599        }
1600
1601        r = dm_bitset_del(&cmd->dirty_info, cmd->dirty_root);
1602        if (r)
1603                return r;
1604
1605        cmd->changed = true;
1606        return dm_bitset_new(&cmd->dirty_info, &cmd->dirty_root, nr_bits, is_dirty_callback, bits);
1607}
1608
/*
 * Store the dirty state given by @bits, using whichever representation
 * separate_dirty_bits() selects.  Returns -EINVAL if the write lock cannot
 * be taken, otherwise the result of the chosen helper.
 */
int dm_cache_set_dirty_bits(struct dm_cache_metadata *cmd,
			    unsigned nr_bits,
			    unsigned long *bits)
{
	int r;

	WRITE_LOCK(cmd);

	if (!separate_dirty_bits(cmd))
		r = __set_dirty_bits_v1(cmd, nr_bits, bits);
	else
		r = __set_dirty_bits_v2(cmd, nr_bits, bits);

	WRITE_UNLOCK(cmd);

	return r;
}
1624
1625void dm_cache_metadata_get_stats(struct dm_cache_metadata *cmd,
1626                                 struct dm_cache_statistics *stats)
1627{
1628        READ_LOCK_VOID(cmd);
1629        *stats = cmd->stats;
1630        READ_UNLOCK(cmd);
1631}
1632
1633void dm_cache_metadata_set_stats(struct dm_cache_metadata *cmd,
1634                                 struct dm_cache_statistics *stats)
1635{
1636        WRITE_LOCK_VOID(cmd);
1637        cmd->stats = *stats;
1638        WRITE_UNLOCK(cmd);
1639}
1640
1641int dm_cache_commit(struct dm_cache_metadata *cmd, bool clean_shutdown)
1642{
1643        int r = -EINVAL;
1644        flags_mutator mutator = (clean_shutdown ? set_clean_shutdown :
1645                                 clear_clean_shutdown);
1646
1647        WRITE_LOCK(cmd);
1648        if (cmd->fail_io)
1649                goto out;
1650
1651        r = __commit_transaction(cmd, mutator);
1652        if (r)
1653                goto out;
1654
1655        r = __begin_transaction(cmd);
1656out:
1657        WRITE_UNLOCK(cmd);
1658        return r;
1659}
1660
1661int dm_cache_get_free_metadata_block_count(struct dm_cache_metadata *cmd,
1662                                           dm_block_t *result)
1663{
1664        int r = -EINVAL;
1665
1666        READ_LOCK(cmd);
1667        if (!cmd->fail_io)
1668                r = dm_sm_get_nr_free(cmd->metadata_sm, result);
1669        READ_UNLOCK(cmd);
1670
1671        return r;
1672}
1673
1674int dm_cache_get_metadata_dev_size(struct dm_cache_metadata *cmd,
1675                                   dm_block_t *result)
1676{
1677        int r = -EINVAL;
1678
1679        READ_LOCK(cmd);
1680        if (!cmd->fail_io)
1681                r = dm_sm_get_nr_blocks(cmd->metadata_sm, result);
1682        READ_UNLOCK(cmd);
1683
1684        return r;
1685}
1686
1687/*----------------------------------------------------------------*/
1688
1689static int get_hint(uint32_t index, void *value_le, void *context)
1690{
1691        uint32_t value;
1692        struct dm_cache_policy *policy = context;
1693
1694        value = policy_get_hint(policy, to_cblock(index));
1695        *((__le32 *) value_le) = cpu_to_le32(value);
1696
1697        return 0;
1698}
1699
1700/*
1701 * It's quicker to always delete the hint array, and recreate with
1702 * dm_array_new().
1703 */
1704static int write_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *policy)
1705{
1706        int r;
1707        size_t hint_size;
1708        const char *policy_name = dm_cache_policy_get_name(policy);
1709        const unsigned *policy_version = dm_cache_policy_get_version(policy);
1710
1711        if (!policy_name[0] ||
1712            (strlen(policy_name) > sizeof(cmd->policy_name) - 1))
1713                return -EINVAL;
1714
1715        strncpy(cmd->policy_name, policy_name, sizeof(cmd->policy_name));
1716        memcpy(cmd->policy_version, policy_version, sizeof(cmd->policy_version));
1717
1718        hint_size = dm_cache_policy_get_hint_size(policy);
1719        if (!hint_size)
1720                return 0; /* short-circuit hints initialization */
1721        cmd->policy_hint_size = hint_size;
1722
1723        if (cmd->hint_root) {
1724                r = dm_array_del(&cmd->hint_info, cmd->hint_root);
1725                if (r)
1726                        return r;
1727        }
1728
1729        return dm_array_new(&cmd->hint_info, &cmd->hint_root,
1730                            from_cblock(cmd->cache_blocks),
1731                            get_hint, policy);
1732}
1733
/*
 * Locked wrapper around write_hints().  Returns -EINVAL if the write lock
 * cannot be taken, otherwise whatever write_hints() returns.
 */
int dm_cache_write_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *policy)
{
	int ret;

	WRITE_LOCK(cmd);
	ret = write_hints(cmd, policy);
	WRITE_UNLOCK(cmd);

	return ret;
}
1744
1745int dm_cache_metadata_all_clean(struct dm_cache_metadata *cmd, bool *result)
1746{
1747        int r;
1748
1749        READ_LOCK(cmd);
1750        r = blocks_are_unmapped_or_clean(cmd, 0, cmd->cache_blocks, result);
1751        READ_UNLOCK(cmd);
1752
1753        return r;
1754}
1755
1756void dm_cache_metadata_set_read_only(struct dm_cache_metadata *cmd)
1757{
1758        WRITE_LOCK_VOID(cmd);
1759        dm_bm_set_read_only(cmd->bm);
1760        WRITE_UNLOCK(cmd);
1761}
1762
1763void dm_cache_metadata_set_read_write(struct dm_cache_metadata *cmd)
1764{
1765        WRITE_LOCK_VOID(cmd);
1766        dm_bm_set_read_write(cmd->bm);
1767        WRITE_UNLOCK(cmd);
1768}
1769
1770int dm_cache_metadata_set_needs_check(struct dm_cache_metadata *cmd)
1771{
1772        int r;
1773        struct dm_block *sblock;
1774        struct cache_disk_superblock *disk_super;
1775
1776        WRITE_LOCK(cmd);
1777        set_bit(NEEDS_CHECK, &cmd->flags);
1778
1779        r = superblock_lock(cmd, &sblock);
1780        if (r) {
1781                DMERR("couldn't read superblock");
1782                goto out;
1783        }
1784
1785        disk_super = dm_block_data(sblock);
1786        disk_super->flags = cpu_to_le32(cmd->flags);
1787
1788        dm_bm_unlock(sblock);
1789
1790out:
1791        WRITE_UNLOCK(cmd);
1792        return r;
1793}
1794
1795int dm_cache_metadata_needs_check(struct dm_cache_metadata *cmd, bool *result)
1796{
1797        READ_LOCK(cmd);
1798        *result = !!test_bit(NEEDS_CHECK, &cmd->flags);
1799        READ_UNLOCK(cmd);
1800
1801        return 0;
1802}
1803
1804int dm_cache_metadata_abort(struct dm_cache_metadata *cmd)
1805{
1806        int r;
1807
1808        WRITE_LOCK(cmd);
1809        __destroy_persistent_data_objects(cmd);
1810        r = __create_persistent_data_objects(cmd, false);
1811        if (r)
1812                cmd->fail_io = true;
1813        WRITE_UNLOCK(cmd);
1814
1815        return r;
1816}
1817