linux/drivers/md/dm-snap-persistent.c
/*
 * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
 * Copyright (C) 2006-2008 Red Hat GmbH
 *
 * This file is released under the GPL.
 */

#include "dm-exception-store.h"

#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/vmalloc.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/dm-io.h>
#include "dm-bufio.h"

#define DM_MSG_PREFIX "persistent snapshot"
#define DM_CHUNK_SIZE_DEFAULT_SECTORS 32        /* 16KB */

#define DM_PREFETCH_CHUNKS              12

/*-----------------------------------------------------------------
 * Persistent snapshots: by "persistent" we mean that the snapshot
 * will survive a reboot.
 *---------------------------------------------------------------*/

/*
 * We need to store a record of which parts of the origin have
 * been copied to the snapshot device.  The snapshot code
 * requires that we copy exception chunks to chunk-aligned areas
 * of the COW store.  It therefore makes sense to store the
 * metadata in chunk-sized blocks.
 *
 * There is no backward or forward compatibility implemented;
 * snapshots with a disk version different from the kernel's will
 * not be usable.  It is expected that "lvcreate" will blank out
 * the start of a fresh COW device before calling the snapshot
 * constructor.
 *
 * The first chunk of the COW device just contains the header.
 * After this there is a chunk filled with exception metadata,
 * followed by as many exception chunks as can fit in the
 * metadata areas.
 *
 * All on-disk structures are in little-endian format.  The end
 * of the exceptions info is indicated by an exception with a
 * new_chunk of 0, which is invalid since it would point to the
 * header chunk.
 */
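
/*
 * Illustrative sketch of the resulting on-disk layout (added for
 * clarity; the numbers assume the default 16KB chunk size, i.e.
 * 1024 struct disk_exception entries per metadata area, and the
 * common case of in-order chunk allocation):
 *
 *   chunk 0               header
 *   chunk 1               metadata area 0 (exception table)
 *   chunks 2 .. 1025      data chunks referenced by area 0
 *   chunk 1026            metadata area 1
 *   chunks 1027 .. 2050   data chunks referenced by area 1
 *   ...
 */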

/*
 * Magic for persistent snapshots: "SnAp" - Feeble, isn't it?
 */
#define SNAP_MAGIC 0x70416e53

/*
 * The on-disk version of the metadata.
 */
#define SNAPSHOT_DISK_VERSION 1

#define NUM_SNAPSHOT_HDR_CHUNKS 1

struct disk_header {
        __le32 magic;

        /*
         * Is this snapshot valid?  There is no way of recovering
         * an invalid snapshot.
         */
        __le32 valid;

        /*
         * Simple, incrementing version.  No backward
         * compatibility.
         */
        __le32 version;

        /* In sectors */
        __le32 chunk_size;
} __packed;

struct disk_exception {
        __le64 old_chunk;
        __le64 new_chunk;
} __packed;

struct core_exception {
        uint64_t old_chunk;
        uint64_t new_chunk;
};

struct commit_callback {
        void (*callback)(void *, int success);
        void *context;
};

/*
 * The top level structure for a persistent exception store.
 */
struct pstore {
        struct dm_exception_store *store;
        int version;
        int valid;
        uint32_t exceptions_per_area;

        /*
         * Now that we have an asynchronous kcopyd there is no
         * need for large chunk sizes, so it won't hurt to have a
         * whole chunk's worth of metadata in memory at once.
         */
        void *area;

        /*
         * An area of zeros used to clear the next area.
         */
        void *zero_area;

        /*
         * An area used for the header.  The header can be written
         * concurrently with metadata (when invalidating the snapshot),
         * so it needs a separate buffer.
         */
        void *header_area;

        /*
         * Used to keep track of which metadata area the data in
         * 'chunk' refers to.
         */
        chunk_t current_area;

        /*
         * The next free chunk for an exception.
         *
         * When creating exceptions, all the chunks here and above are
         * free.  It holds the next chunk to be allocated.  On rare
         * occasions (e.g. after a system crash) holes can be left in
         * the exception store because chunks can be committed out of
         * order.
         *
         * When merging exceptions, it does not necessarily mean all the
         * chunks here and above are free.  It holds the value it would
         * have held if all chunks had been committed in order of
         * allocation.  Consequently the value may occasionally be
         * slightly too low, but since it's only used for 'status' and
         * it can never reach its minimum value too early this doesn't
         * matter.
         */

        chunk_t next_free;

        /*
         * The index of the next free exception in the current
         * metadata area.
         */
        uint32_t current_committed;

        atomic_t pending_count;
        uint32_t callback_count;
        struct commit_callback *callbacks;
        struct dm_io_client *io_client;

        struct workqueue_struct *metadata_wq;
};

static int alloc_area(struct pstore *ps)
{
        int r = -ENOMEM;
        size_t len;

        len = ps->store->chunk_size << SECTOR_SHIFT;

        /*
         * Allocate the chunk_size block of memory that will hold
         * a single metadata area.
         */
        ps->area = vmalloc(len);
        if (!ps->area)
                goto err_area;

        ps->zero_area = vzalloc(len);
        if (!ps->zero_area)
                goto err_zero_area;

        ps->header_area = vmalloc(len);
        if (!ps->header_area)
                goto err_header_area;

        return 0;

err_header_area:
        vfree(ps->zero_area);

err_zero_area:
        vfree(ps->area);

err_area:
        return r;
}

static void free_area(struct pstore *ps)
{
        vfree(ps->area);
        ps->area = NULL;
        vfree(ps->zero_area);
        ps->zero_area = NULL;
        vfree(ps->header_area);
        ps->header_area = NULL;
}

struct mdata_req {
        struct dm_io_region *where;
        struct dm_io_request *io_req;
        struct work_struct work;
        int result;
};

static void do_metadata(struct work_struct *work)
{
        struct mdata_req *req = container_of(work, struct mdata_req, work);

        req->result = dm_io(req->io_req, 1, req->where, NULL);
}

/*
 * Read or write a chunk-aligned, chunk-sized block of data from/to a device.
 */
static int chunk_io(struct pstore *ps, void *area, chunk_t chunk, int rw,
                    int metadata)
{
        struct dm_io_region where = {
                .bdev = dm_snap_cow(ps->store->snap)->bdev,
                .sector = ps->store->chunk_size * chunk,
                .count = ps->store->chunk_size,
        };
        struct dm_io_request io_req = {
                .bi_rw = rw,
                .mem.type = DM_IO_VMA,
                .mem.ptr.vma = area,
                .client = ps->io_client,
                .notify.fn = NULL,
        };
        struct mdata_req req;

        if (!metadata)
                return dm_io(&io_req, 1, &where, NULL);

        req.where = &where;
        req.io_req = &io_req;

        /*
         * Issue the synchronous I/O from a different thread
         * to avoid generic_make_request recursion.
         */
        INIT_WORK_ONSTACK(&req.work, do_metadata);
        queue_work(ps->metadata_wq, &req.work);
        flush_workqueue(ps->metadata_wq);
        destroy_work_on_stack(&req.work);

        return req.result;
}
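
/*
 * Note added for clarity: the metadata flag selects header I/O, which can
 * be requested from contexts that are already inside the block I/O
 * submission path (e.g. when invalidating the snapshot).  Bouncing the
 * synchronous dm_io() call through ps->metadata_wq and then flushing the
 * workqueue keeps the call synchronous for the caller while letting the
 * actual submission happen from a separate thread.
 */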

/*
 * Convert a metadata area index to a chunk index.
 */
static chunk_t area_location(struct pstore *ps, chunk_t area)
{
        return NUM_SNAPSHOT_HDR_CHUNKS + ((ps->exceptions_per_area + 1) * area);
}
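
/*
 * Worked example (added for clarity): with the default 16KB chunks,
 * exceptions_per_area is 1024, so the stride between metadata areas is
 * 1025 chunks and area_location() maps area 0 -> chunk 1,
 * area 1 -> chunk 1026, area 2 -> chunk 2051, and so on.
 */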

static void skip_metadata(struct pstore *ps)
{
        uint32_t stride = ps->exceptions_per_area + 1;
        chunk_t next_free = ps->next_free;
        if (sector_div(next_free, stride) == NUM_SNAPSHOT_HDR_CHUNKS)
                ps->next_free++;
}
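
/*
 * Added for clarity: next_free % stride == NUM_SNAPSHOT_HDR_CHUNKS means
 * next_free currently points at a metadata chunk rather than a data chunk,
 * so it is bumped past it.  Continuing the example above, when next_free
 * reaches 1026 (metadata area 1) it is advanced to 1027.
 */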

/*
 * Read or write a metadata area, remembering to skip the first
 * chunk, which holds the header.
 */
static int area_io(struct pstore *ps, int rw)
{
        int r;
        chunk_t chunk;

        chunk = area_location(ps, ps->current_area);

        r = chunk_io(ps, ps->area, chunk, rw, 0);
        if (r)
                return r;

        return 0;
}

static void zero_memory_area(struct pstore *ps)
{
        memset(ps->area, 0, ps->store->chunk_size << SECTOR_SHIFT);
}

static int zero_disk_area(struct pstore *ps, chunk_t area)
{
        return chunk_io(ps, ps->zero_area, area_location(ps, area), WRITE, 0);
}

static int read_header(struct pstore *ps, int *new_snapshot)
{
        int r;
        struct disk_header *dh;
        unsigned chunk_size;
        int chunk_size_supplied = 1;
        char *chunk_err;

        /*
         * Use default chunk size (or logical_block_size, if larger)
         * if none supplied
         */
        if (!ps->store->chunk_size) {
                ps->store->chunk_size = max(DM_CHUNK_SIZE_DEFAULT_SECTORS,
                    bdev_logical_block_size(dm_snap_cow(ps->store->snap)->
                                            bdev) >> 9);
                ps->store->chunk_mask = ps->store->chunk_size - 1;
                ps->store->chunk_shift = ffs(ps->store->chunk_size) - 1;
                chunk_size_supplied = 0;
        }

        ps->io_client = dm_io_client_create();
        if (IS_ERR(ps->io_client))
                return PTR_ERR(ps->io_client);

        r = alloc_area(ps);
        if (r)
                return r;

        r = chunk_io(ps, ps->header_area, 0, READ, 1);
        if (r)
                goto bad;

        dh = ps->header_area;

        if (le32_to_cpu(dh->magic) == 0) {
                *new_snapshot = 1;
                return 0;
        }

        if (le32_to_cpu(dh->magic) != SNAP_MAGIC) {
                DMWARN("Invalid or corrupt snapshot");
                r = -ENXIO;
                goto bad;
        }

        *new_snapshot = 0;
        ps->valid = le32_to_cpu(dh->valid);
        ps->version = le32_to_cpu(dh->version);
        chunk_size = le32_to_cpu(dh->chunk_size);

        if (ps->store->chunk_size == chunk_size)
                return 0;

        if (chunk_size_supplied)
                DMWARN("chunk size %u in device metadata overrides "
                       "table chunk size of %u.",
                       chunk_size, ps->store->chunk_size);

        /* We had a bogus chunk_size. Fix stuff up. */
        free_area(ps);

        r = dm_exception_store_set_chunk_size(ps->store, chunk_size,
                                              &chunk_err);
        if (r) {
                DMERR("invalid on-disk chunk size %u: %s.",
                      chunk_size, chunk_err);
                return r;
        }

        r = alloc_area(ps);
        return r;

bad:
        free_area(ps);
        return r;
}

static int write_header(struct pstore *ps)
{
        struct disk_header *dh;

        memset(ps->header_area, 0, ps->store->chunk_size << SECTOR_SHIFT);

        dh = ps->header_area;
        dh->magic = cpu_to_le32(SNAP_MAGIC);
        dh->valid = cpu_to_le32(ps->valid);
        dh->version = cpu_to_le32(ps->version);
        dh->chunk_size = cpu_to_le32(ps->store->chunk_size);

        return chunk_io(ps, ps->header_area, 0, WRITE, 1);
}

/*
 * Access functions for the disk exceptions; these do the endian conversions.
 */
static struct disk_exception *get_exception(struct pstore *ps, void *ps_area,
                                            uint32_t index)
{
        BUG_ON(index >= ps->exceptions_per_area);

        return ((struct disk_exception *) ps_area) + index;
}

static void read_exception(struct pstore *ps, void *ps_area,
                           uint32_t index, struct core_exception *result)
{
        struct disk_exception *de = get_exception(ps, ps_area, index);

        /* copy it */
        result->old_chunk = le64_to_cpu(de->old_chunk);
        result->new_chunk = le64_to_cpu(de->new_chunk);
}

static void write_exception(struct pstore *ps,
                            uint32_t index, struct core_exception *e)
{
        struct disk_exception *de = get_exception(ps, ps->area, index);

        /* copy it */
        de->old_chunk = cpu_to_le64(e->old_chunk);
        de->new_chunk = cpu_to_le64(e->new_chunk);
}

static void clear_exception(struct pstore *ps, uint32_t index)
{
        struct disk_exception *de = get_exception(ps, ps->area, index);

        /* clear it */
        de->old_chunk = 0;
        de->new_chunk = 0;
}

/*
 * Registers the exceptions that are present in the current area.
 * 'full' is filled in to indicate whether the area has been
 * filled.
 */
static int insert_exceptions(struct pstore *ps, void *ps_area,
                             int (*callback)(void *callback_context,
                                             chunk_t old, chunk_t new),
                             void *callback_context,
                             int *full)
{
        int r;
        unsigned int i;
        struct core_exception e;

        /* presume the area is full */
        *full = 1;

        for (i = 0; i < ps->exceptions_per_area; i++) {
                read_exception(ps, ps_area, i, &e);

                /*
                 * If the new_chunk is pointing at the start of
                 * the COW device, where the first metadata area
                 * is, we know that we've hit the end of the
                 * exceptions.  Therefore the area is not full.
                 */
                if (e.new_chunk == 0LL) {
                        ps->current_committed = i;
                        *full = 0;
                        break;
                }

                /*
                 * Keep track of the start of the free chunks.
                 */
                if (ps->next_free <= e.new_chunk)
                        ps->next_free = e.new_chunk + 1;

                /*
                 * Otherwise we add the exception to the snapshot.
                 */
                r = callback(callback_context, e.old_chunk, e.new_chunk);
                if (r)
                        return r;
        }

        return 0;
}

static int read_exceptions(struct pstore *ps,
                           int (*callback)(void *callback_context, chunk_t old,
                                           chunk_t new),
                           void *callback_context)
{
        int r, full = 1;
        struct dm_bufio_client *client;
        chunk_t prefetch_area = 0;

        client = dm_bufio_client_create(dm_snap_cow(ps->store->snap)->bdev,
                                        ps->store->chunk_size << SECTOR_SHIFT,
                                        1, 0, NULL, NULL);

        if (IS_ERR(client))
                return PTR_ERR(client);

        /*
         * Set up for one current buffer + desired readahead buffers.
         */
        dm_bufio_set_minimum_buffers(client, 1 + DM_PREFETCH_CHUNKS);

        /*
         * Keep reading chunks and inserting exceptions until
         * we find a partially full area.
         */
        for (ps->current_area = 0; full; ps->current_area++) {
                struct dm_buffer *bp;
                void *area;
                chunk_t chunk;

                if (unlikely(prefetch_area < ps->current_area))
                        prefetch_area = ps->current_area;

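                /*
                 * Added for clarity: read ahead up to DM_PREFETCH_CHUNKS
                 * metadata areas beyond the one we are about to parse.
                 * The loop stops early if the prefetch chunk would fall
                 * beyond the end of the COW device, or if prefetch_area
                 * wraps around to zero.
                 */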
                if (DM_PREFETCH_CHUNKS) do {
                        chunk_t pf_chunk = area_location(ps, prefetch_area);
                        if (unlikely(pf_chunk >= dm_bufio_get_device_size(client)))
                                break;
                        dm_bufio_prefetch(client, pf_chunk, 1);
                        prefetch_area++;
                        if (unlikely(!prefetch_area))
                                break;
                } while (prefetch_area <= ps->current_area + DM_PREFETCH_CHUNKS);

                chunk = area_location(ps, ps->current_area);

                area = dm_bufio_read(client, chunk, &bp);
                if (unlikely(IS_ERR(area))) {
                        r = PTR_ERR(area);
                        goto ret_destroy_bufio;
                }

                r = insert_exceptions(ps, area, callback, callback_context,
                                      &full);

                if (!full)
                        memcpy(ps->area, area, ps->store->chunk_size << SECTOR_SHIFT);

                dm_bufio_release(bp);

                dm_bufio_forget(client, chunk);

                if (unlikely(r))
                        goto ret_destroy_bufio;
        }

        ps->current_area--;

        skip_metadata(ps);

        r = 0;

ret_destroy_bufio:
        dm_bufio_client_destroy(client);

        return r;
}

static struct pstore *get_info(struct dm_exception_store *store)
{
        return (struct pstore *) store->context;
}

static void persistent_usage(struct dm_exception_store *store,
                             sector_t *total_sectors,
                             sector_t *sectors_allocated,
                             sector_t *metadata_sectors)
{
        struct pstore *ps = get_info(store);

        *sectors_allocated = ps->next_free * store->chunk_size;
        *total_sectors = get_dev_size(dm_snap_cow(store->snap)->bdev);

        /*
         * First chunk is the fixed header.
         * Then there are (ps->current_area + 1) metadata chunks, each one
         * separated from the next by ps->exceptions_per_area data chunks.
         */
        *metadata_sectors = (ps->current_area + 1 + NUM_SNAPSHOT_HDR_CHUNKS) *
                            store->chunk_size;
}
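
/*
 * Worked example (added for clarity): with 16KB chunks (32 sectors) and a
 * single metadata area in use (ps->current_area == 0), *metadata_sectors
 * is (0 + 1 + 1) * 32 = 64 sectors, i.e. the header chunk plus one
 * exception-table chunk.
 */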

static void persistent_dtr(struct dm_exception_store *store)
{
        struct pstore *ps = get_info(store);

        destroy_workqueue(ps->metadata_wq);

        /* Created in read_header */
        if (ps->io_client)
                dm_io_client_destroy(ps->io_client);
        free_area(ps);

        /* Allocated in persistent_read_metadata */
        vfree(ps->callbacks);

        kfree(ps);
}

static int persistent_read_metadata(struct dm_exception_store *store,
                                    int (*callback)(void *callback_context,
                                                    chunk_t old, chunk_t new),
                                    void *callback_context)
{
        int r, uninitialized_var(new_snapshot);
        struct pstore *ps = get_info(store);

        /*
         * Read the snapshot header.
         */
        r = read_header(ps, &new_snapshot);
        if (r)
                return r;

        /*
         * Now that we know the correct chunk_size, complete the initialisation.
         */
        ps->exceptions_per_area = (ps->store->chunk_size << SECTOR_SHIFT) /
                                  sizeof(struct disk_exception);
        ps->callbacks = dm_vcalloc(ps->exceptions_per_area,
                                   sizeof(*ps->callbacks));
        if (!ps->callbacks)
                return -ENOMEM;

        /*
         * Do we need to set up a new snapshot?
         */
        if (new_snapshot) {
                r = write_header(ps);
                if (r) {
                        DMWARN("write_header failed");
                        return r;
                }

                ps->current_area = 0;
                zero_memory_area(ps);
                r = zero_disk_area(ps, 0);
                if (r)
                        DMWARN("zero_disk_area(0) failed");
                return r;
        }
        /*
         * Sanity checks.
         */
        if (ps->version != SNAPSHOT_DISK_VERSION) {
                DMWARN("unable to handle snapshot disk version %d",
                       ps->version);
                return -EINVAL;
        }

        /*
         * Metadata are valid, but the snapshot is invalidated.
         */
        if (!ps->valid)
                return 1;

        /*
         * Read the metadata.
         */
        r = read_exceptions(ps, callback, callback_context);

        return r;
}

static int persistent_prepare_exception(struct dm_exception_store *store,
                                        struct dm_exception *e)
{
        struct pstore *ps = get_info(store);
        sector_t size = get_dev_size(dm_snap_cow(store->snap)->bdev);

        /* Is there enough room? */
        if (size < ((ps->next_free + 1) * store->chunk_size))
                return -ENOSPC;

        e->new_chunk = ps->next_free;

        /*
         * Move on to the next free pending chunk, making sure to take
         * into account the location of the metadata chunks.
         */
        ps->next_free++;
        skip_metadata(ps);

        atomic_inc(&ps->pending_count);
        return 0;
}

static void persistent_commit_exception(struct dm_exception_store *store,
                                        struct dm_exception *e,
                                        void (*callback) (void *, int success),
                                        void *callback_context)
{
        unsigned int i;
        struct pstore *ps = get_info(store);
        struct core_exception ce;
        struct commit_callback *cb;

        ce.old_chunk = e->old_chunk;
        ce.new_chunk = e->new_chunk;
        write_exception(ps, ps->current_committed++, &ce);

        /*
         * Add the callback to the back of the array.  This code
         * is the only place where the callback array is
         * manipulated, and we know that it will never be called
         * multiple times concurrently.
         */
        cb = ps->callbacks + ps->callback_count++;
        cb->callback = callback;
        cb->context = callback_context;

        /*
         * If there are exceptions in flight and we have not yet
         * filled this metadata area, there's nothing more to do.
         */
        if (!atomic_dec_and_test(&ps->pending_count) &&
            (ps->current_committed != ps->exceptions_per_area))
                return;

        /*
         * If we completely filled the current area, then wipe the next one.
         */
        if ((ps->current_committed == ps->exceptions_per_area) &&
            zero_disk_area(ps, ps->current_area + 1))
                ps->valid = 0;

        /*
         * Commit exceptions to disk.
         */
        if (ps->valid && area_io(ps, WRITE_FLUSH_FUA))
                ps->valid = 0;

        /*
         * Advance to the next area if this one is full.
         */
        if (ps->current_committed == ps->exceptions_per_area) {
                ps->current_committed = 0;
                ps->current_area++;
                zero_memory_area(ps);
        }

        for (i = 0; i < ps->callback_count; i++) {
                cb = ps->callbacks + i;
                cb->callback(cb->context, ps->valid);
        }

        ps->callback_count = 0;
}

static int persistent_prepare_merge(struct dm_exception_store *store,
                                    chunk_t *last_old_chunk,
                                    chunk_t *last_new_chunk)
{
        struct pstore *ps = get_info(store);
        struct core_exception ce;
        int nr_consecutive;
        int r;

        /*
         * When the current area is empty, move back to the preceding area.
         */
        if (!ps->current_committed) {
                /*
                 * Have we finished?
                 */
                if (!ps->current_area)
                        return 0;

                ps->current_area--;
                r = area_io(ps, READ);
                if (r < 0)
                        return r;
                ps->current_committed = ps->exceptions_per_area;
        }

        read_exception(ps, ps->area, ps->current_committed - 1, &ce);
        *last_old_chunk = ce.old_chunk;
        *last_new_chunk = ce.new_chunk;

        /*
         * Find the number of consecutive chunks within the current area,
         * working backwards.
         */
        for (nr_consecutive = 1; nr_consecutive < ps->current_committed;
             nr_consecutive++) {
                read_exception(ps, ps->area,
                               ps->current_committed - 1 - nr_consecutive, &ce);
                if (ce.old_chunk != *last_old_chunk - nr_consecutive ||
                    ce.new_chunk != *last_new_chunk - nr_consecutive)
                        break;
        }

        return nr_consecutive;
}
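
/*
 * Worked example (added for clarity): if the tail of the current area holds
 * the committed exceptions (old 10 -> new 20), (old 11 -> new 21),
 * (old 12 -> new 22) and the entry before them is not consecutive,
 * prepare_merge() reports last_old_chunk = 12, last_new_chunk = 22 and
 * returns 3, so the caller can merge those three exceptions in one pass.
 */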

static int persistent_commit_merge(struct dm_exception_store *store,
                                   int nr_merged)
{
        int r, i;
        struct pstore *ps = get_info(store);

        BUG_ON(nr_merged > ps->current_committed);

        for (i = 0; i < nr_merged; i++)
                clear_exception(ps, ps->current_committed - 1 - i);

        r = area_io(ps, WRITE_FLUSH_FUA);
        if (r < 0)
                return r;

        ps->current_committed -= nr_merged;

        /*
         * At this stage, only persistent_usage() uses ps->next_free, so
         * we make no attempt to keep ps->next_free strictly accurate
         * as exceptions may have been committed out-of-order originally.
         * Once a snapshot has started merging, we set it to the value it
         * would have held had all the exceptions been committed in order.
         *
         * ps->current_area does not get reduced by prepare_merge() until
         * after commit_merge() has removed the nr_merged previous exceptions.
         */
        ps->next_free = area_location(ps, ps->current_area) +
                        ps->current_committed + 1;

        return 0;
}

static void persistent_drop_snapshot(struct dm_exception_store *store)
{
        struct pstore *ps = get_info(store);

        ps->valid = 0;
        if (write_header(ps))
                DMWARN("write header failed");
}

static int persistent_ctr(struct dm_exception_store *store,
                          unsigned argc, char **argv)
{
        struct pstore *ps;

        /* allocate the pstore */
        ps = kzalloc(sizeof(*ps), GFP_KERNEL);
        if (!ps)
                return -ENOMEM;

        ps->store = store;
        ps->valid = 1;
        ps->version = SNAPSHOT_DISK_VERSION;
        ps->area = NULL;
        ps->zero_area = NULL;
        ps->header_area = NULL;
        ps->next_free = NUM_SNAPSHOT_HDR_CHUNKS + 1; /* header and 1st area */
        ps->current_committed = 0;

        ps->callback_count = 0;
        atomic_set(&ps->pending_count, 0);
        ps->callbacks = NULL;

        ps->metadata_wq = alloc_workqueue("ksnaphd", WQ_MEM_RECLAIM, 0);
        if (!ps->metadata_wq) {
                kfree(ps);
                DMERR("couldn't start header metadata update thread");
                return -ENOMEM;
        }

        store->context = ps;

        return 0;
}

static unsigned persistent_status(struct dm_exception_store *store,
                                  status_type_t status, char *result,
                                  unsigned maxlen)
{
        unsigned sz = 0;

        switch (status) {
        case STATUSTYPE_INFO:
                break;
        case STATUSTYPE_TABLE:
                DMEMIT(" P %llu", (unsigned long long)store->chunk_size);
        }

        return sz;
}

static struct dm_exception_store_type _persistent_type = {
        .name = "persistent",
        .module = THIS_MODULE,
        .ctr = persistent_ctr,
        .dtr = persistent_dtr,
        .read_metadata = persistent_read_metadata,
        .prepare_exception = persistent_prepare_exception,
        .commit_exception = persistent_commit_exception,
        .prepare_merge = persistent_prepare_merge,
        .commit_merge = persistent_commit_merge,
        .drop_snapshot = persistent_drop_snapshot,
        .usage = persistent_usage,
        .status = persistent_status,
};

static struct dm_exception_store_type _persistent_compat_type = {
        .name = "P",
        .module = THIS_MODULE,
        .ctr = persistent_ctr,
        .dtr = persistent_dtr,
        .read_metadata = persistent_read_metadata,
        .prepare_exception = persistent_prepare_exception,
        .commit_exception = persistent_commit_exception,
        .prepare_merge = persistent_prepare_merge,
        .commit_merge = persistent_commit_merge,
        .drop_snapshot = persistent_drop_snapshot,
        .usage = persistent_usage,
        .status = persistent_status,
};

int dm_persistent_snapshot_init(void)
{
        int r;

        r = dm_exception_store_type_register(&_persistent_type);
        if (r) {
                DMERR("Unable to register persistent exception store type");
                return r;
        }

        r = dm_exception_store_type_register(&_persistent_compat_type);
        if (r) {
                DMERR("Unable to register old-style persistent exception "
                      "store type");
                dm_exception_store_type_unregister(&_persistent_type);
                return r;
        }

        return r;
}

void dm_persistent_snapshot_exit(void)
{
        dm_exception_store_type_unregister(&_persistent_type);
        dm_exception_store_type_unregister(&_persistent_compat_type);
}