#include "dm-exception-store.h"

#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <linux/dm-io.h>

#define DM_MSG_PREFIX "persistent snapshot"
#define DM_CHUNK_SIZE_DEFAULT_SECTORS 32	/* 16KB */

/*-----------------------------------------------------------------
 * Persistent snapshots, by persistent we mean that the snapshot
 * will survive a reboot.
 *---------------------------------------------------------------*/

/*
 * We need to store a record of which parts of the origin have
 * been copied to the snapshot device.  The snapshot code
 * requires that we copy exception chunks to chunk-aligned areas
 * of the COW store.  On the other hand we want to grab every
 * metadata block as soon as possible to avoid seeking about, so
 * we grab them all at once.
 *
 * So the layout of the COW device is:
 *
 *    chunk 0:                    the header
 *    chunk 1:                    the first metadata area
 *    next exceptions_per_area chunks: data chunks for area 0
 *    then the next metadata area, its data chunks, and so on.
 *
 * All on-disk structures are in little-endian format.  The end of
 * the exceptions in an area is indicated by an exception with a
 * new_chunk of 0, which is invalid since that would point to the
 * header chunk.
 */

/*
 * Magic for persistent snapshots: "SnAp" - Feeble isn't it.
 */
#define SNAP_MAGIC 0x70416e53

/*
 * The on-disk version of the metadata.
 */
#define SNAPSHOT_DISK_VERSION 1

#define NUM_SNAPSHOT_HDR_CHUNKS 1

struct disk_header {
	uint32_t magic;

	/*
	 * Is this snapshot valid.  There is no way of recovering
	 * an invalid snapshot.
	 */
	uint32_t valid;

	/*
	 * Simple, incrementing version.  No backward
	 * compatibility.
	 */
	uint32_t version;

	/* In sectors */
	uint32_t chunk_size;
};

struct disk_exception {
	uint64_t old_chunk;
	uint64_t new_chunk;
};

struct commit_callback {
	void (*callback)(void *, int success);
	void *context;
};

/*
 * The top level structure for a persistent exception store.
 */
struct pstore {
	struct dm_exception_store *store;
	int version;
	int valid;
	uint32_t exceptions_per_area;

	/*
	 * Now that we have an asynchronous kcopyd there is no
	 * need for large chunk sizes, so it won't hurt to have a
	 * whole chunk's worth of metadata in memory at once.
	 */
	void *area;

	/*
	 * An area of zeros used to clear the next area.
	 */
	void *zero_area;

	/*
	 * An area used for the header.  The header can be written
	 * concurrently with metadata (when invalidating the snapshot),
	 * so it needs a separate buffer.
	 */
	void *header_area;

	/*
	 * Used to keep track of which metadata area the data in
	 * 'area' refers to.
	 */
	chunk_t current_area;

	/*
	 * The next free chunk for an exception.
	 *
	 * When creating exceptions, all the chunks here and above are
	 * free.  It holds the next chunk to be allocated.
	 *
	 * When merging exceptions, it does not necessarily mean all the
	 * chunks here and above are free.  It holds the value it would
	 * have held if all chunks had been allocated sequentially.
	 * Merging requires that all chunks at or above 'next_free' are
	 * free.
	 */
	chunk_t next_free;

	/*
	 * The index of the next free exception slot in the current
	 * metadata area.
	 */
	uint32_t current_committed;

	atomic_t pending_count;
	uint32_t callback_count;
	struct commit_callback *callbacks;
	struct dm_io_client *io_client;

	struct workqueue_struct *metadata_wq;
};

static unsigned sectors_to_pages(unsigned sectors)
{
	return DIV_ROUND_UP(sectors, PAGE_SIZE >> 9);
}
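
/*
 * e.g. with 4KiB pages, PAGE_SIZE >> 9 is eight 512-byte sectors per
 * page, so the 32-sector default chunk size above maps to
 * DIV_ROUND_UP(32, 8) == 4 pages of I/O buffer.
 */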

static int alloc_area(struct pstore *ps)
{
	int r = -ENOMEM;
	size_t len;

	len = ps->store->chunk_size << SECTOR_SHIFT;

	/*
	 * Allocate the chunk_size block of memory that will hold
	 * a single metadata area.
	 */
	ps->area = vmalloc(len);
	if (!ps->area)
		goto err_area;

	ps->zero_area = vmalloc(len);
	if (!ps->zero_area)
		goto err_zero_area;
	memset(ps->zero_area, 0, len);

	ps->header_area = vmalloc(len);
	if (!ps->header_area)
		goto err_header_area;

	return 0;

err_header_area:
	vfree(ps->zero_area);

err_zero_area:
	vfree(ps->area);

err_area:
	return r;
}

static void free_area(struct pstore *ps)
{
	if (ps->area)
		vfree(ps->area);
	ps->area = NULL;

	if (ps->zero_area)
		vfree(ps->zero_area);
	ps->zero_area = NULL;

	if (ps->header_area)
		vfree(ps->header_area);
	ps->header_area = NULL;
}

struct mdata_req {
	struct dm_io_region *where;
	struct dm_io_request *io_req;
	struct work_struct work;
	int result;
};

static void do_metadata(struct work_struct *work)
{
	struct mdata_req *req = container_of(work, struct mdata_req, work);

	req->result = dm_io(req->io_req, 1, req->where, NULL);
}

/*
 * Read or write a chunk aligned and sized block of data from a device.
 */
static int chunk_io(struct pstore *ps, void *area, chunk_t chunk, int rw,
		    int metadata)
{
	struct dm_io_region where = {
		.bdev = dm_snap_cow(ps->store->snap)->bdev,
		.sector = ps->store->chunk_size * chunk,
		.count = ps->store->chunk_size,
	};
	struct dm_io_request io_req = {
		.bi_rw = rw,
		.mem.type = DM_IO_VMA,
		.mem.ptr.vma = area,
		.client = ps->io_client,
		.notify.fn = NULL,
	};
	struct mdata_req req;

	if (!metadata)
		return dm_io(&io_req, 1, &where, NULL);

	/*
	 * Issue the synchronous I/O from a different thread
	 * to avoid generic_make_request recursion.
	 */
	INIT_WORK_ONSTACK(&req.work, do_metadata);
	queue_work(ps->metadata_wq, &req.work);
	flush_work(&req.work);

	return req.result;
}

/*
 * Convert a metadata area index to a chunk index.
 */
static chunk_t area_location(struct pstore *ps, chunk_t area)
{
	return NUM_SNAPSHOT_HDR_CHUNKS + ((ps->exceptions_per_area + 1) * area);
}
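
/*
 * e.g. with a 4KiB chunk, each 16-byte disk_exception gives an
 * exceptions_per_area of 256, so metadata areas land on chunks
 * 1, 258, 515, ... with each area's 256 data chunks in between.
 */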

/*
 * Read or write a metadata area, remembering to skip the first
 * chunk which holds the header.
 */
static int area_io(struct pstore *ps, int rw)
{
	chunk_t chunk = area_location(ps, ps->current_area);

	return chunk_io(ps, ps->area, chunk, rw, 0);
}

static void zero_memory_area(struct pstore *ps)
{
	memset(ps->area, 0, ps->store->chunk_size << SECTOR_SHIFT);
}

static int zero_disk_area(struct pstore *ps, chunk_t area)
{
	return chunk_io(ps, ps->zero_area, area_location(ps, area), WRITE, 0);
}

static int read_header(struct pstore *ps, int *new_snapshot)
{
	int r;
	struct disk_header *dh;
	unsigned chunk_size;
	int chunk_size_supplied = 1;
	char *chunk_err;

	/*
	 * Use default chunk size (or logical_block_size, whichever is
	 * larger) if none supplied.
	 */
	if (!ps->store->chunk_size) {
		ps->store->chunk_size = max(DM_CHUNK_SIZE_DEFAULT_SECTORS,
		    bdev_logical_block_size(dm_snap_cow(ps->store->snap)->
					    bdev) >> 9);
		ps->store->chunk_mask = ps->store->chunk_size - 1;
		ps->store->chunk_shift = ffs(ps->store->chunk_size) - 1;
		chunk_size_supplied = 0;
	}

	ps->io_client = dm_io_client_create(sectors_to_pages(ps->store->
							     chunk_size));
	if (IS_ERR(ps->io_client))
		return PTR_ERR(ps->io_client);

	r = alloc_area(ps);
	if (r)
		return r;

	r = chunk_io(ps, ps->header_area, 0, READ, 1);
	if (r)
		goto bad;

	dh = ps->header_area;

	if (le32_to_cpu(dh->magic) == 0) {
		*new_snapshot = 1;
		return 0;
	}

	if (le32_to_cpu(dh->magic) != SNAP_MAGIC) {
		DMWARN("Invalid or corrupt snapshot");
		r = -ENXIO;
		goto bad;
	}

	*new_snapshot = 0;
	ps->valid = le32_to_cpu(dh->valid);
	ps->version = le32_to_cpu(dh->version);
	chunk_size = le32_to_cpu(dh->chunk_size);

	if (ps->store->chunk_size == chunk_size)
		return 0;

	if (chunk_size_supplied)
		DMWARN("chunk size %u in device metadata overrides "
		       "table chunk size of %u.",
		       chunk_size, ps->store->chunk_size);

	/* We change the chunk size, so reallocate the I/O buffers. */
	free_area(ps);

	r = dm_exception_store_set_chunk_size(ps->store, chunk_size,
					      &chunk_err);
	if (r) {
		DMERR("invalid on-disk chunk size %u: %s.",
		      chunk_size, chunk_err);
		return r;
	}

	r = dm_io_client_resize(sectors_to_pages(ps->store->chunk_size),
				ps->io_client);
	if (r)
		return r;

	r = alloc_area(ps);
	return r;

bad:
	free_area(ps);
	return r;
}

static int write_header(struct pstore *ps)
{
	struct disk_header *dh;

	memset(ps->header_area, 0, ps->store->chunk_size << SECTOR_SHIFT);

	dh = ps->header_area;
	dh->magic = cpu_to_le32(SNAP_MAGIC);
	dh->valid = cpu_to_le32(ps->valid);
	dh->version = cpu_to_le32(ps->version);
	dh->chunk_size = cpu_to_le32(ps->store->chunk_size);

	return chunk_io(ps, ps->header_area, 0, WRITE, 1);
}

/*
 * Access functions for the disk exceptions.  These do the endian conversions.
 */
static struct disk_exception *get_exception(struct pstore *ps, uint32_t index)
{
	BUG_ON(index >= ps->exceptions_per_area);

	return ((struct disk_exception *) ps->area) + index;
}

static void read_exception(struct pstore *ps,
			   uint32_t index, struct disk_exception *result)
{
	struct disk_exception *e = get_exception(ps, index);

	/* copy it */
	result->old_chunk = le64_to_cpu(e->old_chunk);
	result->new_chunk = le64_to_cpu(e->new_chunk);
}

static void write_exception(struct pstore *ps,
			    uint32_t index, struct disk_exception *de)
{
	struct disk_exception *e = get_exception(ps, index);

	/* copy it */
	e->old_chunk = cpu_to_le64(de->old_chunk);
	e->new_chunk = cpu_to_le64(de->new_chunk);
}

static void clear_exception(struct pstore *ps, uint32_t index)
{
	struct disk_exception *e = get_exception(ps, index);

	/* clear it */
	e->old_chunk = 0;
	e->new_chunk = 0;
}

/*
 * Registers the exceptions that are present in the current area.
 * 'full' is filled in to indicate if the area has been
 * filled.
 */
static int insert_exceptions(struct pstore *ps,
			     int (*callback)(void *callback_context,
					     chunk_t old, chunk_t new),
			     void *callback_context,
			     int *full)
{
	int r;
	unsigned int i;
	struct disk_exception de;

	/* presume the area is full */
	*full = 1;

	for (i = 0; i < ps->exceptions_per_area; i++) {
		read_exception(ps, i, &de);

		/*
		 * A new_chunk of 0 would point at the header chunk,
		 * which is never a valid copy destination, so it marks
		 * the end of the committed exceptions.  Therefore the
		 * area is not full.
		 */
		if (de.new_chunk == 0LL) {
			ps->current_committed = i;
			*full = 0;
			break;
		}

		/*
		 * Keep track of the start of the free chunks.
		 */
		if (ps->next_free <= de.new_chunk)
			ps->next_free = de.new_chunk + 1;

		/*
		 * Otherwise we add the exception to the snapshot.
		 */
		r = callback(callback_context, de.old_chunk, de.new_chunk);
		if (r)
			return r;
	}

	return 0;
}

static int read_exceptions(struct pstore *ps,
			   int (*callback)(void *callback_context, chunk_t old,
					   chunk_t new),
			   void *callback_context)
{
	int r, full = 1;

	/*
	 * Keep reading areas and inserting exceptions until
	 * we find a partially full area.
	 */
	for (ps->current_area = 0; full; ps->current_area++) {
		r = area_io(ps, READ);
		if (r)
			return r;

		r = insert_exceptions(ps, callback, callback_context, &full);
		if (r)
			return r;
	}

	ps->current_area--;

	return 0;
}

static struct pstore *get_info(struct dm_exception_store *store)
{
	return (struct pstore *) store->context;
}

static void persistent_usage(struct dm_exception_store *store,
			     sector_t *total_sectors,
			     sector_t *sectors_allocated,
			     sector_t *metadata_sectors)
{
	struct pstore *ps = get_info(store);

	*sectors_allocated = ps->next_free * store->chunk_size;
	*total_sectors = get_dev_size(dm_snap_cow(store->snap)->bdev);

	/*
	 * First chunk is the fixed header.
	 * Then there are (ps->current_area + 1) metadata chunks, each one
	 * separated from the next by ps->exceptions_per_area data chunks.
	 */
	*metadata_sectors = (ps->current_area + 1 + NUM_SNAPSHOT_HDR_CHUNKS) *
			    store->chunk_size;
}
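
/*
 * e.g. with 32-sector chunks and two metadata areas in use
 * (ps->current_area == 1), metadata_sectors is (1 + 1 + 1) * 32 == 96:
 * the header chunk plus the two metadata chunks.
 */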

static void persistent_dtr(struct dm_exception_store *store)
{
	struct pstore *ps = get_info(store);

	destroy_workqueue(ps->metadata_wq);

	/* Created in read_header() */
	if (ps->io_client)
		dm_io_client_destroy(ps->io_client);
	free_area(ps);

	/* Allocated in persistent_read_metadata() */
	if (ps->callbacks)
		vfree(ps->callbacks);

	kfree(ps);
}

static int persistent_read_metadata(struct dm_exception_store *store,
				    int (*callback)(void *callback_context,
						    chunk_t old, chunk_t new),
				    void *callback_context)
{
	int r, uninitialized_var(new_snapshot);
	struct pstore *ps = get_info(store);

	/*
	 * Read the snapshot header.
	 */
	r = read_header(ps, &new_snapshot);
	if (r)
		return r;

	/*
	 * Now we know the correct chunk_size, complete the initialisation.
	 */
	ps->exceptions_per_area = (ps->store->chunk_size << SECTOR_SHIFT) /
				  sizeof(struct disk_exception);
	ps->callbacks = dm_vcalloc(ps->exceptions_per_area,
				   sizeof(*ps->callbacks));
	if (!ps->callbacks)
		return -ENOMEM;

	/*
	 * Do we need to set up a new snapshot?
	 */
	if (new_snapshot) {
		r = write_header(ps);
		if (r) {
			DMWARN("write_header failed");
			return r;
		}

		ps->current_area = 0;
		zero_memory_area(ps);
		r = zero_disk_area(ps, 0);
		if (r)
			DMWARN("zero_disk_area(0) failed");
		return r;
	}

	/*
	 * Sanity checks.
	 */
	if (ps->version != SNAPSHOT_DISK_VERSION) {
		DMWARN("unable to handle snapshot disk version %d",
		       ps->version);
		return -EINVAL;
	}

	/*
	 * Metadata are valid, but the snapshot is invalidated.
	 */
	if (!ps->valid)
		return 1;

	/*
	 * Read the metadata.
	 */
	r = read_exceptions(ps, callback, callback_context);

	return r;
}

static int persistent_prepare_exception(struct dm_exception_store *store,
					struct dm_exception *e)
{
	struct pstore *ps = get_info(store);
	uint32_t stride;
	chunk_t next_free;
	sector_t size = get_dev_size(dm_snap_cow(store->snap)->bdev);

	/* Is there enough room? */
	if (size < ((ps->next_free + 1) * store->chunk_size))
		return -ENOSPC;

	e->new_chunk = ps->next_free;

	/*
	 * Move on to the next free chunk, making sure to take
	 * into account the location of the metadata chunks.
	 */
	stride = (ps->exceptions_per_area + 1);
	next_free = ++ps->next_free;
	if (sector_div(next_free, stride) == 1)
		ps->next_free++;

	atomic_inc(&ps->pending_count);
	return 0;
}
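
/*
 * e.g. with exceptions_per_area == 256 the stride is 257, so when the
 * incremented next_free lands on a metadata chunk (1, 258, 515, ... --
 * i.e. next_free % 257 == 1) it is bumped once more to skip to the
 * next data chunk.
 */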

static void persistent_commit_exception(struct dm_exception_store *store,
					struct dm_exception *e,
					void (*callback) (void *, int success),
					void *callback_context)
{
	unsigned int i;
	struct pstore *ps = get_info(store);
	struct disk_exception de;
	struct commit_callback *cb;

	de.old_chunk = e->old_chunk;
	de.new_chunk = e->new_chunk;
	write_exception(ps, ps->current_committed++, &de);

	/*
	 * Add the callback to the back of the array.  This code
	 * is the only place where the callback array is
	 * manipulated, and we know that it will never be called
	 * multiple times concurrently.
	 */
	cb = ps->callbacks + ps->callback_count++;
	cb->callback = callback;
	cb->context = callback_context;

	/*
	 * If there are exceptions in flight and we have not yet
	 * filled this metadata area there's nothing more to do.
	 */
	if (!atomic_dec_and_test(&ps->pending_count) &&
	    (ps->current_committed != ps->exceptions_per_area))
		return;

	/*
	 * If we completely filled the current area, then wipe the next one.
	 */
	if ((ps->current_committed == ps->exceptions_per_area) &&
	    zero_disk_area(ps, ps->current_area + 1))
		ps->valid = 0;

	/*
	 * Commit exceptions to disk.
	 */
	if (ps->valid && area_io(ps, WRITE_FLUSH_FUA))
		ps->valid = 0;

	/*
	 * Advance to the next area if this one is full.
	 */
	if (ps->current_committed == ps->exceptions_per_area) {
		ps->current_committed = 0;
		ps->current_area++;
		zero_memory_area(ps);
	}

	for (i = 0; i < ps->callback_count; i++) {
		cb = ps->callbacks + i;
		cb->callback(cb->context, ps->valid);
	}

	ps->callback_count = 0;
}

static int persistent_prepare_merge(struct dm_exception_store *store,
				    chunk_t *last_old_chunk,
				    chunk_t *last_new_chunk)
{
	struct pstore *ps = get_info(store);
	struct disk_exception de;
	int nr_consecutive;
	int r;

	/*
	 * When the current area is empty, move back to the preceding area.
	 */
	if (!ps->current_committed) {
		/*
		 * Have we finished?
		 */
		if (!ps->current_area)
			return 0;

		ps->current_area--;
		r = area_io(ps, READ);
		if (r < 0)
			return r;
		ps->current_committed = ps->exceptions_per_area;
	}

	read_exception(ps, ps->current_committed - 1, &de);
	*last_old_chunk = de.old_chunk;
	*last_new_chunk = de.new_chunk;

	/*
	 * Find the number of consecutive chunks within the current area,
	 * working backwards.
	 */
	for (nr_consecutive = 1; nr_consecutive < ps->current_committed;
	     nr_consecutive++) {
		read_exception(ps, ps->current_committed - 1 - nr_consecutive,
			       &de);
		if (de.old_chunk != *last_old_chunk - nr_consecutive ||
		    de.new_chunk != *last_new_chunk - nr_consecutive)
			break;
	}

	return nr_consecutive;
}

static int persistent_commit_merge(struct dm_exception_store *store,
				   int nr_merged)
{
	int r, i;
	struct pstore *ps = get_info(store);

	BUG_ON(nr_merged > ps->current_committed);

	for (i = 0; i < nr_merged; i++)
		clear_exception(ps, ps->current_committed - 1 - i);

	r = area_io(ps, WRITE);
	if (r < 0)
		return r;

	ps->current_committed -= nr_merged;

	/*
	 * At this stage, only persistent_usage() uses ps->next_free, so
	 * we make no attempt to keep ps->next_free strictly accurate, as
	 * exceptions may have been committed out-of-order originally.
	 * Once a snapshot has become merging, we set it to the value it
	 * would have held if all chunks had been committed sequentially:
	 * the chunk just after the last committed exception that remains.
	 */
	ps->next_free = area_location(ps, ps->current_area) +
			ps->current_committed + 1;

	return 0;
}
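
/*
 * e.g. if merging left current_committed == 10 in area 0, the next
 * exception slot maps to data chunk area_location(ps, 0) + 10 + 1 == 12,
 * which becomes the ps->next_free that persistent_usage() reports.
 */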

static void persistent_drop_snapshot(struct dm_exception_store *store)
{
	struct pstore *ps = get_info(store);

	ps->valid = 0;
	if (write_header(ps))
		DMWARN("write header failed");
}

static int persistent_ctr(struct dm_exception_store *store,
			  unsigned argc, char **argv)
{
	struct pstore *ps;

	/* allocate the pstore */
	ps = kzalloc(sizeof(*ps), GFP_KERNEL);
	if (!ps)
		return -ENOMEM;

	ps->store = store;
	ps->valid = 1;
	ps->version = SNAPSHOT_DISK_VERSION;
	ps->area = NULL;
	ps->zero_area = NULL;
	ps->header_area = NULL;
	ps->next_free = NUM_SNAPSHOT_HDR_CHUNKS + 1; /* header and 1st area */
	ps->current_committed = 0;

	ps->callback_count = 0;
	atomic_set(&ps->pending_count, 0);
	ps->callbacks = NULL;

	ps->metadata_wq = alloc_workqueue("ksnaphd", WQ_MEM_RECLAIM, 0);
	if (!ps->metadata_wq) {
		kfree(ps);
		DMERR("couldn't start header metadata update thread");
		return -ENOMEM;
	}

	store->context = ps;

	return 0;
}

static unsigned persistent_status(struct dm_exception_store *store,
				  status_type_t status, char *result,
				  unsigned maxlen)
{
	unsigned sz = 0;

	switch (status) {
	case STATUSTYPE_INFO:
		break;
	case STATUSTYPE_TABLE:
		DMEMIT(" P %llu", (unsigned long long)store->chunk_size);
	}

	return sz;
}
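
/*
 * e.g. an 8-sector chunk size emits " P 8" into the table line,
 * mirroring the "<type> <chunk_size>" arguments the exception store
 * was constructed with.
 */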

static struct dm_exception_store_type _persistent_type = {
	.name = "persistent",
	.module = THIS_MODULE,
	.ctr = persistent_ctr,
	.dtr = persistent_dtr,
	.read_metadata = persistent_read_metadata,
	.prepare_exception = persistent_prepare_exception,
	.commit_exception = persistent_commit_exception,
	.prepare_merge = persistent_prepare_merge,
	.commit_merge = persistent_commit_merge,
	.drop_snapshot = persistent_drop_snapshot,
	.usage = persistent_usage,
	.status = persistent_status,
};

static struct dm_exception_store_type _persistent_compat_type = {
	.name = "P",
	.module = THIS_MODULE,
	.ctr = persistent_ctr,
	.dtr = persistent_dtr,
	.read_metadata = persistent_read_metadata,
	.prepare_exception = persistent_prepare_exception,
	.commit_exception = persistent_commit_exception,
	.prepare_merge = persistent_prepare_merge,
	.commit_merge = persistent_commit_merge,
	.drop_snapshot = persistent_drop_snapshot,
	.usage = persistent_usage,
	.status = persistent_status,
};

int dm_persistent_snapshot_init(void)
{
	int r;

	r = dm_exception_store_type_register(&_persistent_type);
	if (r) {
		DMERR("Unable to register persistent exception store type");
		return r;
	}

	r = dm_exception_store_type_register(&_persistent_compat_type);
	if (r) {
		DMERR("Unable to register old-style persistent exception "
		      "store type");
		dm_exception_store_type_unregister(&_persistent_type);
		return r;
	}

	return r;
}

void dm_persistent_snapshot_exit(void)
{
	dm_exception_store_type_unregister(&_persistent_type);
	dm_exception_store_type_unregister(&_persistent_compat_type);
}