/*
 * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
 * Copyright (C) 2006-2008 Red Hat GmbH
 *
 * This file is released under the GPL.
 */

8#include "dm-exception-store.h"
9
10#include <linux/mm.h>
11#include <linux/pagemap.h>
12#include <linux/vmalloc.h>
13#include <linux/export.h>
14#include <linux/slab.h>
15#include <linux/dm-io.h>
16#include "dm-bufio.h"
17
18#define DM_MSG_PREFIX "persistent snapshot"
19#define DM_CHUNK_SIZE_DEFAULT_SECTORS 32
20
21#define DM_PREFETCH_CHUNKS 12

/*
 * Persistent snapshots: "persistent" means the exception table is
 * kept on the COW device itself, so the snapshot survives a reboot.
 *
 * The COW device is laid out in chunk_size units:
 *
 *   chunk 0           header (struct disk_header)
 *   chunk 1           metadata area 0: a packed array of
 *                     struct disk_exception entries
 *   next epa chunks   the data chunks described by area 0
 *   following chunk   metadata area 1
 *   ...               and so on
 *
 * where epa is the number of exceptions that fit in one chunk
 * (ps->exceptions_per_area); see area_location() and skip_metadata()
 * below for the arithmetic.
 */

#define SNAP_MAGIC 0x70416e53

/*
 * The on-disk version of the metadata.
 */
#define SNAPSHOT_DISK_VERSION 1

#define NUM_SNAPSHOT_HDR_CHUNKS 1
struct disk_header {
	__le32 magic;

	/*
	 * Is this snapshot valid?  Cleared (and written back to disk)
	 * when the snapshot is invalidated, e.g. after a metadata I/O
	 * error or when the exception store fills up.
	 */
	__le32 valid;

	/*
	 * Simple, incrementing version.  No backward compatibility.
	 */
	__le32 version;

	/* In sectors */
	__le32 chunk_size;
} __packed;

struct disk_exception {
	__le64 old_chunk;
	__le64 new_chunk;
} __packed;

struct core_exception {
	uint64_t old_chunk;
	uint64_t new_chunk;
};

struct commit_callback {
	void (*callback)(void *, int success);
	void *context;
};

/*
 * The top level structure for a persistent exception store.
 */
struct pstore {
	struct dm_exception_store *store;
	int version;
	int valid;
	uint32_t exceptions_per_area;

	/*
	 * Now that we have an asynchronous kcopyd there is no
	 * need for large chunk sizes, so it wont hurt to have a
	 * whole chunks worth of metadata in memory at once.
	 */
	void *area;

	/*
	 * An area of zeros used to clear the next area.
	 */
	void *zero_area;

	/*
	 * An area used for header.  The header can be written
	 * concurrently with metadata (when invalidating the snapshot),
	 * so it needs a separate buffer.
	 */
	void *header_area;

	/*
	 * Used to keep track of which metadata area the data in
	 * 'area' refers to.
	 */
	chunk_t current_area;

	/*
	 * The next free chunk for an exception.
	 *
	 * When creating exceptions, all the chunks here and above are
	 * free.  It holds the next chunk to be allocated.  On rare
	 * occasions (e.g. after a system crash) holes can be left in
	 * the exception store because chunks can be committed out of
	 * order.
	 *
	 * When merging exceptions, it does not necessarily mean all the
	 * chunks here and above are free.  It holds the value it would
	 * have held if all chunks had been committed in order of
	 * allocation.  Consequently the value may occasionally be
	 * slightly backwards.
	 */
	chunk_t next_free;

	/*
	 * The index of the next free exception slot in the current
	 * metadata area.
	 */
	uint32_t current_committed;

	atomic_t pending_count;
	uint32_t callback_count;
	struct commit_callback *callbacks;
	struct dm_io_client *io_client;

	struct workqueue_struct *metadata_wq;
};

static int alloc_area(struct pstore *ps)
{
	int r = -ENOMEM;
	size_t len;

	len = ps->store->chunk_size << SECTOR_SHIFT;

	/*
	 * Allocate the chunk_size block of memory that will hold
	 * a single metadata area.
	 */
	ps->area = vmalloc(len);
	if (!ps->area)
		goto err_area;

	ps->zero_area = vzalloc(len);
	if (!ps->zero_area)
		goto err_zero_area;

	ps->header_area = vmalloc(len);
	if (!ps->header_area)
		goto err_header_area;

	return 0;

err_header_area:
	vfree(ps->zero_area);

err_zero_area:
	vfree(ps->area);

err_area:
	return r;
}

static void free_area(struct pstore *ps)
{
	vfree(ps->area);
	ps->area = NULL;
	vfree(ps->zero_area);
	ps->zero_area = NULL;
	vfree(ps->header_area);
	ps->header_area = NULL;
}

struct mdata_req {
	struct dm_io_region *where;
	struct dm_io_request *io_req;
	struct work_struct work;
	int result;
};

static void do_metadata(struct work_struct *work)
{
	struct mdata_req *req = container_of(work, struct mdata_req, work);

	req->result = dm_io(req->io_req, 1, req->where, NULL);
}

/*
 * Read or write a chunk aligned and sized block of data from a device.
 */
static int chunk_io(struct pstore *ps, void *area, chunk_t chunk, int rw,
		    int metadata)
{
	struct dm_io_region where = {
		.bdev = dm_snap_cow(ps->store->snap)->bdev,
		.sector = ps->store->chunk_size * chunk,
		.count = ps->store->chunk_size,
	};
	struct dm_io_request io_req = {
		.bi_rw = rw,
		.mem.type = DM_IO_VMA,
		.mem.ptr.vma = area,
		.client = ps->io_client,
		.notify.fn = NULL,
	};
	struct mdata_req req;

	if (!metadata)
		return dm_io(&io_req, 1, &where, NULL);

	/*
	 * Issue the synchronous I/O from a different thread
	 * to avoid generic_make_request recursion.
	 */
	INIT_WORK_ONSTACK(&req.work, do_metadata);
	queue_work(ps->metadata_wq, &req.work);
	flush_workqueue(ps->metadata_wq);
	destroy_work_on_stack(&req.work);

	return req.result;
}

/*
 * Convert a metadata area index to a chunk index.
 */
static chunk_t area_location(struct pstore *ps, chunk_t area)
{
	return NUM_SNAPSHOT_HDR_CHUNKS + ((ps->exceptions_per_area + 1) * area);
}

/*
 * If ps->next_free has landed on a metadata chunk, step over it so
 * that only data chunks are handed out.
 */
static void skip_metadata(struct pstore *ps)
{
	uint32_t stride = ps->exceptions_per_area + 1;
	chunk_t next_free = ps->next_free;

	if (sector_div(next_free, stride) == NUM_SNAPSHOT_HDR_CHUNKS)
		ps->next_free++;
}

/*
 * Read or write a metadata area, skipping the header chunk at the
 * start of the device.
 */
static int area_io(struct pstore *ps, int rw)
{
	int r;
	chunk_t chunk;

	chunk = area_location(ps, ps->current_area);

	r = chunk_io(ps, ps->area, chunk, rw, 0);
	if (r)
		return r;

	return 0;
}

static void zero_memory_area(struct pstore *ps)
{
	memset(ps->area, 0, ps->store->chunk_size << SECTOR_SHIFT);
}

static int zero_disk_area(struct pstore *ps, chunk_t area)
{
	return chunk_io(ps, ps->zero_area, area_location(ps, area), WRITE, 0);
}

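/*
 * Read the snapshot header from chunk 0 of the COW device.  Sets
 * *new_snapshot if no header is present yet (magic is zero).  If the
 * chunk size recorded on disk differs from the one in the table, the
 * on-disk value wins and the in-core areas are reallocated to match.
 */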
static int read_header(struct pstore *ps, int *new_snapshot)
{
	int r;
	struct disk_header *dh;
	unsigned chunk_size;
	int chunk_size_supplied = 1;
	char *chunk_err;

	/*
	 * Use default chunk size (or logical_block_size, whichever is
	 * larger) if none supplied.
	 */
	if (!ps->store->chunk_size) {
		ps->store->chunk_size = max(DM_CHUNK_SIZE_DEFAULT_SECTORS,
		    bdev_logical_block_size(dm_snap_cow(ps->store->snap)->
					    bdev) >> 9);
		ps->store->chunk_mask = ps->store->chunk_size - 1;
		ps->store->chunk_shift = ffs(ps->store->chunk_size) - 1;
		chunk_size_supplied = 0;
	}

	ps->io_client = dm_io_client_create();
	if (IS_ERR(ps->io_client))
		return PTR_ERR(ps->io_client);

	r = alloc_area(ps);
	if (r)
		return r;

	r = chunk_io(ps, ps->header_area, 0, READ, 1);
	if (r)
		goto bad;

	dh = ps->header_area;

	if (le32_to_cpu(dh->magic) == 0) {
		*new_snapshot = 1;
		return 0;
	}

	if (le32_to_cpu(dh->magic) != SNAP_MAGIC) {
		DMWARN("Invalid or corrupt snapshot");
		r = -ENXIO;
		goto bad;
	}

	*new_snapshot = 0;
	ps->valid = le32_to_cpu(dh->valid);
	ps->version = le32_to_cpu(dh->version);
	chunk_size = le32_to_cpu(dh->chunk_size);

	if (ps->store->chunk_size == chunk_size)
		return 0;

	if (chunk_size_supplied)
		DMWARN("chunk size %u in device metadata overrides "
		       "table chunk size of %u.",
		       chunk_size, ps->store->chunk_size);

	/* We had a bogus chunk_size. Fix stuff up. */
	free_area(ps);

	r = dm_exception_store_set_chunk_size(ps->store, chunk_size,
					      &chunk_err);
	if (r) {
		DMERR("invalid on-disk chunk size %u: %s.",
		      chunk_size, chunk_err);
		return r;
	}

	r = alloc_area(ps);
	return r;

bad:
	free_area(ps);
	return r;
}

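/*
 * Write the in-core header fields back to chunk 0 of the COW device.
 */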
static int write_header(struct pstore *ps)
{
	struct disk_header *dh;

	memset(ps->header_area, 0, ps->store->chunk_size << SECTOR_SHIFT);

	dh = ps->header_area;
	dh->magic = cpu_to_le32(SNAP_MAGIC);
	dh->valid = cpu_to_le32(ps->valid);
	dh->version = cpu_to_le32(ps->version);
	dh->chunk_size = cpu_to_le32(ps->store->chunk_size);

	return chunk_io(ps, ps->header_area, 0, WRITE, 1);
}

/*
 * Access functions for the disk exceptions, these do the endian conversions.
 */
static struct disk_exception *get_exception(struct pstore *ps, void *ps_area,
					    uint32_t index)
{
	BUG_ON(index >= ps->exceptions_per_area);

	return ((struct disk_exception *) ps_area) + index;
}

static void read_exception(struct pstore *ps, void *ps_area,
			   uint32_t index, struct core_exception *result)
{
	struct disk_exception *de = get_exception(ps, ps_area, index);

	/* copy it */
	result->old_chunk = le64_to_cpu(de->old_chunk);
	result->new_chunk = le64_to_cpu(de->new_chunk);
}

static void write_exception(struct pstore *ps,
			    uint32_t index, struct core_exception *e)
{
	struct disk_exception *de = get_exception(ps, ps->area, index);

	/* copy it */
	de->old_chunk = cpu_to_le64(e->old_chunk);
	de->new_chunk = cpu_to_le64(e->new_chunk);
}

static void clear_exception(struct pstore *ps, uint32_t index)
{
	struct disk_exception *de = get_exception(ps, ps->area, index);

	/* clear it */
	de->old_chunk = 0;
	de->new_chunk = 0;
}

/*
 * Registers the exceptions that are present in the current area.
 * 'full' is filled in to indicate if the area has been filled.
 */
static int insert_exceptions(struct pstore *ps, void *ps_area,
			     int (*callback)(void *callback_context,
					     chunk_t old, chunk_t new),
			     void *callback_context,
			     int *full)
{
	int r;
	unsigned int i;
	struct core_exception e;

	/* presume the area is full */
	*full = 1;

	for (i = 0; i < ps->exceptions_per_area; i++) {
		read_exception(ps, ps_area, i, &e);

		/*
		 * If the new_chunk is pointing at the start of
		 * the COW device, where the first metadata area
		 * is, we know that we've hit the end of the
		 * exceptions.  Therefore the area is not full.
		 */
		if (e.new_chunk == 0LL) {
			ps->current_committed = i;
			*full = 0;
			break;
		}

		/*
		 * Keep track of the start of the free chunks.
		 */
		if (ps->next_free <= e.new_chunk)
			ps->next_free = e.new_chunk + 1;

		/*
		 * Otherwise we add the exception to the snapshot.
		 */
		r = callback(callback_context, e.old_chunk, e.new_chunk);
		if (r)
			return r;
	}

	return 0;
}

static int read_exceptions(struct pstore *ps,
			   int (*callback)(void *callback_context, chunk_t old,
					   chunk_t new),
			   void *callback_context)
{
	int r, full = 1;
	struct dm_bufio_client *client;
	chunk_t prefetch_area = 0;

	client = dm_bufio_client_create(dm_snap_cow(ps->store->snap)->bdev,
					ps->store->chunk_size << SECTOR_SHIFT,
					1, 0, NULL, NULL);

	if (IS_ERR(client))
		return PTR_ERR(client);

	/*
	 * Setup for one current buffer + desired readahead buffers.
	 */
	dm_bufio_set_minimum_buffers(client, 1 + DM_PREFETCH_CHUNKS);

	/*
	 * Keep reading chunks and inserting exceptions until
	 * we find a partially full area.
	 */
	for (ps->current_area = 0; full; ps->current_area++) {
		struct dm_buffer *bp;
		void *area;
		chunk_t chunk;

		if (unlikely(prefetch_area < ps->current_area))
			prefetch_area = ps->current_area;

		if (DM_PREFETCH_CHUNKS) do {
			chunk_t pf_chunk = area_location(ps, prefetch_area);
			if (unlikely(pf_chunk >= dm_bufio_get_device_size(client)))
				break;
			dm_bufio_prefetch(client, pf_chunk, 1);
			prefetch_area++;
			if (unlikely(!prefetch_area))
				break;
		} while (prefetch_area <= ps->current_area + DM_PREFETCH_CHUNKS);

		chunk = area_location(ps, ps->current_area);

		area = dm_bufio_read(client, chunk, &bp);
		if (unlikely(IS_ERR(area))) {
			r = PTR_ERR(area);
			goto ret_destroy_bufio;
		}

		r = insert_exceptions(ps, area, callback, callback_context,
				      &full);

		if (!full)
			memcpy(ps->area, area, ps->store->chunk_size << SECTOR_SHIFT);

		dm_bufio_release(bp);

		dm_bufio_forget(client, chunk);

		if (unlikely(r))
			goto ret_destroy_bufio;
	}

	ps->current_area--;

	skip_metadata(ps);

	r = 0;

ret_destroy_bufio:
	dm_bufio_client_destroy(client);

	return r;
}

static struct pstore *get_info(struct dm_exception_store *store)
{
	return (struct pstore *) store->context;
}

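/*
 * Report space usage: sectors allocated to exception data, the total
 * size of the COW device, and the sectors consumed by the header and
 * metadata areas.
 */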
static void persistent_usage(struct dm_exception_store *store,
			     sector_t *total_sectors,
			     sector_t *sectors_allocated,
			     sector_t *metadata_sectors)
{
	struct pstore *ps = get_info(store);

	*sectors_allocated = ps->next_free * store->chunk_size;
	*total_sectors = get_dev_size(dm_snap_cow(store->snap)->bdev);

	/*
	 * First chunk is the fixed header.
	 * Then there are (ps->current_area + 1) metadata chunks, each one
	 * separated from the next by ps->exceptions_per_area data chunks.
	 */
	*metadata_sectors = (ps->current_area + 1 + NUM_SNAPSHOT_HDR_CHUNKS) *
			    store->chunk_size;
}

static void persistent_dtr(struct dm_exception_store *store)
{
	struct pstore *ps = get_info(store);

	destroy_workqueue(ps->metadata_wq);

	/* Created in read_header() */
	if (ps->io_client)
		dm_io_client_destroy(ps->io_client);
	free_area(ps);

	/* Allocated in persistent_read_metadata() */
	vfree(ps->callbacks);

	kfree(ps);
}

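/*
 * Read the existing exceptions from the COW device (or initialise a
 * brand-new store), invoking 'callback' once per exception found.
 */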
static int persistent_read_metadata(struct dm_exception_store *store,
				    int (*callback)(void *callback_context,
						    chunk_t old, chunk_t new),
				    void *callback_context)
{
	int r, uninitialized_var(new_snapshot);
	struct pstore *ps = get_info(store);

	/*
	 * Read the snapshot header.
	 */
	r = read_header(ps, &new_snapshot);
	if (r)
		return r;

	/*
	 * Now we know correct chunk_size, complete the initialisation.
	 */
	ps->exceptions_per_area = (ps->store->chunk_size << SECTOR_SHIFT) /
				  sizeof(struct disk_exception);
	ps->callbacks = dm_vcalloc(ps->exceptions_per_area,
				   sizeof(*ps->callbacks));
	if (!ps->callbacks)
		return -ENOMEM;

	/*
	 * Do we need to setup a new snapshot ?
	 */
	if (new_snapshot) {
		r = write_header(ps);
		if (r) {
			DMWARN("write_header failed");
			return r;
		}

		ps->current_area = 0;
		zero_memory_area(ps);
		r = zero_disk_area(ps, 0);
		if (r)
			DMWARN("zero_disk_area(0) failed");
		return r;
	}

	/*
	 * Sanity checks.
	 */
	if (ps->version != SNAPSHOT_DISK_VERSION) {
		DMWARN("unable to handle snapshot disk version %d",
		       ps->version);
		return -EINVAL;
	}

	/*
	 * Metadata are valid, but snapshot is invalidated.
	 */
	if (!ps->valid)
		return 1;

	/*
	 * Read the metadata.
	 */
	r = read_exceptions(ps, callback, callback_context);

	return r;
}

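/*
 * Reserve the next free data chunk on the COW device for a new
 * exception.  Fails with -ENOSPC once the device is full.
 */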
static int persistent_prepare_exception(struct dm_exception_store *store,
					struct dm_exception *e)
{
	struct pstore *ps = get_info(store);
	sector_t size = get_dev_size(dm_snap_cow(store->snap)->bdev);

	/* Is there enough room ? */
	if (size < ((ps->next_free + 1) * store->chunk_size))
		return -ENOSPC;

	e->new_chunk = ps->next_free;

	/*
	 * Move onto the next free pending, making sure to take
	 * into account the location of the metadata chunks.
	 */
	ps->next_free++;
	skip_metadata(ps);

	atomic_inc(&ps->pending_count);
	return 0;
}

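/*
 * Record a prepared exception in the current metadata area and queue
 * its completion callback.  The area is committed to disk once all
 * pending exceptions are in, or the area fills up; the callbacks then
 * run with 'success' reflecting ps->valid.
 */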
static void persistent_commit_exception(struct dm_exception_store *store,
					struct dm_exception *e,
					void (*callback) (void *, int success),
					void *callback_context)
{
	unsigned int i;
	struct pstore *ps = get_info(store);
	struct core_exception ce;
	struct commit_callback *cb;

	ce.old_chunk = e->old_chunk;
	ce.new_chunk = e->new_chunk;
	write_exception(ps, ps->current_committed++, &ce);

	/*
	 * Add the callback to the back of the array.  This code
	 * is the only place where the callback array is
	 * manipulated, and we know that it will never be called
	 * multiple times concurrently.
	 */
	cb = ps->callbacks + ps->callback_count++;
	cb->callback = callback;
	cb->context = callback_context;

	/*
	 * If there are exceptions in flight and we have not yet
	 * filled this metadata area there's nothing more to do.
	 */
	if (!atomic_dec_and_test(&ps->pending_count) &&
	    (ps->current_committed != ps->exceptions_per_area))
		return;

	/*
	 * If we completely filled the current area, then wipe the next one.
	 */
	if ((ps->current_committed == ps->exceptions_per_area) &&
	    zero_disk_area(ps, ps->current_area + 1))
		ps->valid = 0;

	/*
	 * Commit exceptions to disk.
	 */
	if (ps->valid && area_io(ps, WRITE_FLUSH_FUA))
		ps->valid = 0;

	/*
	 * Advance to the next area if this one is full.
	 */
	if (ps->current_committed == ps->exceptions_per_area) {
		ps->current_committed = 0;
		ps->current_area++;
		zero_memory_area(ps);
	}

	for (i = 0; i < ps->callback_count; i++) {
		cb = ps->callbacks + i;
		cb->callback(cb->context, ps->valid);
	}

	ps->callback_count = 0;
}

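/*
 * Returns the number of exceptions, counting backwards from the most
 * recently committed one, whose chunks are consecutive and can
 * therefore be merged back into the origin in one go.
 */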
static int persistent_prepare_merge(struct dm_exception_store *store,
				    chunk_t *last_old_chunk,
				    chunk_t *last_new_chunk)
{
	struct pstore *ps = get_info(store);
	struct core_exception ce;
	int nr_consecutive;
	int r;

	/*
	 * When current area is empty, move back to preceding area.
	 */
	if (!ps->current_committed) {
		/*
		 * Have we finished?
		 */
		if (!ps->current_area)
			return 0;

		ps->current_area--;
		r = area_io(ps, READ);
		if (r < 0)
			return r;
		ps->current_committed = ps->exceptions_per_area;
	}

	read_exception(ps, ps->area, ps->current_committed - 1, &ce);
	*last_old_chunk = ce.old_chunk;
	*last_new_chunk = ce.new_chunk;

	/*
	 * Find number of consecutive chunks within the current area,
	 * working backwards.
	 */
	for (nr_consecutive = 1; nr_consecutive < ps->current_committed;
	     nr_consecutive++) {
		read_exception(ps, ps->area,
			       ps->current_committed - 1 - nr_consecutive,
			       &ce);
		if (ce.old_chunk != *last_old_chunk - nr_consecutive ||
		    ce.new_chunk != *last_new_chunk - nr_consecutive)
			break;
	}

	return nr_consecutive;
}

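/*
 * Remove the nr_merged most recent exceptions from the current area
 * and commit the area to disk.
 */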
static int persistent_commit_merge(struct dm_exception_store *store,
				   int nr_merged)
{
	int r, i;
	struct pstore *ps = get_info(store);

	BUG_ON(nr_merged > ps->current_committed);

	for (i = 0; i < nr_merged; i++)
		clear_exception(ps, ps->current_committed - 1 - i);

	r = area_io(ps, WRITE_FLUSH_FUA);
	if (r < 0)
		return r;

	ps->current_committed -= nr_merged;

	/*
	 * At this stage, only persistent_usage() uses ps->next_free, so
	 * we make no effort to keep ps->next_free strictly accurate, as
	 * exceptions may have been committed out-of-order originally.
	 * Always assume the worst case that the area may have been used
	 * in full.
	 *
	 * ps->current_area does not get reduced by prepare_merge() until
	 * after commit_merge() has removed the nr_merged previous
	 * exceptions.
	 */
	ps->next_free = area_location(ps, ps->current_area) +
			ps->current_committed + 1;

	return 0;
}

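/*
 * Mark the snapshot invalid on disk so it will not be reused after a
 * reboot.
 */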
static void persistent_drop_snapshot(struct dm_exception_store *store)
{
	struct pstore *ps = get_info(store);

	ps->valid = 0;
	if (write_header(ps))
		DMWARN("write header failed");
}

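/*
 * Construct the persistent exception store.  Most of the real setup
 * is deferred to persistent_read_metadata(), which runs once the
 * chunk size is known.
 */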
static int persistent_ctr(struct dm_exception_store *store,
			  unsigned argc, char **argv)
{
	struct pstore *ps;

	/* allocate the pstore */
	ps = kzalloc(sizeof(*ps), GFP_KERNEL);
	if (!ps)
		return -ENOMEM;

	ps->store = store;
	ps->valid = 1;
	ps->version = SNAPSHOT_DISK_VERSION;
	ps->area = NULL;
	ps->zero_area = NULL;
	ps->header_area = NULL;
	ps->next_free = NUM_SNAPSHOT_HDR_CHUNKS + 1; /* header and 1st area */
	ps->current_committed = 0;

	ps->callback_count = 0;
	atomic_set(&ps->pending_count, 0);
	ps->callbacks = NULL;

	ps->metadata_wq = alloc_workqueue("ksnaphd", WQ_MEM_RECLAIM, 0);
	if (!ps->metadata_wq) {
		kfree(ps);
		DMERR("couldn't start header metadata update thread");
		return -ENOMEM;
	}

	store->context = ps;

	return 0;
}

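/*
 * Table status reports " P <chunk_size>", the same form used on
 * snapshot table lines to request a persistent store.
 */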
static unsigned persistent_status(struct dm_exception_store *store,
				  status_type_t status, char *result,
				  unsigned maxlen)
{
	unsigned sz = 0;

	switch (status) {
	case STATUSTYPE_INFO:
		break;
	case STATUSTYPE_TABLE:
		DMEMIT(" P %llu", (unsigned long long)store->chunk_size);
	}

	return sz;
}

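/*
 * Two names are registered for this store type: the canonical
 * "persistent" and the single-letter "P" used by existing snapshot
 * table lines.
 */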
static struct dm_exception_store_type _persistent_type = {
	.name = "persistent",
	.module = THIS_MODULE,
	.ctr = persistent_ctr,
	.dtr = persistent_dtr,
	.read_metadata = persistent_read_metadata,
	.prepare_exception = persistent_prepare_exception,
	.commit_exception = persistent_commit_exception,
	.prepare_merge = persistent_prepare_merge,
	.commit_merge = persistent_commit_merge,
	.drop_snapshot = persistent_drop_snapshot,
	.usage = persistent_usage,
	.status = persistent_status,
};

static struct dm_exception_store_type _persistent_compat_type = {
	.name = "P",
	.module = THIS_MODULE,
	.ctr = persistent_ctr,
	.dtr = persistent_dtr,
	.read_metadata = persistent_read_metadata,
	.prepare_exception = persistent_prepare_exception,
	.commit_exception = persistent_commit_exception,
	.prepare_merge = persistent_prepare_merge,
	.commit_merge = persistent_commit_merge,
	.drop_snapshot = persistent_drop_snapshot,
	.usage = persistent_usage,
	.status = persistent_status,
};

int dm_persistent_snapshot_init(void)
{
	int r;

	r = dm_exception_store_type_register(&_persistent_type);
	if (r) {
		DMERR("Unable to register persistent exception store type");
		return r;
	}

	r = dm_exception_store_type_register(&_persistent_compat_type);
	if (r) {
		DMERR("Unable to register old-style persistent exception "
		      "store type");
		dm_exception_store_type_unregister(&_persistent_type);
		return r;
	}

	return r;
}

void dm_persistent_snapshot_exit(void)
{
	dm_exception_store_type_unregister(&_persistent_type);
	dm_exception_store_type_unregister(&_persistent_compat_type);
}