/*
 * dm-snap-persistent: the persistent exception store for
 * device-mapper snapshots.
 */

#include "dm-exception-store.h"

#include <linux/ctype.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/vmalloc.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/dm-io.h>
#include <linux/dm-bufio.h>

#define DM_MSG_PREFIX "persistent snapshot"
#define DM_CHUNK_SIZE_DEFAULT_SECTORS 32U	/* 16KB */

#define DM_PREFETCH_CHUNKS		12
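
/*
 * Persistent snapshots: "persistent" means the exception table is
 * kept on the COW device itself, so the snapshot survives a reboot.
 *
 * The metadata is stored in chunk-size blocks on the COW device.
 * Chunk 0 holds the header.  After that, each metadata area is one
 * chunk of on-disk exception entries followed by the
 * ps->exceptions_per_area data chunks those entries map to.
 *
 * All on-disk structures are little-endian.  The end of the entries
 * in an area is marked by an entry whose new_chunk is 0, which is
 * invalid because chunk 0 holds the header.
 */

/*
 * Magic for persistent snapshots: "SnAp" in little-endian.
 */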
#define SNAP_MAGIC 0x70416e53
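
/*
 * The on-disk version of the metadata.
 */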
#define SNAPSHOT_DISK_VERSION 1

#define NUM_SNAPSHOT_HDR_CHUNKS 1

struct disk_header {
	__le32 magic;

	/*
	 * Is this snapshot valid?  Cleared when the snapshot is
	 * invalidated.
	 */
	__le32 valid;

	/*
	 * Simple, incrementing version.  No backward compatibility.
	 */
	__le32 version;

	/* In units of 512-byte sectors. */
	__le32 chunk_size;
} __packed;

struct disk_exception {
	__le64 old_chunk;
	__le64 new_chunk;
} __packed;

struct core_exception {
	uint64_t old_chunk;
	uint64_t new_chunk;
};

struct commit_callback {
	void (*callback)(void *, int success);
	void *context;
};
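
/*
 * The top level structure for a persistent exception store.
 */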
struct pstore {
	struct dm_exception_store *store;
	int version;
	int valid;
	uint32_t exceptions_per_area;

	/*
	 * An in-core copy of one whole metadata area: a chunk's worth
	 * of on-disk exception entries.
	 */
	void *area;

	/*
	 * An area of all zeros, used to clear the next area on disk.
	 */
	void *zero_area;

	/*
	 * An area used for the header.  The header can be written
	 * concurrently with metadata (when invalidating the snapshot),
	 * so it needs its own buffer.
	 */
	void *header_area;

	/*
	 * Used to keep track of which metadata area the data in
	 * 'area' refers to.
	 */
	chunk_t current_area;

	/*
	 * The next free chunk for an exception.
	 *
	 * When creating exceptions, all the chunks here and above are
	 * free.  It holds the next chunk to be allocated.  On rare
	 * occasions (e.g. after a system crash) holes can be left in
	 * the exception store because chunks can be committed out of
	 * order.
	 *
	 * When merging exceptions, it does not necessarily mean all the
	 * chunks here and above are free.  It holds the value it would
	 * have held if all chunks had been committed in order of
	 * allocation.  Consequently the value may occasionally be
	 * slightly too low, but since it's only used for 'status' and
	 * it can never reach its minimum value too early this doesn't
	 * matter.
	 */
	chunk_t next_free;

	/*
	 * The index of the next free exception slot in the current
	 * metadata area.
	 */
	uint32_t current_committed;

	atomic_t pending_count;
	uint32_t callback_count;
	struct commit_callback *callbacks;
	struct dm_io_client *io_client;

	struct workqueue_struct *metadata_wq;
};

static int alloc_area(struct pstore *ps)
{
	int r = -ENOMEM;
	size_t len;

	len = ps->store->chunk_size << SECTOR_SHIFT;
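
	/*
	 * Allocate the chunk_size block of memory that will hold
	 * a single metadata area.
	 */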
	ps->area = vmalloc(len);
	if (!ps->area)
		goto err_area;

	ps->zero_area = vzalloc(len);
	if (!ps->zero_area)
		goto err_zero_area;

	ps->header_area = vmalloc(len);
	if (!ps->header_area)
		goto err_header_area;

	return 0;

err_header_area:
	vfree(ps->zero_area);

err_zero_area:
	vfree(ps->area);

err_area:
	return r;
}

static void free_area(struct pstore *ps)
{
	vfree(ps->area);
	ps->area = NULL;
	vfree(ps->zero_area);
	ps->zero_area = NULL;
	vfree(ps->header_area);
	ps->header_area = NULL;
}

struct mdata_req {
	struct dm_io_region *where;
	struct dm_io_request *io_req;
	struct work_struct work;
	int result;
};

static void do_metadata(struct work_struct *work)
{
	struct mdata_req *req = container_of(work, struct mdata_req, work);

	req->result = dm_io(req->io_req, 1, req->where, NULL);
}
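
/*
 * Read or write a chunk aligned and sized block of data from a device.
 */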
static int chunk_io(struct pstore *ps, void *area, chunk_t chunk, int op,
		    int op_flags, int metadata)
{
	struct dm_io_region where = {
		.bdev = dm_snap_cow(ps->store->snap)->bdev,
		.sector = ps->store->chunk_size * chunk,
		.count = ps->store->chunk_size,
	};
	struct dm_io_request io_req = {
		.bi_op = op,
		.bi_op_flags = op_flags,
		.mem.type = DM_IO_VMA,
		.mem.ptr.vma = area,
		.client = ps->io_client,
		.notify.fn = NULL,
	};
	struct mdata_req req;

	if (!metadata)
		return dm_io(&io_req, 1, &where, NULL);

	req.where = &where;
	req.io_req = &io_req;
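
	/*
	 * Issue the synchronous I/O from a different thread
	 * to avoid generic_make_request recursion.
	 */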
	INIT_WORK_ONSTACK(&req.work, do_metadata);
	queue_work(ps->metadata_wq, &req.work);
	flush_workqueue(ps->metadata_wq);
	destroy_work_on_stack(&req.work);

	return req.result;
}
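
/*
 * Convert a metadata area index to a chunk index.
 */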
static chunk_t area_location(struct pstore *ps, chunk_t area)
{
	return NUM_SNAPSHOT_HDR_CHUNKS + ((ps->exceptions_per_area + 1) * area);
}

static void skip_metadata(struct pstore *ps)
{
	uint32_t stride = ps->exceptions_per_area + 1;
	chunk_t next_free = ps->next_free;

	if (sector_div(next_free, stride) == NUM_SNAPSHOT_HDR_CHUNKS)
		ps->next_free++;
}
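
/*
 * Read or write the metadata area indexed by ps->current_area.
 */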
static int area_io(struct pstore *ps, int op, int op_flags)
{
	chunk_t chunk = area_location(ps, ps->current_area);

	return chunk_io(ps, ps->area, chunk, op, op_flags, 0);
}

static void zero_memory_area(struct pstore *ps)
{
	memset(ps->area, 0, ps->store->chunk_size << SECTOR_SHIFT);
}

static int zero_disk_area(struct pstore *ps, chunk_t area)
{
	return chunk_io(ps, ps->zero_area, area_location(ps, area),
			REQ_OP_WRITE, 0, 0);
}

static int read_header(struct pstore *ps, int *new_snapshot)
{
	int r;
	struct disk_header *dh;
	unsigned chunk_size;
	int chunk_size_supplied = 1;
	char *chunk_err;
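
	/*
	 * Use default chunk size (or logical_block_size, whichever is
	 * larger) if none was supplied.
	 */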
	if (!ps->store->chunk_size) {
		ps->store->chunk_size = max(DM_CHUNK_SIZE_DEFAULT_SECTORS,
		    bdev_logical_block_size(dm_snap_cow(ps->store->snap)->bdev) >> 9);
		ps->store->chunk_mask = ps->store->chunk_size - 1;
		ps->store->chunk_shift = __ffs(ps->store->chunk_size);
		chunk_size_supplied = 0;
	}

	ps->io_client = dm_io_client_create();
	if (IS_ERR(ps->io_client))
		return PTR_ERR(ps->io_client);

	r = alloc_area(ps);
	if (r)
		return r;

	r = chunk_io(ps, ps->header_area, 0, REQ_OP_READ, 0, 1);
	if (r)
		goto bad;

	dh = ps->header_area;

	if (le32_to_cpu(dh->magic) == 0) {
		*new_snapshot = 1;
		return 0;
	}

	if (le32_to_cpu(dh->magic) != SNAP_MAGIC) {
		DMWARN("Invalid or corrupt snapshot");
		r = -ENXIO;
		goto bad;
	}

	*new_snapshot = 0;
	ps->valid = le32_to_cpu(dh->valid);
	ps->version = le32_to_cpu(dh->version);
	chunk_size = le32_to_cpu(dh->chunk_size);

	if (ps->store->chunk_size == chunk_size)
		return 0;

	if (chunk_size_supplied)
		DMWARN("chunk size %u in device metadata overrides table chunk size of %u.",
		       chunk_size, ps->store->chunk_size);
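
	/* We had a bogus chunk_size. Fix stuff up. */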
	free_area(ps);

	r = dm_exception_store_set_chunk_size(ps->store, chunk_size,
					      &chunk_err);
	if (r) {
		DMERR("invalid on-disk chunk size %u: %s.",
		      chunk_size, chunk_err);
		return r;
	}

	r = alloc_area(ps);
	return r;

bad:
	free_area(ps);
	return r;
}

static int write_header(struct pstore *ps)
{
	struct disk_header *dh;

	memset(ps->header_area, 0, ps->store->chunk_size << SECTOR_SHIFT);

	dh = ps->header_area;
	dh->magic = cpu_to_le32(SNAP_MAGIC);
	dh->valid = cpu_to_le32(ps->valid);
	dh->version = cpu_to_le32(ps->version);
	dh->chunk_size = cpu_to_le32(ps->store->chunk_size);

	return chunk_io(ps, ps->header_area, 0, REQ_OP_WRITE, 0, 1);
}
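
/*
 * Access functions for the disk exceptions.  These do the endian
 * conversions.
 */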
static struct disk_exception *get_exception(struct pstore *ps, void *ps_area,
					    uint32_t index)
{
	BUG_ON(index >= ps->exceptions_per_area);

	return ((struct disk_exception *) ps_area) + index;
}

static void read_exception(struct pstore *ps, void *ps_area,
			   uint32_t index, struct core_exception *result)
{
	struct disk_exception *de = get_exception(ps, ps_area, index);

	result->old_chunk = le64_to_cpu(de->old_chunk);
	result->new_chunk = le64_to_cpu(de->new_chunk);
}

static void write_exception(struct pstore *ps,
			    uint32_t index, struct core_exception *e)
{
	struct disk_exception *de = get_exception(ps, ps->area, index);

	de->old_chunk = cpu_to_le64(e->old_chunk);
	de->new_chunk = cpu_to_le64(e->new_chunk);
}

static void clear_exception(struct pstore *ps, uint32_t index)
{
	struct disk_exception *de = get_exception(ps, ps->area, index);

	de->old_chunk = 0;
	de->new_chunk = 0;
}
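
/*
 * Registers the exceptions that are present in the current area.
 * 'full' is filled in to indicate if the area has been
 * filled.
 */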
static int insert_exceptions(struct pstore *ps, void *ps_area,
			     int (*callback)(void *callback_context,
					     chunk_t old, chunk_t new),
			     void *callback_context,
			     int *full)
{
	int r;
	unsigned int i;
	struct core_exception e;

	/* Presume the area is full. */
	*full = 1;

	for (i = 0; i < ps->exceptions_per_area; i++) {
		read_exception(ps, ps_area, i, &e);
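
		/*
		 * If the new_chunk is pointing at the start of
		 * the COW device, where the first metadata area
		 * is, we know that we've hit the end of the
		 * exceptions.  Therefore the area is not full.
		 */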
		if (e.new_chunk == 0LL) {
			ps->current_committed = i;
			*full = 0;
			break;
		}
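
		/*
		 * Keep track of the start of the free chunks.
		 */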
		if (ps->next_free <= e.new_chunk)
			ps->next_free = e.new_chunk + 1;
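
		/*
		 * Otherwise we add the exception to the snapshot.
		 */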
		r = callback(callback_context, e.old_chunk, e.new_chunk);
		if (r)
			return r;
	}

	return 0;
}

static int read_exceptions(struct pstore *ps,
			   int (*callback)(void *callback_context, chunk_t old,
					   chunk_t new),
			   void *callback_context)
{
	int r, full = 1;
	struct dm_bufio_client *client;
	chunk_t prefetch_area = 0;

	client = dm_bufio_client_create(dm_snap_cow(ps->store->snap)->bdev,
					ps->store->chunk_size << SECTOR_SHIFT,
					1, 0, NULL, NULL);
	if (IS_ERR(client))
		return PTR_ERR(client);
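
	/*
	 * Setup for one current buffer + desired readahead buffers.
	 */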
	dm_bufio_set_minimum_buffers(client, 1 + DM_PREFETCH_CHUNKS);
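
	/*
	 * Keep reading chunks and inserting exceptions until
	 * we find a partially full area.
	 */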
	for (ps->current_area = 0; full; ps->current_area++) {
		struct dm_buffer *bp;
		void *area;
		chunk_t chunk;

		if (unlikely(prefetch_area < ps->current_area))
			prefetch_area = ps->current_area;

		if (DM_PREFETCH_CHUNKS) do {
			chunk_t pf_chunk = area_location(ps, prefetch_area);

			if (unlikely(pf_chunk >= dm_bufio_get_device_size(client)))
				break;
			dm_bufio_prefetch(client, pf_chunk, 1);
			prefetch_area++;
			if (unlikely(!prefetch_area))
				break;
		} while (prefetch_area <= ps->current_area + DM_PREFETCH_CHUNKS);

		chunk = area_location(ps, ps->current_area);

		area = dm_bufio_read(client, chunk, &bp);
		if (IS_ERR(area)) {
			r = PTR_ERR(area);
			goto ret_destroy_bufio;
		}

		r = insert_exceptions(ps, area, callback, callback_context,
				      &full);

		if (!full)
			memcpy(ps->area, area,
			       ps->store->chunk_size << SECTOR_SHIFT);

		dm_bufio_release(bp);

		dm_bufio_forget(client, chunk);

		if (unlikely(r))
			goto ret_destroy_bufio;
	}

	ps->current_area--;

	skip_metadata(ps);

	r = 0;

ret_destroy_bufio:
	dm_bufio_client_destroy(client);

	return r;
}

static struct pstore *get_info(struct dm_exception_store *store)
{
	return (struct pstore *) store->context;
}

static void persistent_usage(struct dm_exception_store *store,
			     sector_t *total_sectors,
			     sector_t *sectors_allocated,
			     sector_t *metadata_sectors)
{
	struct pstore *ps = get_info(store);

	*sectors_allocated = ps->next_free * store->chunk_size;
	*total_sectors = get_dev_size(dm_snap_cow(store->snap)->bdev);
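
	/*
	 * First chunk is the fixed header.
	 * Then there are (ps->current_area + 1) metadata chunks, each one
	 * separated from the next by ps->exceptions_per_area data chunks.
	 */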
	*metadata_sectors = (ps->current_area + 1 + NUM_SNAPSHOT_HDR_CHUNKS) *
			    store->chunk_size;
}

static void persistent_dtr(struct dm_exception_store *store)
{
	struct pstore *ps = get_info(store);

	destroy_workqueue(ps->metadata_wq);
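
	/* Created in read_metadata. */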
	if (ps->io_client)
		dm_io_client_destroy(ps->io_client);
	free_area(ps);
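
	/* Allocated in persistent_read_metadata. */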
	vfree(ps->callbacks);

	kfree(ps);
}

static int persistent_read_metadata(struct dm_exception_store *store,
				    int (*callback)(void *callback_context,
						    chunk_t old, chunk_t new),
				    void *callback_context)
{
	int r, new_snapshot;
	struct pstore *ps = get_info(store);
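
	/*
	 * Read the snapshot header.
	 */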
	r = read_header(ps, &new_snapshot);
	if (r)
		return r;
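
	/*
	 * Now we know correct chunk_size, complete the initialisation.
	 */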
	ps->exceptions_per_area = (ps->store->chunk_size << SECTOR_SHIFT) /
				  sizeof(struct disk_exception);
	ps->callbacks = dm_vcalloc(ps->exceptions_per_area,
				   sizeof(*ps->callbacks));
	if (!ps->callbacks)
		return -ENOMEM;
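
	/*
	 * Do we need to set up a new snapshot?
	 */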
	if (new_snapshot) {
		r = write_header(ps);
		if (r) {
			DMWARN("write_header failed");
			return r;
		}

		ps->current_area = 0;
		zero_memory_area(ps);
		r = zero_disk_area(ps, 0);
		if (r)
			DMWARN("zero_disk_area(0) failed");
		return r;
	}
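
	/*
	 * Sanity checks.
	 */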
	if (ps->version != SNAPSHOT_DISK_VERSION) {
		DMWARN("unable to handle snapshot disk version %d",
		       ps->version);
		return -EINVAL;
	}
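
	/*
	 * Metadata are valid, but snapshot is invalidated.
	 */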
	if (!ps->valid)
		return 1;
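
	/*
	 * Read the metadata.
	 */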
	r = read_exceptions(ps, callback, callback_context);

	return r;
}

static int persistent_prepare_exception(struct dm_exception_store *store,
					struct dm_exception *e)
{
	struct pstore *ps = get_info(store);
	sector_t size = get_dev_size(dm_snap_cow(store->snap)->bdev);
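
	/* Is there enough room? */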
	if (size < ((ps->next_free + 1) * store->chunk_size))
		return -ENOSPC;

	e->new_chunk = ps->next_free;
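
	/*
	 * Move onto the next free pending, making sure to take
	 * into account the location of the metadata chunks.
	 */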
	ps->next_free++;
	skip_metadata(ps);

	atomic_inc(&ps->pending_count);
	return 0;
}

static void persistent_commit_exception(struct dm_exception_store *store,
					struct dm_exception *e, int valid,
					void (*callback)(void *, int success),
					void *callback_context)
{
	unsigned int i;
	struct pstore *ps = get_info(store);
	struct core_exception ce;
	struct commit_callback *cb;

	if (!valid)
		ps->valid = 0;

	ce.old_chunk = e->old_chunk;
	ce.new_chunk = e->new_chunk;
	write_exception(ps, ps->current_committed++, &ce);
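
	/*
	 * Add the callback to the back of the array.  This code
	 * is the only place where the callback array is
	 * manipulated, and we know that it will never be called
	 * multiple times concurrently.
	 */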
	cb = ps->callbacks + ps->callback_count++;
	cb->callback = callback;
	cb->context = callback_context;
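
	/*
	 * If there are exceptions in flight and we have not yet
	 * committed the whole area, commit this exception together
	 * with the next confirmed one.
	 */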
	if (!atomic_dec_and_test(&ps->pending_count) &&
	    (ps->current_committed != ps->exceptions_per_area))
		return;
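
	/*
	 * If we completely filled the current area, then wipe the next one.
	 */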
	if ((ps->current_committed == ps->exceptions_per_area) &&
	    zero_disk_area(ps, ps->current_area + 1))
		ps->valid = 0;
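
	/*
	 * Commit exceptions to disk.
	 */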
	if (ps->valid && area_io(ps, REQ_OP_WRITE,
				 REQ_PREFLUSH | REQ_FUA | REQ_SYNC))
		ps->valid = 0;
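
	/*
	 * Advance to the next area if this one is full.
	 */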
	if (ps->current_committed == ps->exceptions_per_area) {
		ps->current_committed = 0;
		ps->current_area++;
		zero_memory_area(ps);
	}

	for (i = 0; i < ps->callback_count; i++) {
		cb = ps->callbacks + i;
		cb->callback(cb->context, ps->valid);
	}

	ps->callback_count = 0;
}

static int persistent_prepare_merge(struct dm_exception_store *store,
				    chunk_t *last_old_chunk,
				    chunk_t *last_new_chunk)
{
	struct pstore *ps = get_info(store);
	struct core_exception ce;
	int nr_consecutive;
	int r;
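
	/*
	 * When current area is empty, move back to preceding area.
	 */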
	if (!ps->current_committed) {
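		/*
		 * Have we finished?
		 */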
		if (!ps->current_area)
			return 0;

		ps->current_area--;
		r = area_io(ps, REQ_OP_READ, 0);
		if (r < 0)
			return r;
		ps->current_committed = ps->exceptions_per_area;
	}

	read_exception(ps, ps->area, ps->current_committed - 1, &ce);
	*last_old_chunk = ce.old_chunk;
	*last_new_chunk = ce.new_chunk;
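
	/*
	 * Find number of consecutive chunks within the current area,
	 * working backwards.
	 */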
	for (nr_consecutive = 1; nr_consecutive < ps->current_committed;
	     nr_consecutive++) {
		read_exception(ps, ps->area,
			       ps->current_committed - 1 - nr_consecutive,
			       &ce);
		if (ce.old_chunk != *last_old_chunk - nr_consecutive ||
		    ce.new_chunk != *last_new_chunk - nr_consecutive)
			break;
	}

	return nr_consecutive;
}

static int persistent_commit_merge(struct dm_exception_store *store,
				   int nr_merged)
{
	int r, i;
	struct pstore *ps = get_info(store);

	BUG_ON(nr_merged > ps->current_committed);

	for (i = 0; i < nr_merged; i++)
		clear_exception(ps, ps->current_committed - 1 - i);

	r = area_io(ps, REQ_OP_WRITE, REQ_PREFLUSH | REQ_FUA);
	if (r < 0)
		return r;

	ps->current_committed -= nr_merged;
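
	/*
	 * At this stage, only persistent_usage() uses ps->next_free, so
	 * we make no attempt to keep ps->next_free strictly accurate
	 * as exceptions may have been committed out-of-order originally.
	 * Once a snapshot has become merging, we set it to the value it
	 * would have held if all the exceptions had been committed in
	 * order of allocation.
	 */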
	ps->next_free = area_location(ps, ps->current_area) +
			ps->current_committed + 1;

	return 0;
}

static void persistent_drop_snapshot(struct dm_exception_store *store)
{
	struct pstore *ps = get_info(store);

	ps->valid = 0;
	if (write_header(ps))
		DMWARN("write header failed");
}

static int persistent_ctr(struct dm_exception_store *store, char *options)
{
	struct pstore *ps;
	int r;
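
	/* Allocate the pstore. */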
	ps = kzalloc(sizeof(*ps), GFP_KERNEL);
	if (!ps)
		return -ENOMEM;

	ps->store = store;
	ps->valid = 1;
	ps->version = SNAPSHOT_DISK_VERSION;
	ps->area = NULL;
	ps->zero_area = NULL;
	ps->header_area = NULL;
	ps->next_free = NUM_SNAPSHOT_HDR_CHUNKS + 1; /* header and 1st area */
	ps->current_committed = 0;

	ps->callback_count = 0;
	atomic_set(&ps->pending_count, 0);
	ps->callbacks = NULL;

	ps->metadata_wq = alloc_workqueue("ksnaphd", WQ_MEM_RECLAIM, 0);
	if (!ps->metadata_wq) {
		DMERR("couldn't start header metadata update thread");
		r = -ENOMEM;
		goto err_workqueue;
	}

	if (options) {
		char overflow = toupper(options[0]);

		if (overflow == 'O')
			store->userspace_supports_overflow = true;
		else {
			DMERR("Unsupported persistent store option: %s",
			      options);
			r = -EINVAL;
			goto err_options;
		}
	}

	store->context = ps;

	return 0;

err_options:
	destroy_workqueue(ps->metadata_wq);
err_workqueue:
	kfree(ps);

	return r;
}

static unsigned persistent_status(struct dm_exception_store *store,
				  status_type_t status, char *result,
				  unsigned maxlen)
{
	unsigned sz = 0;

	switch (status) {
	case STATUSTYPE_INFO:
		break;
	case STATUSTYPE_TABLE:
		DMEMIT(" %s %llu", store->userspace_supports_overflow ? "PO" : "P",
		       (unsigned long long)store->chunk_size);
		break;
	}

	return sz;
}

static struct dm_exception_store_type _persistent_type = {
	.name = "persistent",
	.module = THIS_MODULE,
	.ctr = persistent_ctr,
	.dtr = persistent_dtr,
	.read_metadata = persistent_read_metadata,
	.prepare_exception = persistent_prepare_exception,
	.commit_exception = persistent_commit_exception,
	.prepare_merge = persistent_prepare_merge,
	.commit_merge = persistent_commit_merge,
	.drop_snapshot = persistent_drop_snapshot,
	.usage = persistent_usage,
	.status = persistent_status,
};

static struct dm_exception_store_type _persistent_compat_type = {
	.name = "P",
	.module = THIS_MODULE,
	.ctr = persistent_ctr,
	.dtr = persistent_dtr,
	.read_metadata = persistent_read_metadata,
	.prepare_exception = persistent_prepare_exception,
	.commit_exception = persistent_commit_exception,
	.prepare_merge = persistent_prepare_merge,
	.commit_merge = persistent_commit_merge,
	.drop_snapshot = persistent_drop_snapshot,
	.usage = persistent_usage,
	.status = persistent_status,
};

int dm_persistent_snapshot_init(void)
{
	int r;

	r = dm_exception_store_type_register(&_persistent_type);
	if (r) {
		DMERR("Unable to register persistent exception store type");
		return r;
	}

	r = dm_exception_store_type_register(&_persistent_compat_type);
	if (r) {
		DMERR("Unable to register old-style persistent exception store type");
		dm_exception_store_type_unregister(&_persistent_type);
		return r;
	}

	return r;
}

void dm_persistent_snapshot_exit(void)
{
	dm_exception_store_type_unregister(&_persistent_type);
	dm_exception_store_type_unregister(&_persistent_compat_type);
}