/*
 * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
 * Copyright (C) 2006-2008 Red Hat GmbH
 *
 * This file is released under the GPL.
 */

#include "dm-exception-store.h"

#include <linux/ctype.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/vmalloc.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/dm-io.h>
#include <linux/dm-bufio.h>

#define DM_MSG_PREFIX "persistent snapshot"
#define DM_CHUNK_SIZE_DEFAULT_SECTORS 32U

#define DM_PREFETCH_CHUNKS		12

/*-----------------------------------------------------------------
 * Persistent snapshots, by persistent we mean that the snapshot
 * will survive a reboot.
 *---------------------------------------------------------------*/

/*
 * We need to store a record of which parts of the origin have
 * been copied to the snapshot device.  The snapshot code
 * requires that we copy exception chunks to chunk aligned areas
 * of the COW store.  It makes sense therefore, to store the
 * metadata in chunk size blocks.
 *
 * There is no backward or forward compatibility implemented,
 * snapshots with different disk versions than the kernel will
 * not be usable.  It is expected that "lvcreate" will blank out
 * the start of a fresh COW device before calling the snapshot
 * constructor.
 *
 * The first chunk of the COW device just contains the header;
 * all the following chunks hold the exceptions.  Each metadata
 * "area" is one chunk holding a table of disk_exceptions,
 * followed by exceptions_per_area data chunks:
 *
 *   <header> <area 0 metadata> <area 0 data...>
 *            <area 1 metadata> <area 1 data...> ...
 */

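/*
 * Worked example (editor's illustration, assuming the default
 * 32-sector/16KB chunk size): struct disk_exception is 16 bytes, so
 * exceptions_per_area = 16384 / 16 = 1024 and the stride between
 * metadata chunks is 1025:
 *
 *   chunk 0            header
 *   chunk 1            area 0 metadata
 *   chunks 2..1025     area 0 data
 *   chunk 1026         area 1 metadata
 *   chunks 1027..2050  area 1 data
 *   ...
 */
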
/*
 * Magic for persistent snapshots: "SnAp" - Feeble isn't it.
 */
#define SNAP_MAGIC 0x70416e53

/*
 * The on-disk version of the metadata.
 */
#define SNAPSHOT_DISK_VERSION 1

#define NUM_SNAPSHOT_HDR_CHUNKS 1

struct disk_header {
	__le32 magic;

	/*
	 * Is this snapshot valid.  There is a transition from
	 * 'valid' to 'invalid' when the snapshot becomes unusable,
	 * but never the other way.
	 */
	__le32 valid;

	/*
	 * Simple, incrementing version. no backward
	 * compatibility.
	 */
	__le32 version;

	/* In sectors */
	__le32 chunk_size;
} __packed;

struct disk_exception {
	__le64 old_chunk;
	__le64 new_chunk;
} __packed;

struct core_exception {
	uint64_t old_chunk;
	uint64_t new_chunk;
};

struct commit_callback {
	void (*callback)(void *, int success);
	void *context;
};

/*
 * The top level structure for a persistent exception store.
 */
struct pstore {
	struct dm_exception_store *store;
	int version;
	int valid;
	uint32_t exceptions_per_area;

	/*
	 * Now that we have an asynchronous kcopyd there is no
	 * need for large chunk sizes, so it wont hurt to have a
	 * whole chunks worth of metadata in memory at once.
	 */
	void *area;

	/*
	 * An area of zeros used to clear the next area.
	 */
	void *zero_area;

	/*
	 * An area used for header. The header can be written
	 * concurrently with metadata (when invalidating the snapshot),
	 * so it needs a separate buffer.
	 */
	void *header_area;

	/*
	 * Used to keep track of which metadata area the data in
	 * 'area' refers to.
	 */
	chunk_t current_area;

	/*
	 * The next free chunk for an exception.
	 *
	 * When creating exceptions, all the chunks here and above are
	 * free.  It holds the next chunk to be allocated.  Or, if some
	 * chunks have been allocated but not yet committed, it holds the
	 * value it would have held if all allocations were committed.
	 *
	 * When merging exceptions, it does not necessarily hold the
	 * next free chunk.  Merging rewinds it to the value it would
	 * have held if all the committed exceptions in the current
	 * area were laid out consecutively (see persistent_commit_merge).
	 */
	chunk_t next_free;

	/*
	 * The index of the next free exception slot in the current
	 * metadata area.
	 */
	uint32_t current_committed;

	atomic_t pending_count;
	uint32_t callback_count;
	struct commit_callback *callbacks;
	struct dm_io_client *io_client;

	struct workqueue_struct *metadata_wq;
};

static int alloc_area(struct pstore *ps)
{
	int r = -ENOMEM;
	size_t len;

	len = ps->store->chunk_size << SECTOR_SHIFT;

	/*
	 * Allocate the chunk_size block of memory that will hold
	 * a single metadata area.
	 */
	ps->area = vmalloc(len);
	if (!ps->area)
		goto err_area;

	ps->zero_area = vzalloc(len);
	if (!ps->zero_area)
		goto err_zero_area;

	ps->header_area = vmalloc(len);
	if (!ps->header_area)
		goto err_header_area;

	return 0;

err_header_area:
	vfree(ps->zero_area);

err_zero_area:
	vfree(ps->area);

err_area:
	return r;
}

static void free_area(struct pstore *ps)
{
	vfree(ps->area);
	ps->area = NULL;
	vfree(ps->zero_area);
	ps->zero_area = NULL;
	vfree(ps->header_area);
	ps->header_area = NULL;
}

struct mdata_req {
	struct dm_io_region *where;
	struct dm_io_request *io_req;
	struct work_struct work;
	int result;
};

static void do_metadata(struct work_struct *work)
{
	struct mdata_req *req = container_of(work, struct mdata_req, work);

	req->result = dm_io(req->io_req, 1, req->where, NULL);
}

/*
 * Read or write a chunk aligned and sized block of data from a device.
 */
static int chunk_io(struct pstore *ps, void *area, chunk_t chunk, int op,
		    int op_flags, int metadata)
{
	struct dm_io_region where = {
		.bdev = dm_snap_cow(ps->store->snap)->bdev,
		.sector = ps->store->chunk_size * chunk,
		.count = ps->store->chunk_size,
	};
	struct dm_io_request io_req = {
		.bi_op = op,
		.bi_op_flags = op_flags,
		.mem.type = DM_IO_VMA,
		.mem.ptr.vma = area,
		.client = ps->io_client,
		.notify.fn = NULL,
	};
	struct mdata_req req;

	if (!metadata)
		return dm_io(&io_req, 1, &where, NULL);

	req.where = &where;
	req.io_req = &io_req;

	/*
	 * Issue the synchronous I/O from a different thread
	 * to avoid generic_make_request recursion.
	 */
	INIT_WORK_ONSTACK(&req.work, do_metadata);
	queue_work(ps->metadata_wq, &req.work);
	flush_workqueue(ps->metadata_wq);
	destroy_work_on_stack(&req.work);

	return req.result;
}

/*
 * Convert a metadata area index to a chunk index.
 */
static chunk_t area_location(struct pstore *ps, chunk_t area)
{
	return NUM_SNAPSHOT_HDR_CHUNKS + ((ps->exceptions_per_area + 1) * area);
}

static void skip_metadata(struct pstore *ps)
{
	uint32_t stride = ps->exceptions_per_area + 1;
	chunk_t next_free = ps->next_free;

	if (sector_div(next_free, stride) == NUM_SNAPSHOT_HDR_CHUNKS)
		ps->next_free++;
}

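/*
 * Worked example (editor's illustration): with exceptions_per_area =
 * 1024 the stride is 1025 and the metadata chunks are 1, 1026, 2051...
 * (i.e. area_location(0), area_location(1), ...).  If ps->next_free
 * lands on one of them, e.g. 1026 (1026 % 1025 == 1 ==
 * NUM_SNAPSHOT_HDR_CHUNKS), skip_metadata() bumps it to 1027 so that
 * data is never allocated on top of a metadata chunk.
 */
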
/*
 * Read or write a metadata area.  Remembering to skip the first
 * chunk which holds the header.
 */
static int area_io(struct pstore *ps, int op, int op_flags)
{
	chunk_t chunk;

	chunk = area_location(ps, ps->current_area);

	return chunk_io(ps, ps->area, chunk, op, op_flags, 0);
}

static void zero_memory_area(struct pstore *ps)
{
	memset(ps->area, 0, ps->store->chunk_size << SECTOR_SHIFT);
}

static int zero_disk_area(struct pstore *ps, chunk_t area)
{
	return chunk_io(ps, ps->zero_area, area_location(ps, area),
			REQ_OP_WRITE, 0, 0);
}

static int read_header(struct pstore *ps, int *new_snapshot)
{
	int r;
	struct disk_header *dh;
	unsigned chunk_size;
	int chunk_size_supplied = 1;
	char *chunk_err;

	/*
	 * Use default chunk size (or logical_block_size, whichever is
	 * larger) if none supplied
	 */
	if (!ps->store->chunk_size) {
		ps->store->chunk_size = max(DM_CHUNK_SIZE_DEFAULT_SECTORS,
		    bdev_logical_block_size(dm_snap_cow(ps->store->snap)->
					    bdev) >> 9);
		ps->store->chunk_mask = ps->store->chunk_size - 1;
		ps->store->chunk_shift = __ffs(ps->store->chunk_size);
		chunk_size_supplied = 0;
	}

	ps->io_client = dm_io_client_create();
	if (IS_ERR(ps->io_client))
		return PTR_ERR(ps->io_client);

	r = alloc_area(ps);
	if (r)
		return r;

	r = chunk_io(ps, ps->header_area, 0, REQ_OP_READ, 0, 1);
	if (r)
		goto bad;

	dh = ps->header_area;

	/* A zeroed header means a freshly created (blank) COW device. */
	if (le32_to_cpu(dh->magic) == 0) {
		*new_snapshot = 1;
		return 0;
	}

	if (le32_to_cpu(dh->magic) != SNAP_MAGIC) {
		DMWARN("Invalid or corrupt snapshot");
		r = -ENXIO;
		goto bad;
	}

	*new_snapshot = 0;
	ps->valid = le32_to_cpu(dh->valid);
	ps->version = le32_to_cpu(dh->version);
	chunk_size = le32_to_cpu(dh->chunk_size);

	if (ps->store->chunk_size == chunk_size)
		return 0;

	if (chunk_size_supplied)
		DMWARN("chunk size %u in device metadata overrides "
		       "table chunk size of %u.",
		       chunk_size, ps->store->chunk_size);

	/* We had a bogus chunk_size. Fix stuff up. */
	free_area(ps);

	r = dm_exception_store_set_chunk_size(ps->store, chunk_size,
					      &chunk_err);
	if (r) {
		DMERR("invalid on-disk chunk size %u: %s.",
		      chunk_size, chunk_err);
		return r;
	}

	r = alloc_area(ps);
	return r;

bad:
	free_area(ps);
	return r;
}

static int write_header(struct pstore *ps)
{
	struct disk_header *dh;

	memset(ps->header_area, 0, ps->store->chunk_size << SECTOR_SHIFT);

	dh = ps->header_area;
	dh->magic = cpu_to_le32(SNAP_MAGIC);
	dh->valid = cpu_to_le32(ps->valid);
	dh->version = cpu_to_le32(ps->version);
	dh->chunk_size = cpu_to_le32(ps->store->chunk_size);

	return chunk_io(ps, ps->header_area, 0, REQ_OP_WRITE, 0, 1);
}

/*
 * Access functions for the disk exceptions.  These do the
 * endian conversions.
 */
static struct disk_exception *get_exception(struct pstore *ps, void *ps_area,
					    uint32_t index)
{
	BUG_ON(index >= ps->exceptions_per_area);

	return ((struct disk_exception *) ps_area) + index;
}

static void read_exception(struct pstore *ps, void *ps_area,
			   uint32_t index, struct core_exception *result)
{
	struct disk_exception *de = get_exception(ps, ps_area, index);

	/* copy it */
	result->old_chunk = le64_to_cpu(de->old_chunk);
	result->new_chunk = le64_to_cpu(de->new_chunk);
}

static void write_exception(struct pstore *ps,
			    uint32_t index, struct core_exception *e)
{
	struct disk_exception *de = get_exception(ps, ps->area, index);

	/* copy it */
	de->old_chunk = cpu_to_le64(e->old_chunk);
	de->new_chunk = cpu_to_le64(e->new_chunk);
}

static void clear_exception(struct pstore *ps, uint32_t index)
{
	struct disk_exception *de = get_exception(ps, ps->area, index);

	/* clear it */
	de->old_chunk = 0;
	de->new_chunk = 0;
}

/*
 * Registers the exceptions found in one metadata area with the
 * snapshot code via 'callback'.  '*full' is cleared if the area
 * contains an unused (zero) slot, i.e. it is the last area in use.
 */
static int insert_exceptions(struct pstore *ps, void *ps_area,
			     int (*callback)(void *callback_context,
					     chunk_t old, chunk_t new),
			     void *callback_context,
			     int *full)
{
	int r;
	unsigned int i;
	struct core_exception e;

	/* presume the area is full */
	*full = 1;

	for (i = 0; i < ps->exceptions_per_area; i++) {
		read_exception(ps, ps_area, i, &e);

		/*
		 * If the new_chunk is pointing at the start of
		 * the COW device, where the first metadata area
		 * is, we know it hasn't been used yet.  This is
		 * the end of the committed exceptions, so the
		 * area is not full.
		 */
		if (e.new_chunk == 0LL) {
			ps->current_committed = i;
			*full = 0;
			break;
		}

		/*
		 * Keep track of the start of the free chunks.
		 */
		if (ps->next_free <= e.new_chunk)
			ps->next_free = e.new_chunk + 1;

		/*
		 * Otherwise we add the exception to the snapshot.
		 */
		r = callback(callback_context, e.old_chunk, e.new_chunk);
		if (r)
			return r;
	}

	return 0;
}

static int read_exceptions(struct pstore *ps,
			   int (*callback)(void *callback_context, chunk_t old,
					   chunk_t new),
			   void *callback_context)
{
	int r, full = 1;
	struct dm_bufio_client *client;
	chunk_t prefetch_area = 0;

	client = dm_bufio_client_create(dm_snap_cow(ps->store->snap)->bdev,
					ps->store->chunk_size << SECTOR_SHIFT,
					1, 0, NULL, NULL);

	if (IS_ERR(client))
		return PTR_ERR(client);

	/*
	 * Setup for one current buffer + desired readahead buffers.
	 */
	dm_bufio_set_minimum_buffers(client, 1 + DM_PREFETCH_CHUNKS);

	/*
	 * Keep reading metadata areas and inserting exceptions until
	 * we find a partially full area.  Metadata chunks up to
	 * DM_PREFETCH_CHUNKS areas ahead are prefetched so the reads
	 * can be overlapped.
	 */
	for (ps->current_area = 0; full; ps->current_area++) {
		struct dm_buffer *bp;
		void *area;
		chunk_t chunk;

		if (unlikely(prefetch_area < ps->current_area))
			prefetch_area = ps->current_area;

		if (DM_PREFETCH_CHUNKS) do {
			chunk_t pf_chunk = area_location(ps, prefetch_area);

			if (unlikely(pf_chunk >= dm_bufio_get_device_size(client)))
				break;
			dm_bufio_prefetch(client, pf_chunk, 1);
			prefetch_area++;
			if (unlikely(!prefetch_area))
				break;
		} while (prefetch_area <= ps->current_area + DM_PREFETCH_CHUNKS);

		chunk = area_location(ps, ps->current_area);

		area = dm_bufio_read(client, chunk, &bp);
		if (IS_ERR(area)) {
			r = PTR_ERR(area);
			goto ret_destroy_bufio;
		}

		r = insert_exceptions(ps, area, callback, callback_context,
				      &full);

		if (!full)
			memcpy(ps->area, area, ps->store->chunk_size << SECTOR_SHIFT);

		dm_bufio_release(bp);

		dm_bufio_forget(client, chunk);

		if (unlikely(r))
			goto ret_destroy_bufio;
	}

	ps->current_area--;

	skip_metadata(ps);

	r = 0;

ret_destroy_bufio:
	dm_bufio_client_destroy(client);

	return r;
}

static struct pstore *get_info(struct dm_exception_store *store)
{
	return (struct pstore *) store->context;
}

static void persistent_usage(struct dm_exception_store *store,
			     sector_t *total_sectors,
			     sector_t *sectors_allocated,
			     sector_t *metadata_sectors)
{
	struct pstore *ps = get_info(store);

	*sectors_allocated = ps->next_free * store->chunk_size;
	*total_sectors = get_dev_size(dm_snap_cow(store->snap)->bdev);

	/*
	 * First chunk is the fixed header.
	 * Then there are (ps->current_area + 1) metadata chunks, each one
	 * separated from the next by ps->exceptions_per_area data chunks.
	 */
	*metadata_sectors = (ps->current_area + 1 + NUM_SNAPSHOT_HDR_CHUNKS) *
			    store->chunk_size;
}

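/*
 * Worked example (editor's illustration): with 32-sector chunks and
 * current_area = 2, metadata_sectors = (2 + 1 + 1) * 32 = 128, i.e.
 * the header chunk plus the three metadata chunks of areas 0..2.
 */
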
static void persistent_dtr(struct dm_exception_store *store)
{
	struct pstore *ps = get_info(store);

	destroy_workqueue(ps->metadata_wq);

	/* Created in read_metadata */
	if (ps->io_client)
		dm_io_client_destroy(ps->io_client);
	free_area(ps);

	/* Allocated in persistent_read_metadata */
	vfree(ps->callbacks);

	kfree(ps);
}

static int persistent_read_metadata(struct dm_exception_store *store,
				    int (*callback)(void *callback_context,
						    chunk_t old, chunk_t new),
				    void *callback_context)
{
	int r, uninitialized_var(new_snapshot);
	struct pstore *ps = get_info(store);

	/*
	 * Read the snapshot header.
	 */
	r = read_header(ps, &new_snapshot);
	if (r)
		return r;

	/*
	 * Now we know correct chunk_size, complete the initialisation.
	 */
	ps->exceptions_per_area = (ps->store->chunk_size << SECTOR_SHIFT) /
				  sizeof(struct disk_exception);
	ps->callbacks = dm_vcalloc(ps->exceptions_per_area,
				   sizeof(*ps->callbacks));
	if (!ps->callbacks)
		return -ENOMEM;

	/*
	 * Do we need to setup a new snapshot ?
	 */
	if (new_snapshot) {
		r = write_header(ps);
		if (r) {
			DMWARN("write_header failed");
			return r;
		}

		ps->current_area = 0;
		zero_memory_area(ps);
		r = zero_disk_area(ps, 0);
		if (r)
			DMWARN("zero_disk_area(0) failed");
		return r;
	}

	/*
	 * Sanity checks.
	 */
	if (ps->version != SNAPSHOT_DISK_VERSION) {
		DMWARN("unable to handle snapshot disk version %d",
		       ps->version);
		return -EINVAL;
	}

	/*
	 * Metadata are valid, but snapshot is invalidated
	 */
	if (!ps->valid)
		return 1;

	/*
	 * Read the metadata.
	 */
	r = read_exceptions(ps, callback, callback_context);

	return r;
}

static int persistent_prepare_exception(struct dm_exception_store *store,
					struct dm_exception *e)
{
	struct pstore *ps = get_info(store);
	sector_t size = get_dev_size(dm_snap_cow(store->snap)->bdev);

	/* Is there enough room ? */
	if (size < ((ps->next_free + 1) * store->chunk_size))
		return -ENOSPC;

	e->new_chunk = ps->next_free;

	/*
	 * Move onto the next free pending, making sure to take
	 * into account the location of the metadata chunks.
	 */
	ps->next_free++;
	skip_metadata(ps);

	atomic_inc(&ps->pending_count);
	return 0;
}

static void persistent_commit_exception(struct dm_exception_store *store,
					struct dm_exception *e, int valid,
					void (*callback) (void *, int success),
					void *callback_context)
{
	unsigned int i;
	struct pstore *ps = get_info(store);
	struct core_exception ce;
	struct commit_callback *cb;

	if (!valid)
		ps->valid = 0;

	ce.old_chunk = e->old_chunk;
	ce.new_chunk = e->new_chunk;
	write_exception(ps, ps->current_committed++, &ce);

	/*
	 * Add the callback to the back of the array.  This code
	 * is the only place where the callback array is
	 * manipulated, and we know that it will never be called
	 * multiple times concurrently.
	 */
	cb = ps->callbacks + ps->callback_count++;
	cb->callback = callback;
	cb->context = callback_context;

	/*
	 * If there are exceptions in flight and we have not yet
	 * filled this metadata area there's nothing more to do.
	 */
	if (!atomic_dec_and_test(&ps->pending_count) &&
	    (ps->current_committed != ps->exceptions_per_area))
		return;

	/*
	 * If we completely filled the current area, then wipe the next one.
	 */
	if ((ps->current_committed == ps->exceptions_per_area) &&
	    zero_disk_area(ps, ps->current_area + 1))
		ps->valid = 0;

	/*
	 * Commit exceptions to disk.
	 */
	if (ps->valid && area_io(ps, REQ_OP_WRITE,
				 REQ_PREFLUSH | REQ_FUA | REQ_SYNC))
		ps->valid = 0;

	/*
	 * Advance to the next area if this one is full.
	 */
	if (ps->current_committed == ps->exceptions_per_area) {
		ps->current_committed = 0;
		ps->current_area++;
		zero_memory_area(ps);
	}

	for (i = 0; i < ps->callback_count; i++) {
		cb = ps->callbacks + i;
		cb->callback(cb->context, ps->valid);
	}

	ps->callback_count = 0;
}

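/*
 * Illustration of the batching above (editor's note): suppose three
 * exceptions are pending in a non-full area.  The first two commits
 * see a non-zero pending_count and return early; the third brings
 * pending_count to zero, so it writes the whole area out with
 * PREFLUSH|FUA|SYNC and only then runs every queued callback.
 * Filling the last slot of an area forces this writeout (and the
 * zeroing of the next area) even while other exceptions are pending.
 */
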
static int persistent_prepare_merge(struct dm_exception_store *store,
				    chunk_t *last_old_chunk,
				    chunk_t *last_new_chunk)
{
	struct pstore *ps = get_info(store);
	struct core_exception ce;
	int nr_consecutive;
	int r;

	/*
	 * When current area is empty, move back to preceding area.
	 */
	if (!ps->current_committed) {
		/*
		 * Have we finished?
		 */
		if (!ps->current_area)
			return 0;

		ps->current_area--;
		r = area_io(ps, REQ_OP_READ, 0);
		if (r < 0)
			return r;
		ps->current_committed = ps->exceptions_per_area;
	}

	read_exception(ps, ps->area, ps->current_committed - 1, &ce);
	*last_old_chunk = ce.old_chunk;
	*last_new_chunk = ce.new_chunk;

	/*
	 * Find number of consecutive chunks within the current area,
	 * working backwards.
	 */
	for (nr_consecutive = 1; nr_consecutive < ps->current_committed;
	     nr_consecutive++) {
		read_exception(ps, ps->area,
			       ps->current_committed - 1 - nr_consecutive,
			       &ce);
		if (ce.old_chunk != *last_old_chunk - nr_consecutive ||
		    ce.new_chunk != *last_new_chunk - nr_consecutive)
			break;
	}

	return nr_consecutive;
}

static int persistent_commit_merge(struct dm_exception_store *store,
				   int nr_merged)
{
	int r, i;
	struct pstore *ps = get_info(store);

	BUG_ON(nr_merged > ps->current_committed);

	for (i = 0; i < nr_merged; i++)
		clear_exception(ps, ps->current_committed - 1 - i);

	r = area_io(ps, REQ_OP_WRITE, REQ_PREFLUSH | REQ_FUA);
	if (r < 0)
		return r;

	ps->current_committed -= nr_merged;

	/*
	 * At this stage, only persistent_usage() uses ps->next_free, so
	 * we make no attempt to keep ps->next_free strictly accurate
	 * as exceptions may have been committed out-of-order originally.
	 * Once a snapshot has become merging, we set it to the value it
	 * would have held if all the committed exceptions in the area
	 * were consecutive.
	 */
	ps->next_free = area_location(ps, ps->current_area) +
			ps->current_committed + 1;

	return 0;
}

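/*
 * Worked example (editor's illustration): with exceptions_per_area =
 * 1024, current_area = 2 and current_committed = 100 after the merge,
 * next_free = area_location(2) + 100 + 1 = 2051 + 101 = 2152: the
 * chunk that would follow the 100 remaining exceptions if they sat
 * consecutively behind area 2's metadata chunk.
 */
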
static void persistent_drop_snapshot(struct dm_exception_store *store)
{
	struct pstore *ps = get_info(store);

	ps->valid = 0;
	if (write_header(ps))
		DMWARN("write header failed");
}

static int persistent_ctr(struct dm_exception_store *store, char *options)
{
	struct pstore *ps;
	int r;

	/* allocate the pstore */
	ps = kzalloc(sizeof(*ps), GFP_KERNEL);
	if (!ps)
		return -ENOMEM;

	ps->store = store;
	ps->valid = 1;
	ps->version = SNAPSHOT_DISK_VERSION;
	ps->area = NULL;
	ps->zero_area = NULL;
	ps->header_area = NULL;
	ps->next_free = NUM_SNAPSHOT_HDR_CHUNKS + 1; /* header and 1st area */
	ps->current_committed = 0;

	ps->callback_count = 0;
	atomic_set(&ps->pending_count, 0);
	ps->callbacks = NULL;

	ps->metadata_wq = alloc_workqueue("ksnaphd", WQ_MEM_RECLAIM, 0);
	if (!ps->metadata_wq) {
		DMERR("couldn't start header metadata update thread");
		r = -ENOMEM;
		goto err_workqueue;
	}

	if (options) {
		char overflow = toupper(options[0]);

		if (overflow == 'O')
			store->userspace_supports_overflow = true;
		else {
			DMERR("Unsupported persistent store option: %s", options);
			r = -EINVAL;
			goto err_options;
		}
	}

	store->context = ps;

	return 0;

err_options:
	destroy_workqueue(ps->metadata_wq);
err_workqueue:
	kfree(ps);

	return r;
}

static unsigned persistent_status(struct dm_exception_store *store,
				  status_type_t status, char *result,
				  unsigned maxlen)
{
	unsigned sz = 0;

	switch (status) {
	case STATUSTYPE_INFO:
		break;
	case STATUSTYPE_TABLE:
		DMEMIT(" %s %llu", store->userspace_supports_overflow ? "PO" : "P",
		       (unsigned long long)store->chunk_size);
	}

	return sz;
}

static struct dm_exception_store_type _persistent_type = {
	.name = "persistent",
	.module = THIS_MODULE,
	.ctr = persistent_ctr,
	.dtr = persistent_dtr,
	.read_metadata = persistent_read_metadata,
	.prepare_exception = persistent_prepare_exception,
	.commit_exception = persistent_commit_exception,
	.prepare_merge = persistent_prepare_merge,
	.commit_merge = persistent_commit_merge,
	.drop_snapshot = persistent_drop_snapshot,
	.usage = persistent_usage,
	.status = persistent_status,
};

static struct dm_exception_store_type _persistent_compat_type = {
	.name = "P",
	.module = THIS_MODULE,
	.ctr = persistent_ctr,
	.dtr = persistent_dtr,
	.read_metadata = persistent_read_metadata,
	.prepare_exception = persistent_prepare_exception,
	.commit_exception = persistent_commit_exception,
	.prepare_merge = persistent_prepare_merge,
	.commit_merge = persistent_commit_merge,
	.drop_snapshot = persistent_drop_snapshot,
	.usage = persistent_usage,
	.status = persistent_status,
};

int dm_persistent_snapshot_init(void)
{
	int r;

	r = dm_exception_store_type_register(&_persistent_type);
	if (r) {
		DMERR("Unable to register persistent exception store type");
		return r;
	}

	r = dm_exception_store_type_register(&_persistent_compat_type);
	if (r) {
		DMERR("Unable to register old-style persistent exception "
		      "store type");
		dm_exception_store_type_unregister(&_persistent_type);
		return r;
	}

	return r;
}

void dm_persistent_snapshot_exit(void)
{
	dm_exception_store_type_unregister(&_persistent_type);
	dm_exception_store_type_unregister(&_persistent_compat_type);
}