#include "dm-exception-store.h"

#include <linux/ctype.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/vmalloc.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/dm-io.h>
#include "dm-bufio.h"

#define DM_MSG_PREFIX "persistent snapshot"
#define DM_CHUNK_SIZE_DEFAULT_SECTORS 32

#define DM_PREFETCH_CHUNKS		12

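/*
 * On-disk layout of the COW device, in chunk_size units:
 *
 *   chunk 0             : header (struct disk_header)
 *   chunk 1             : exception area 0, an array of
 *                         struct disk_exception entries
 *   chunks 2 .. epa+1   : data chunks referenced by area 0
 *   chunk epa+2         : exception area 1
 *   ...
 *
 * where "epa" is exceptions_per_area, i.e. how many struct
 * disk_exception entries fit in one chunk.  area_location()
 * below performs this area-index to chunk-index mapping.
 */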
#define SNAP_MAGIC 0x70416e53

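/*
 * The on-disk metadata format version; only this version is
 * understood by the code below.
 */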
#define SNAPSHOT_DISK_VERSION 1

#define NUM_SNAPSHOT_HDR_CHUNKS 1

struct disk_header {
	__le32 magic;

	/*
	 * Is this snapshot still valid?  Cleared once the snapshot
	 * has been invalidated; there is no way of recovering.
	 */
	__le32 valid;

	/*
	 * Simple, incrementing on-disk version number; no backwards
	 * compatibility.
	 */
	__le32 version;

	/* Chunk size, in 512-byte sectors. */
	__le32 chunk_size;
} __packed;

struct disk_exception {
	__le64 old_chunk;
	__le64 new_chunk;
} __packed;

struct core_exception {
	uint64_t old_chunk;
	uint64_t new_chunk;
};

struct commit_callback {
	void (*callback)(void *, int success);
	void *context;
};

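/*
 * The in-core state of a single persistent exception store.
 */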
struct pstore {
	struct dm_exception_store *store;
	int version;
	int valid;
	uint32_t exceptions_per_area;

	/*
	 * A chunk-sized buffer holding the metadata area that is
	 * currently being filled with committed exceptions.
	 */
	void *area;

	/*
	 * A chunk of zeroes, used to wipe the next metadata area
	 * on disk before it is used.
	 */
	void *zero_area;

	/*
	 * A separate chunk-sized buffer for reading and writing the
	 * header, so header updates never clobber 'area'.
	 */
	void *header_area;

	/*
	 * Which metadata area 'area' currently refers to.
	 */
	chunk_t current_area;

	/*
	 * The next free chunk on the COW device available for
	 * exception data.
	 */
	chunk_t next_free;

	/*
	 * The index of the next free exception slot in the current
	 * metadata area.
	 */
	uint32_t current_committed;

	atomic_t pending_count;
	uint32_t callback_count;
	struct commit_callback *callbacks;
	struct dm_io_client *io_client;

	struct workqueue_struct *metadata_wq;
};

static int alloc_area(struct pstore *ps)
{
	int r = -ENOMEM;
	size_t len;

	len = ps->store->chunk_size << SECTOR_SHIFT;

	/*
	 * Allocate the chunk-sized buffers that hold a single
	 * metadata area, the zero area and the header.
	 */
	ps->area = vmalloc(len);
	if (!ps->area)
		goto err_area;

	ps->zero_area = vzalloc(len);
	if (!ps->zero_area)
		goto err_zero_area;

	ps->header_area = vmalloc(len);
	if (!ps->header_area)
		goto err_header_area;

	return 0;

err_header_area:
	vfree(ps->zero_area);

err_zero_area:
	vfree(ps->area);

err_area:
	return r;
}

static void free_area(struct pstore *ps)
{
	vfree(ps->area);
	ps->area = NULL;
	vfree(ps->zero_area);
	ps->zero_area = NULL;
	vfree(ps->header_area);
	ps->header_area = NULL;
}

struct mdata_req {
	struct dm_io_region *where;
	struct dm_io_request *io_req;
	struct work_struct work;
	int result;
};

static void do_metadata(struct work_struct *work)
{
	struct mdata_req *req = container_of(work, struct mdata_req, work);

	req->result = dm_io(req->io_req, 1, req->where, NULL);
}

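/*
 * Read or write a chunk-aligned, chunk-sized block of data on the
 * COW device.  If 'metadata' is set, the I/O is pushed through
 * ps->metadata_wq and waited for rather than being issued directly.
 */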
static int chunk_io(struct pstore *ps, void *area, chunk_t chunk, int rw,
		    int metadata)
{
	struct dm_io_region where = {
		.bdev = dm_snap_cow(ps->store->snap)->bdev,
		.sector = ps->store->chunk_size * chunk,
		.count = ps->store->chunk_size,
	};
	struct dm_io_request io_req = {
		.bi_rw = rw,
		.mem.type = DM_IO_VMA,
		.mem.ptr.vma = area,
		.client = ps->io_client,
		.notify.fn = NULL,
	};
	struct mdata_req req;

	if (!metadata)
		return dm_io(&io_req, 1, &where, NULL);

	req.where = &where;
	req.io_req = &io_req;

	/*
	 * Issue the synchronous I/O from the metadata workqueue and
	 * wait for it to complete.
	 */
	INIT_WORK_ONSTACK(&req.work, do_metadata);
	queue_work(ps->metadata_wq, &req.work);
	flush_workqueue(ps->metadata_wq);
	destroy_work_on_stack(&req.work);

	return req.result;
}

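/*
 * Convert a metadata area index to the chunk on the COW device that
 * holds that area.
 */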
static chunk_t area_location(struct pstore *ps, chunk_t area)
{
	return NUM_SNAPSHOT_HDR_CHUNKS + ((ps->exceptions_per_area + 1) * area);
}

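/*
 * If ps->next_free has landed on a metadata chunk, step over it so
 * that exception data is never written on top of metadata.
 */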
static void skip_metadata(struct pstore *ps)
{
	uint32_t stride = ps->exceptions_per_area + 1;
	chunk_t next_free = ps->next_free;
	if (sector_div(next_free, stride) == NUM_SNAPSHOT_HDR_CHUNKS)
		ps->next_free++;
}

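/*
 * Read or write the metadata area that ps->current_area refers to.
 */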
static int area_io(struct pstore *ps, int rw)
{
	int r;
	chunk_t chunk;

	chunk = area_location(ps, ps->current_area);

	r = chunk_io(ps, ps->area, chunk, rw, 0);
	if (r)
		return r;

	return 0;
}

static void zero_memory_area(struct pstore *ps)
{
	memset(ps->area, 0, ps->store->chunk_size << SECTOR_SHIFT);
}

static int zero_disk_area(struct pstore *ps, chunk_t area)
{
	return chunk_io(ps, ps->zero_area, area_location(ps, area), WRITE, 0);
}

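/*
 * Read the header from the COW device.  A zero magic number means a
 * brand new snapshot; otherwise the header is validated and any
 * chunk size recorded on disk overrides the one from the table.
 */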
static int read_header(struct pstore *ps, int *new_snapshot)
{
	int r;
	struct disk_header *dh;
	unsigned chunk_size;
	int chunk_size_supplied = 1;
	char *chunk_err;

	/*
	 * Use default chunk size (or logical_block_size, if larger)
	 * if none was supplied in the table.
	 */
	if (!ps->store->chunk_size) {
		ps->store->chunk_size = max(DM_CHUNK_SIZE_DEFAULT_SECTORS,
		    bdev_logical_block_size(dm_snap_cow(ps->store->snap)->
					    bdev) >> 9);
		ps->store->chunk_mask = ps->store->chunk_size - 1;
		ps->store->chunk_shift = __ffs(ps->store->chunk_size);
		chunk_size_supplied = 0;
	}

	ps->io_client = dm_io_client_create();
	if (IS_ERR(ps->io_client))
		return PTR_ERR(ps->io_client);

	r = alloc_area(ps);
	if (r)
		return r;

	r = chunk_io(ps, ps->header_area, 0, READ, 1);
	if (r)
		goto bad;

	dh = ps->header_area;

	if (le32_to_cpu(dh->magic) == 0) {
		*new_snapshot = 1;
		return 0;
	}

	if (le32_to_cpu(dh->magic) != SNAP_MAGIC) {
		DMWARN("Invalid or corrupt snapshot");
		r = -ENXIO;
		goto bad;
	}

	*new_snapshot = 0;
	ps->valid = le32_to_cpu(dh->valid);
	ps->version = le32_to_cpu(dh->version);
	chunk_size = le32_to_cpu(dh->chunk_size);

	if (ps->store->chunk_size == chunk_size)
		return 0;

	if (chunk_size_supplied)
		DMWARN("chunk size %u in device metadata overrides "
		       "table chunk size of %u.",
		       chunk_size, ps->store->chunk_size);

	/*
	 * The table chunk size was wrong: free the areas and
	 * reallocate them using the on-disk chunk size.
	 */
	free_area(ps);

	r = dm_exception_store_set_chunk_size(ps->store, chunk_size,
					      &chunk_err);
	if (r) {
		DMERR("invalid on-disk chunk size %u: %s.",
		      chunk_size, chunk_err);
		return r;
	}

	r = alloc_area(ps);
	return r;

bad:
	free_area(ps);
	return r;
}

static int write_header(struct pstore *ps)
{
	struct disk_header *dh;

	memset(ps->header_area, 0, ps->store->chunk_size << SECTOR_SHIFT);

	dh = ps->header_area;
	dh->magic = cpu_to_le32(SNAP_MAGIC);
	dh->valid = cpu_to_le32(ps->valid);
	dh->version = cpu_to_le32(ps->version);
	dh->chunk_size = cpu_to_le32(ps->store->chunk_size);

	return chunk_io(ps, ps->header_area, 0, WRITE, 1);
}

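/*
 * Access functions for the disk exceptions.  These do the endian
 * conversions between the on-disk and in-core representations.
 */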
static struct disk_exception *get_exception(struct pstore *ps, void *ps_area,
					    uint32_t index)
{
	BUG_ON(index >= ps->exceptions_per_area);

	return ((struct disk_exception *) ps_area) + index;
}

static void read_exception(struct pstore *ps, void *ps_area,
			   uint32_t index, struct core_exception *result)
{
	struct disk_exception *de = get_exception(ps, ps_area, index);

	result->old_chunk = le64_to_cpu(de->old_chunk);
	result->new_chunk = le64_to_cpu(de->new_chunk);
}

static void write_exception(struct pstore *ps,
			    uint32_t index, struct core_exception *e)
{
	struct disk_exception *de = get_exception(ps, ps->area, index);

	de->old_chunk = cpu_to_le64(e->old_chunk);
	de->new_chunk = cpu_to_le64(e->new_chunk);
}

static void clear_exception(struct pstore *ps, uint32_t index)
{
	struct disk_exception *de = get_exception(ps, ps->area, index);

	de->old_chunk = 0;
	de->new_chunk = 0;
}

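/*
 * Walk one metadata area and register each committed exception with
 * the snapshot core via 'callback'.  *full is cleared if the area
 * turns out to be only partially filled.
 */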
static int insert_exceptions(struct pstore *ps, void *ps_area,
			     int (*callback)(void *callback_context,
					     chunk_t old, chunk_t new),
			     void *callback_context,
			     int *full)
{
	int r;
	unsigned int i;
	struct core_exception e;

	/* Presume the area is full. */
	*full = 1;

	for (i = 0; i < ps->exceptions_per_area; i++) {
		read_exception(ps, ps_area, i, &e);

		/*
		 * A zero new_chunk marks the end of the committed
		 * exceptions, so this area is only partially full.
		 */
		if (e.new_chunk == 0LL) {
			ps->current_committed = i;
			*full = 0;
			break;
		}

		/*
		 * Keep track of the start of the free chunks.
		 */
		if (ps->next_free <= e.new_chunk)
			ps->next_free = e.new_chunk + 1;

		/*
		 * Otherwise add the exception to the snapshot.
		 */
		r = callback(callback_context, e.old_chunk, e.new_chunk);
		if (r)
			return r;
	}

	return 0;
}

static int read_exceptions(struct pstore *ps,
			   int (*callback)(void *callback_context, chunk_t old,
					   chunk_t new),
			   void *callback_context)
{
	int r, full = 1;
	struct dm_bufio_client *client;
	chunk_t prefetch_area = 0;

	client = dm_bufio_client_create(dm_snap_cow(ps->store->snap)->bdev,
					ps->store->chunk_size << SECTOR_SHIFT,
					1, 0, NULL, NULL);

	if (IS_ERR(client))
		return PTR_ERR(client);

	/*
	 * Set up for one current buffer plus the desired readahead buffers.
	 */
	dm_bufio_set_minimum_buffers(client, 1 + DM_PREFETCH_CHUNKS);

	/*
	 * Keep reading metadata areas, registering their exceptions,
	 * until we find a partially filled area.
	 */
	for (ps->current_area = 0; full; ps->current_area++) {
		struct dm_buffer *bp;
		void *area;
		chunk_t chunk;

		if (unlikely(prefetch_area < ps->current_area))
			prefetch_area = ps->current_area;

		if (DM_PREFETCH_CHUNKS) do {
			chunk_t pf_chunk = area_location(ps, prefetch_area);
			if (unlikely(pf_chunk >= dm_bufio_get_device_size(client)))
				break;
			dm_bufio_prefetch(client, pf_chunk, 1);
			prefetch_area++;
			if (unlikely(!prefetch_area))
				break;
		} while (prefetch_area <= ps->current_area + DM_PREFETCH_CHUNKS);

		chunk = area_location(ps, ps->current_area);

		area = dm_bufio_read(client, chunk, &bp);
		if (IS_ERR(area)) {
			r = PTR_ERR(area);
			goto ret_destroy_bufio;
		}

		r = insert_exceptions(ps, area, callback, callback_context,
				      &full);

		if (!full)
			memcpy(ps->area, area, ps->store->chunk_size << SECTOR_SHIFT);

		dm_bufio_release(bp);

		dm_bufio_forget(client, chunk);

		if (unlikely(r))
			goto ret_destroy_bufio;
	}

	ps->current_area--;

	skip_metadata(ps);

	r = 0;

ret_destroy_bufio:
	dm_bufio_client_destroy(client);

	return r;
}

static struct pstore *get_info(struct dm_exception_store *store)
{
	return (struct pstore *) store->context;
}

static void persistent_usage(struct dm_exception_store *store,
			     sector_t *total_sectors,
			     sector_t *sectors_allocated,
			     sector_t *metadata_sectors)
{
	struct pstore *ps = get_info(store);

	*sectors_allocated = ps->next_free * store->chunk_size;
	*total_sectors = get_dev_size(dm_snap_cow(store->snap)->bdev);

	/*
	 * The first chunk is the fixed header.  Then there are
	 * (ps->current_area + 1) metadata chunks, each one separated
	 * from the next by ps->exceptions_per_area data chunks.
	 */
	*metadata_sectors = (ps->current_area + 1 + NUM_SNAPSHOT_HDR_CHUNKS) *
			    store->chunk_size;
}

static void persistent_dtr(struct dm_exception_store *store)
{
	struct pstore *ps = get_info(store);

	destroy_workqueue(ps->metadata_wq);

	/* Created in read_header() */
	if (ps->io_client)
		dm_io_client_destroy(ps->io_client);
	free_area(ps);

	/* Allocated in persistent_read_metadata() */
	vfree(ps->callbacks);

	kfree(ps);
}

static int persistent_read_metadata(struct dm_exception_store *store,
				    int (*callback)(void *callback_context,
						    chunk_t old, chunk_t new),
				    void *callback_context)
{
	int r, uninitialized_var(new_snapshot);
	struct pstore *ps = get_info(store);

	/*
	 * Read the snapshot header.
	 */
	r = read_header(ps, &new_snapshot);
	if (r)
		return r;

	/*
	 * Now that the correct chunk_size is known, complete the
	 * initialisation.
	 */
	ps->exceptions_per_area = (ps->store->chunk_size << SECTOR_SHIFT) /
				  sizeof(struct disk_exception);
	ps->callbacks = dm_vcalloc(ps->exceptions_per_area,
				   sizeof(*ps->callbacks));
	if (!ps->callbacks)
		return -ENOMEM;

	/*
	 * Do we need to set up a new snapshot?
	 */
	if (new_snapshot) {
		r = write_header(ps);
		if (r) {
			DMWARN("write_header failed");
			return r;
		}

		ps->current_area = 0;
		zero_memory_area(ps);
		r = zero_disk_area(ps, 0);
		if (r)
			DMWARN("zero_disk_area(0) failed");
		return r;
	}

	/*
	 * Sanity checks.
	 */
	if (ps->version != SNAPSHOT_DISK_VERSION) {
		DMWARN("unable to handle snapshot disk version %d",
		       ps->version);
		return -EINVAL;
	}

	/*
	 * Metadata is valid, but the snapshot itself has been invalidated.
	 */
	if (!ps->valid)
		return 1;

	/*
	 * Read the metadata.
	 */
	r = read_exceptions(ps, callback, callback_context);

	return r;
}

673
674static int persistent_prepare_exception(struct dm_exception_store *store,
675 struct dm_exception *e)
676{
677 struct pstore *ps = get_info(store);
678 sector_t size = get_dev_size(dm_snap_cow(store->snap)->bdev);
679
680
681 if (size < ((ps->next_free + 1) * store->chunk_size))
682 return -ENOSPC;
683
684 e->new_chunk = ps->next_free;
685
686
687
688
689
690 ps->next_free++;
691 skip_metadata(ps);
692
693 atomic_inc(&ps->pending_count);
694 return 0;
695}
696
static void persistent_commit_exception(struct dm_exception_store *store,
					struct dm_exception *e, int valid,
					void (*callback) (void *, int success),
					void *callback_context)
{
	unsigned int i;
	struct pstore *ps = get_info(store);
	struct core_exception ce;
	struct commit_callback *cb;

	if (!valid)
		ps->valid = 0;

	ce.old_chunk = e->old_chunk;
	ce.new_chunk = e->new_chunk;
	write_exception(ps, ps->current_committed++, &ce);

	/*
	 * Add the callback to the back of the array.  This is the
	 * only place the callback array is manipulated, and it is
	 * never run concurrently with itself.
	 */
	cb = ps->callbacks + ps->callback_count++;
	cb->callback = callback;
	cb->context = callback_context;

	/*
	 * If there are exceptions in flight and we have not yet
	 * filled this metadata area there's nothing more to do.
	 */
	if (!atomic_dec_and_test(&ps->pending_count) &&
	    (ps->current_committed != ps->exceptions_per_area))
		return;

	/*
	 * If we completely filled the current area, then wipe the next one.
	 */
	if ((ps->current_committed == ps->exceptions_per_area) &&
	    zero_disk_area(ps, ps->current_area + 1))
		ps->valid = 0;

	/*
	 * Commit exceptions to disk.
	 */
	if (ps->valid && area_io(ps, WRITE_FLUSH_FUA))
		ps->valid = 0;

	/*
	 * Advance to the next area if this one is full.
	 */
	if (ps->current_committed == ps->exceptions_per_area) {
		ps->current_committed = 0;
		ps->current_area++;
		zero_memory_area(ps);
	}

	for (i = 0; i < ps->callback_count; i++) {
		cb = ps->callbacks + i;
		cb->callback(cb->context, ps->valid);
	}

	ps->callback_count = 0;
}

static int persistent_prepare_merge(struct dm_exception_store *store,
				    chunk_t *last_old_chunk,
				    chunk_t *last_new_chunk)
{
	struct pstore *ps = get_info(store);
	struct core_exception ce;
	int nr_consecutive;
	int r;

	/*
	 * When the current area is empty, move back to the preceding area.
	 */
	if (!ps->current_committed) {
		/*
		 * Have we finished?
		 */
		if (!ps->current_area)
			return 0;

		ps->current_area--;
		r = area_io(ps, READ);
		if (r < 0)
			return r;
		ps->current_committed = ps->exceptions_per_area;
	}

	read_exception(ps, ps->area, ps->current_committed - 1, &ce);
	*last_old_chunk = ce.old_chunk;
	*last_new_chunk = ce.new_chunk;

	/*
	 * Find how many consecutive chunks within the current area,
	 * working backwards, can be merged in one go.
	 */
	for (nr_consecutive = 1; nr_consecutive < ps->current_committed;
	     nr_consecutive++) {
		read_exception(ps, ps->area,
			       ps->current_committed - 1 - nr_consecutive, &ce);
		if (ce.old_chunk != *last_old_chunk - nr_consecutive ||
		    ce.new_chunk != *last_new_chunk - nr_consecutive)
			break;
	}

	return nr_consecutive;
}

static int persistent_commit_merge(struct dm_exception_store *store,
				   int nr_merged)
{
	int r, i;
	struct pstore *ps = get_info(store);

	BUG_ON(nr_merged > ps->current_committed);

	for (i = 0; i < nr_merged; i++)
		clear_exception(ps, ps->current_committed - 1 - i);

	r = area_io(ps, WRITE_FLUSH_FUA);
	if (r < 0)
		return r;

	ps->current_committed -= nr_merged;

	/*
	 * While merging, ps->next_free is only consulted for usage
	 * reporting, so simply reset it to the value it would have
	 * held had all exceptions in this area been committed in
	 * allocation order.
	 */
	ps->next_free = area_location(ps, ps->current_area) +
			ps->current_committed + 1;

	return 0;
}

static void persistent_drop_snapshot(struct dm_exception_store *store)
{
	struct pstore *ps = get_info(store);

	ps->valid = 0;
	if (write_header(ps))
		DMWARN("write header failed");
}

static int persistent_ctr(struct dm_exception_store *store, char *options)
{
	struct pstore *ps;
	int r;

	/* Allocate the pstore. */
	ps = kzalloc(sizeof(*ps), GFP_KERNEL);
	if (!ps)
		return -ENOMEM;

	ps->store = store;
	ps->valid = 1;
	ps->version = SNAPSHOT_DISK_VERSION;
	ps->area = NULL;
	ps->zero_area = NULL;
	ps->header_area = NULL;
	ps->next_free = NUM_SNAPSHOT_HDR_CHUNKS + 1; /* header and 1st area */
	ps->current_committed = 0;

	ps->callback_count = 0;
	atomic_set(&ps->pending_count, 0);
	ps->callbacks = NULL;

	ps->metadata_wq = alloc_workqueue("ksnaphd", WQ_MEM_RECLAIM, 0);
	if (!ps->metadata_wq) {
		DMERR("couldn't start header metadata update thread");
		r = -ENOMEM;
		goto err_workqueue;
	}

	if (options) {
		char overflow = toupper(options[0]);
		if (overflow == 'O')
			store->userspace_supports_overflow = true;
		else {
			DMERR("Unsupported persistent store option: %s", options);
			r = -EINVAL;
			goto err_options;
		}
	}

	store->context = ps;

	return 0;

err_options:
	destroy_workqueue(ps->metadata_wq);
err_workqueue:
	kfree(ps);

	return r;
}

static unsigned persistent_status(struct dm_exception_store *store,
				  status_type_t status, char *result,
				  unsigned maxlen)
{
	unsigned sz = 0;

	switch (status) {
	case STATUSTYPE_INFO:
		break;
	case STATUSTYPE_TABLE:
		DMEMIT(" %s %llu", store->userspace_supports_overflow ? "PO" : "P",
		       (unsigned long long)store->chunk_size);
	}

	return sz;
}

static struct dm_exception_store_type _persistent_type = {
	.name = "persistent",
	.module = THIS_MODULE,
	.ctr = persistent_ctr,
	.dtr = persistent_dtr,
	.read_metadata = persistent_read_metadata,
	.prepare_exception = persistent_prepare_exception,
	.commit_exception = persistent_commit_exception,
	.prepare_merge = persistent_prepare_merge,
	.commit_merge = persistent_commit_merge,
	.drop_snapshot = persistent_drop_snapshot,
	.usage = persistent_usage,
	.status = persistent_status,
};

static struct dm_exception_store_type _persistent_compat_type = {
	.name = "P",
	.module = THIS_MODULE,
	.ctr = persistent_ctr,
	.dtr = persistent_dtr,
	.read_metadata = persistent_read_metadata,
	.prepare_exception = persistent_prepare_exception,
	.commit_exception = persistent_commit_exception,
	.prepare_merge = persistent_prepare_merge,
	.commit_merge = persistent_commit_merge,
	.drop_snapshot = persistent_drop_snapshot,
	.usage = persistent_usage,
	.status = persistent_status,
};

int dm_persistent_snapshot_init(void)
{
	int r;

	r = dm_exception_store_type_register(&_persistent_type);
	if (r) {
		DMERR("Unable to register persistent exception store type");
		return r;
	}

	r = dm_exception_store_type_register(&_persistent_compat_type);
	if (r) {
		DMERR("Unable to register old-style persistent exception "
		      "store type");
		dm_exception_store_type_unregister(&_persistent_type);
		return r;
	}

	return r;
}

void dm_persistent_snapshot_exit(void)
{
	dm_exception_store_type_unregister(&_persistent_type);
	dm_exception_store_type_unregister(&_persistent_compat_type);
}