1
2
3
4
5
6
7#include "dm.h"
8#include "dm-bio-prison.h"
9#include "dm-bio-record.h"
10#include "dm-cache-metadata.h"
11
12#include <linux/dm-io.h>
13#include <linux/dm-kcopyd.h>
14#include <linux/jiffies.h>
15#include <linux/init.h>
16#include <linux/mempool.h>
17#include <linux/module.h>
18#include <linux/slab.h>
19#include <linux/vmalloc.h>
20
21#define DM_MSG_PREFIX "cache"
22
23DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(cache_copy_throttle,
24 "A percentage of time allocated for copying to and/or from cache");
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41static size_t bitset_size_in_bytes(unsigned nr_entries)
42{
43 return sizeof(unsigned long) * dm_div_up(nr_entries, BITS_PER_LONG);
44}
45
46static unsigned long *alloc_bitset(unsigned nr_entries)
47{
48 size_t s = bitset_size_in_bytes(nr_entries);
49 return vzalloc(s);
50}
51
52static void clear_bitset(void *bitset, unsigned nr_entries)
53{
54 size_t s = bitset_size_in_bytes(nr_entries);
55 memset(bitset, 0, s);
56}
57
58static void free_bitset(unsigned long *bits)
59{
60 vfree(bits);
61}
62
63
64
65
66
67
68
69
70struct dm_hook_info {
71 bio_end_io_t *bi_end_io;
72 void *bi_private;
73};
74
75static void dm_hook_bio(struct dm_hook_info *h, struct bio *bio,
76 bio_end_io_t *bi_end_io, void *bi_private)
77{
78 h->bi_end_io = bio->bi_end_io;
79 h->bi_private = bio->bi_private;
80
81 bio->bi_end_io = bi_end_io;
82 bio->bi_private = bi_private;
83}
84
85static void dm_unhook_bio(struct dm_hook_info *h, struct bio *bio)
86{
87 bio->bi_end_io = h->bi_end_io;
88 bio->bi_private = h->bi_private;
89
90
91
92
93
94 atomic_inc(&bio->bi_remaining);
95}
96
97
98
99#define MIGRATION_POOL_SIZE 128
100#define COMMIT_PERIOD HZ
101#define MIGRATION_COUNT_WINDOW 10
102
103
104
105
106
107#define DATA_DEV_BLOCK_SIZE_MIN_SECTORS (32 * 1024 >> SECTOR_SHIFT)
108#define DATA_DEV_BLOCK_SIZE_MAX_SECTORS (1024 * 1024 * 1024 >> SECTOR_SHIFT)
109
110
111
112
113enum cache_metadata_mode {
114 CM_WRITE,
115 CM_READ_ONLY,
116};
117
118enum cache_io_mode {
119
120
121
122
123
124 CM_IO_WRITEBACK,
125
126
127
128
129
130 CM_IO_WRITETHROUGH,
131
132
133
134
135
136
137
138 CM_IO_PASSTHROUGH
139};
140
141struct cache_features {
142 enum cache_metadata_mode mode;
143 enum cache_io_mode io_mode;
144};
145
146struct cache_stats {
147 atomic_t read_hit;
148 atomic_t read_miss;
149 atomic_t write_hit;
150 atomic_t write_miss;
151 atomic_t demotion;
152 atomic_t promotion;
153 atomic_t copies_avoided;
154 atomic_t cache_cell_clash;
155 atomic_t commit_count;
156 atomic_t discard_count;
157};
158
159
160
161
162
163struct cblock_range {
164 dm_cblock_t begin;
165 dm_cblock_t end;
166};
167
168struct invalidation_request {
169 struct list_head list;
170 struct cblock_range *cblocks;
171
172 atomic_t complete;
173 int err;
174
175 wait_queue_head_t result_wait;
176};
177
178struct cache {
179 struct dm_target *ti;
180 struct dm_target_callbacks callbacks;
181
182 struct dm_cache_metadata *cmd;
183
184
185
186
187 struct dm_dev *metadata_dev;
188
189
190
191
192 struct dm_dev *origin_dev;
193
194
195
196
197 struct dm_dev *cache_dev;
198
199
200
201
202 dm_oblock_t origin_blocks;
203 sector_t origin_sectors;
204
205
206
207
208 dm_cblock_t cache_size;
209
210
211
212
213 uint32_t sectors_per_block;
214 int sectors_per_block_shift;
215
216 spinlock_t lock;
217 struct bio_list deferred_bios;
218 struct bio_list deferred_flush_bios;
219 struct bio_list deferred_writethrough_bios;
220 struct list_head quiesced_migrations;
221 struct list_head completed_migrations;
222 struct list_head need_commit_migrations;
223 sector_t migration_threshold;
224 wait_queue_head_t migration_wait;
225 atomic_t nr_allocated_migrations;
226
227
228
229
230
231 atomic_t nr_io_migrations;
232
233 wait_queue_head_t quiescing_wait;
234 atomic_t quiescing;
235 atomic_t quiescing_ack;
236
237
238
239
240 atomic_t nr_dirty;
241 unsigned long *dirty_bitset;
242
243
244
245
246 dm_dblock_t discard_nr_blocks;
247 unsigned long *discard_bitset;
248 uint32_t discard_block_size;
249
250
251
252
253
254 unsigned nr_ctr_args;
255 const char **ctr_args;
256
257 struct dm_kcopyd_client *copier;
258 struct workqueue_struct *wq;
259 struct work_struct worker;
260
261 struct delayed_work waker;
262 unsigned long last_commit_jiffies;
263
264 struct dm_bio_prison *prison;
265 struct dm_deferred_set *all_io_ds;
266
267 mempool_t *migration_pool;
268
269 struct dm_cache_policy *policy;
270 unsigned policy_nr_args;
271
272 bool need_tick_bio:1;
273 bool sized:1;
274 bool invalidate:1;
275 bool commit_requested:1;
276 bool loaded_mappings:1;
277 bool loaded_discards:1;
278
279
280
281
282 struct cache_features features;
283
284 struct cache_stats stats;
285
286
287
288
289 spinlock_t invalidation_lock;
290 struct list_head invalidation_requests;
291};
292
293struct per_bio_data {
294 bool tick:1;
295 unsigned req_nr:2;
296 struct dm_deferred_entry *all_io_entry;
297 struct dm_hook_info hook_info;
298
299
300
301
302
303
304 struct cache *cache;
305 dm_cblock_t cblock;
306 struct dm_bio_details bio_details;
307};
308
309struct dm_cache_migration {
310 struct list_head list;
311 struct cache *cache;
312
313 unsigned long start_jiffies;
314 dm_oblock_t old_oblock;
315 dm_oblock_t new_oblock;
316 dm_cblock_t cblock;
317
318 bool err:1;
319 bool discard:1;
320 bool writeback:1;
321 bool demote:1;
322 bool promote:1;
323 bool requeue_holder:1;
324 bool invalidate:1;
325
326 struct dm_bio_prison_cell *old_ocell;
327 struct dm_bio_prison_cell *new_ocell;
328};
329
330
331
332
333
334
335struct prealloc {
336 struct dm_cache_migration *mg;
337 struct dm_bio_prison_cell *cell1;
338 struct dm_bio_prison_cell *cell2;
339};
340
341static void wake_worker(struct cache *cache)
342{
343 queue_work(cache->wq, &cache->worker);
344}
345
346
347
348static struct dm_bio_prison_cell *alloc_prison_cell(struct cache *cache)
349{
350
351 return dm_bio_prison_alloc_cell(cache->prison, GFP_NOWAIT);
352}
353
354static void free_prison_cell(struct cache *cache, struct dm_bio_prison_cell *cell)
355{
356 dm_bio_prison_free_cell(cache->prison, cell);
357}
358
359static struct dm_cache_migration *alloc_migration(struct cache *cache)
360{
361 struct dm_cache_migration *mg;
362
363 mg = mempool_alloc(cache->migration_pool, GFP_NOWAIT);
364 if (mg) {
365 mg->cache = cache;
366 atomic_inc(&mg->cache->nr_allocated_migrations);
367 }
368
369 return mg;
370}
371
372static void free_migration(struct dm_cache_migration *mg)
373{
374 if (atomic_dec_and_test(&mg->cache->nr_allocated_migrations))
375 wake_up(&mg->cache->migration_wait);
376
377 mempool_free(mg, mg->cache->migration_pool);
378}
379
380static int prealloc_data_structs(struct cache *cache, struct prealloc *p)
381{
382 if (!p->mg) {
383 p->mg = alloc_migration(cache);
384 if (!p->mg)
385 return -ENOMEM;
386 }
387
388 if (!p->cell1) {
389 p->cell1 = alloc_prison_cell(cache);
390 if (!p->cell1)
391 return -ENOMEM;
392 }
393
394 if (!p->cell2) {
395 p->cell2 = alloc_prison_cell(cache);
396 if (!p->cell2)
397 return -ENOMEM;
398 }
399
400 return 0;
401}
402
403static void prealloc_free_structs(struct cache *cache, struct prealloc *p)
404{
405 if (p->cell2)
406 free_prison_cell(cache, p->cell2);
407
408 if (p->cell1)
409 free_prison_cell(cache, p->cell1);
410
411 if (p->mg)
412 free_migration(p->mg);
413}
414
415static struct dm_cache_migration *prealloc_get_migration(struct prealloc *p)
416{
417 struct dm_cache_migration *mg = p->mg;
418
419 BUG_ON(!mg);
420 p->mg = NULL;
421
422 return mg;
423}
424
425
426
427
428
429static struct dm_bio_prison_cell *prealloc_get_cell(struct prealloc *p)
430{
431 struct dm_bio_prison_cell *r = NULL;
432
433 if (p->cell1) {
434 r = p->cell1;
435 p->cell1 = NULL;
436
437 } else if (p->cell2) {
438 r = p->cell2;
439 p->cell2 = NULL;
440 } else
441 BUG();
442
443 return r;
444}
445
446
447
448
449
450static void prealloc_put_cell(struct prealloc *p, struct dm_bio_prison_cell *cell)
451{
452 if (!p->cell2)
453 p->cell2 = cell;
454
455 else if (!p->cell1)
456 p->cell1 = cell;
457
458 else
459 BUG();
460}
461
462
463
464static void build_key(dm_oblock_t begin, dm_oblock_t end, struct dm_cell_key *key)
465{
466 key->virtual = 0;
467 key->dev = 0;
468 key->block_begin = from_oblock(begin);
469 key->block_end = from_oblock(end);
470}
471
472
473
474
475
476
477typedef void (*cell_free_fn)(void *context, struct dm_bio_prison_cell *cell);
478
479static int bio_detain_range(struct cache *cache, dm_oblock_t oblock_begin, dm_oblock_t oblock_end,
480 struct bio *bio, struct dm_bio_prison_cell *cell_prealloc,
481 cell_free_fn free_fn, void *free_context,
482 struct dm_bio_prison_cell **cell_result)
483{
484 int r;
485 struct dm_cell_key key;
486
487 build_key(oblock_begin, oblock_end, &key);
488 r = dm_bio_detain(cache->prison, &key, bio, cell_prealloc, cell_result);
489 if (r)
490 free_fn(free_context, cell_prealloc);
491
492 return r;
493}
494
495static int bio_detain(struct cache *cache, dm_oblock_t oblock,
496 struct bio *bio, struct dm_bio_prison_cell *cell_prealloc,
497 cell_free_fn free_fn, void *free_context,
498 struct dm_bio_prison_cell **cell_result)
499{
500 dm_oblock_t end = to_oblock(from_oblock(oblock) + 1ULL);
501 return bio_detain_range(cache, oblock, end, bio,
502 cell_prealloc, free_fn, free_context, cell_result);
503}
504
505static int get_cell(struct cache *cache,
506 dm_oblock_t oblock,
507 struct prealloc *structs,
508 struct dm_bio_prison_cell **cell_result)
509{
510 int r;
511 struct dm_cell_key key;
512 struct dm_bio_prison_cell *cell_prealloc;
513
514 cell_prealloc = prealloc_get_cell(structs);
515
516 build_key(oblock, to_oblock(from_oblock(oblock) + 1ULL), &key);
517 r = dm_get_cell(cache->prison, &key, cell_prealloc, cell_result);
518 if (r)
519 prealloc_put_cell(structs, cell_prealloc);
520
521 return r;
522}
523
524
525
526static bool is_dirty(struct cache *cache, dm_cblock_t b)
527{
528 return test_bit(from_cblock(b), cache->dirty_bitset);
529}
530
531static void set_dirty(struct cache *cache, dm_oblock_t oblock, dm_cblock_t cblock)
532{
533 if (!test_and_set_bit(from_cblock(cblock), cache->dirty_bitset)) {
534 atomic_inc(&cache->nr_dirty);
535 policy_set_dirty(cache->policy, oblock);
536 }
537}
538
539static void clear_dirty(struct cache *cache, dm_oblock_t oblock, dm_cblock_t cblock)
540{
541 if (test_and_clear_bit(from_cblock(cblock), cache->dirty_bitset)) {
542 policy_clear_dirty(cache->policy, oblock);
543 if (atomic_dec_return(&cache->nr_dirty) == 0)
544 dm_table_event(cache->ti->table);
545 }
546}
547
548
549
550static bool block_size_is_power_of_two(struct cache *cache)
551{
552 return cache->sectors_per_block_shift >= 0;
553}
554
555
556#if defined(CONFIG_ARM) && __GNUC__ == 4 && __GNUC_MINOR__ <= 6
557__always_inline
558#endif
559static dm_block_t block_div(dm_block_t b, uint32_t n)
560{
561 do_div(b, n);
562
563 return b;
564}
565
566static dm_block_t oblocks_per_dblock(struct cache *cache)
567{
568 dm_block_t oblocks = cache->discard_block_size;
569
570 if (block_size_is_power_of_two(cache))
571 oblocks >>= cache->sectors_per_block_shift;
572 else
573 oblocks = block_div(oblocks, cache->sectors_per_block);
574
575 return oblocks;
576}
577
578static dm_dblock_t oblock_to_dblock(struct cache *cache, dm_oblock_t oblock)
579{
580 return to_dblock(block_div(from_oblock(oblock),
581 oblocks_per_dblock(cache)));
582}
583
584static dm_oblock_t dblock_to_oblock(struct cache *cache, dm_dblock_t dblock)
585{
586 return to_oblock(from_dblock(dblock) * oblocks_per_dblock(cache));
587}
588
589static void set_discard(struct cache *cache, dm_dblock_t b)
590{
591 unsigned long flags;
592
593 BUG_ON(from_dblock(b) >= from_dblock(cache->discard_nr_blocks));
594 atomic_inc(&cache->stats.discard_count);
595
596 spin_lock_irqsave(&cache->lock, flags);
597 set_bit(from_dblock(b), cache->discard_bitset);
598 spin_unlock_irqrestore(&cache->lock, flags);
599}
600
601static void clear_discard(struct cache *cache, dm_dblock_t b)
602{
603 unsigned long flags;
604
605 spin_lock_irqsave(&cache->lock, flags);
606 clear_bit(from_dblock(b), cache->discard_bitset);
607 spin_unlock_irqrestore(&cache->lock, flags);
608}
609
610static bool is_discarded(struct cache *cache, dm_dblock_t b)
611{
612 int r;
613 unsigned long flags;
614
615 spin_lock_irqsave(&cache->lock, flags);
616 r = test_bit(from_dblock(b), cache->discard_bitset);
617 spin_unlock_irqrestore(&cache->lock, flags);
618
619 return r;
620}
621
622static bool is_discarded_oblock(struct cache *cache, dm_oblock_t b)
623{
624 int r;
625 unsigned long flags;
626
627 spin_lock_irqsave(&cache->lock, flags);
628 r = test_bit(from_dblock(oblock_to_dblock(cache, b)),
629 cache->discard_bitset);
630 spin_unlock_irqrestore(&cache->lock, flags);
631
632 return r;
633}
634
635
636
637static void load_stats(struct cache *cache)
638{
639 struct dm_cache_statistics stats;
640
641 dm_cache_metadata_get_stats(cache->cmd, &stats);
642 atomic_set(&cache->stats.read_hit, stats.read_hits);
643 atomic_set(&cache->stats.read_miss, stats.read_misses);
644 atomic_set(&cache->stats.write_hit, stats.write_hits);
645 atomic_set(&cache->stats.write_miss, stats.write_misses);
646}
647
648static void save_stats(struct cache *cache)
649{
650 struct dm_cache_statistics stats;
651
652 stats.read_hits = atomic_read(&cache->stats.read_hit);
653 stats.read_misses = atomic_read(&cache->stats.read_miss);
654 stats.write_hits = atomic_read(&cache->stats.write_hit);
655 stats.write_misses = atomic_read(&cache->stats.write_miss);
656
657 dm_cache_metadata_set_stats(cache->cmd, &stats);
658}
659
660
661
662
663
664
665
666
667#define PB_DATA_SIZE_WB (offsetof(struct per_bio_data, cache))
668#define PB_DATA_SIZE_WT (sizeof(struct per_bio_data))
669
670static bool writethrough_mode(struct cache_features *f)
671{
672 return f->io_mode == CM_IO_WRITETHROUGH;
673}
674
675static bool writeback_mode(struct cache_features *f)
676{
677 return f->io_mode == CM_IO_WRITEBACK;
678}
679
680static bool passthrough_mode(struct cache_features *f)
681{
682 return f->io_mode == CM_IO_PASSTHROUGH;
683}
684
685static size_t get_per_bio_data_size(struct cache *cache)
686{
687 return writethrough_mode(&cache->features) ? PB_DATA_SIZE_WT : PB_DATA_SIZE_WB;
688}
689
690static struct per_bio_data *get_per_bio_data(struct bio *bio, size_t data_size)
691{
692 struct per_bio_data *pb = dm_per_bio_data(bio, data_size);
693 BUG_ON(!pb);
694 return pb;
695}
696
697static struct per_bio_data *init_per_bio_data(struct bio *bio, size_t data_size)
698{
699 struct per_bio_data *pb = get_per_bio_data(bio, data_size);
700
701 pb->tick = false;
702 pb->req_nr = dm_bio_get_target_bio_nr(bio);
703 pb->all_io_entry = NULL;
704
705 return pb;
706}
707
708
709
710
711static void remap_to_origin(struct cache *cache, struct bio *bio)
712{
713 bio->bi_bdev = cache->origin_dev->bdev;
714}
715
716static void remap_to_cache(struct cache *cache, struct bio *bio,
717 dm_cblock_t cblock)
718{
719 sector_t bi_sector = bio->bi_iter.bi_sector;
720 sector_t block = from_cblock(cblock);
721
722 bio->bi_bdev = cache->cache_dev->bdev;
723 if (!block_size_is_power_of_two(cache))
724 bio->bi_iter.bi_sector =
725 (block * cache->sectors_per_block) +
726 sector_div(bi_sector, cache->sectors_per_block);
727 else
728 bio->bi_iter.bi_sector =
729 (block << cache->sectors_per_block_shift) |
730 (bi_sector & (cache->sectors_per_block - 1));
731}
732
733static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio)
734{
735 unsigned long flags;
736 size_t pb_data_size = get_per_bio_data_size(cache);
737 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
738
739 spin_lock_irqsave(&cache->lock, flags);
740 if (cache->need_tick_bio &&
741 !(bio->bi_rw & (REQ_FUA | REQ_FLUSH | REQ_DISCARD))) {
742 pb->tick = true;
743 cache->need_tick_bio = false;
744 }
745 spin_unlock_irqrestore(&cache->lock, flags);
746}
747
748static void remap_to_origin_clear_discard(struct cache *cache, struct bio *bio,
749 dm_oblock_t oblock)
750{
751 check_if_tick_bio_needed(cache, bio);
752 remap_to_origin(cache, bio);
753 if (bio_data_dir(bio) == WRITE)
754 clear_discard(cache, oblock_to_dblock(cache, oblock));
755}
756
757static void remap_to_cache_dirty(struct cache *cache, struct bio *bio,
758 dm_oblock_t oblock, dm_cblock_t cblock)
759{
760 check_if_tick_bio_needed(cache, bio);
761 remap_to_cache(cache, bio, cblock);
762 if (bio_data_dir(bio) == WRITE) {
763 set_dirty(cache, oblock, cblock);
764 clear_discard(cache, oblock_to_dblock(cache, oblock));
765 }
766}
767
768static dm_oblock_t get_bio_block(struct cache *cache, struct bio *bio)
769{
770 sector_t block_nr = bio->bi_iter.bi_sector;
771
772 if (!block_size_is_power_of_two(cache))
773 (void) sector_div(block_nr, cache->sectors_per_block);
774 else
775 block_nr >>= cache->sectors_per_block_shift;
776
777 return to_oblock(block_nr);
778}
779
780static int bio_triggers_commit(struct cache *cache, struct bio *bio)
781{
782 return bio->bi_rw & (REQ_FLUSH | REQ_FUA);
783}
784
785
786
787
788
789static void inc_ds(struct cache *cache, struct bio *bio,
790 struct dm_bio_prison_cell *cell)
791{
792 size_t pb_data_size = get_per_bio_data_size(cache);
793 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
794
795 BUG_ON(!cell);
796 BUG_ON(pb->all_io_entry);
797
798 pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
799}
800
801static void issue(struct cache *cache, struct bio *bio)
802{
803 unsigned long flags;
804
805 if (!bio_triggers_commit(cache, bio)) {
806 generic_make_request(bio);
807 return;
808 }
809
810
811
812
813
814 spin_lock_irqsave(&cache->lock, flags);
815 cache->commit_requested = true;
816 bio_list_add(&cache->deferred_flush_bios, bio);
817 spin_unlock_irqrestore(&cache->lock, flags);
818}
819
820static void inc_and_issue(struct cache *cache, struct bio *bio, struct dm_bio_prison_cell *cell)
821{
822 inc_ds(cache, bio, cell);
823 issue(cache, bio);
824}
825
826static void defer_writethrough_bio(struct cache *cache, struct bio *bio)
827{
828 unsigned long flags;
829
830 spin_lock_irqsave(&cache->lock, flags);
831 bio_list_add(&cache->deferred_writethrough_bios, bio);
832 spin_unlock_irqrestore(&cache->lock, flags);
833
834 wake_worker(cache);
835}
836
837static void writethrough_endio(struct bio *bio, int err)
838{
839 struct per_bio_data *pb = get_per_bio_data(bio, PB_DATA_SIZE_WT);
840
841 dm_unhook_bio(&pb->hook_info, bio);
842
843 if (err) {
844 bio_endio(bio, err);
845 return;
846 }
847
848 dm_bio_restore(&pb->bio_details, bio);
849 remap_to_cache(pb->cache, bio, pb->cblock);
850
851
852
853
854
855
856 defer_writethrough_bio(pb->cache, bio);
857}
858
859
860
861
862
863
864
865static void remap_to_origin_then_cache(struct cache *cache, struct bio *bio,
866 dm_oblock_t oblock, dm_cblock_t cblock)
867{
868 struct per_bio_data *pb = get_per_bio_data(bio, PB_DATA_SIZE_WT);
869
870 pb->cache = cache;
871 pb->cblock = cblock;
872 dm_hook_bio(&pb->hook_info, bio, writethrough_endio, NULL);
873 dm_bio_record(&pb->bio_details, bio);
874
875 remap_to_origin_clear_discard(pb->cache, bio, oblock);
876}
877
878
879
880
881
882
883
884static void inc_io_migrations(struct cache *cache)
885{
886 atomic_inc(&cache->nr_io_migrations);
887}
888
889static void dec_io_migrations(struct cache *cache)
890{
891 atomic_dec(&cache->nr_io_migrations);
892}
893
894static void __cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell,
895 bool holder)
896{
897 (holder ? dm_cell_release : dm_cell_release_no_holder)
898 (cache->prison, cell, &cache->deferred_bios);
899 free_prison_cell(cache, cell);
900}
901
902static void cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell,
903 bool holder)
904{
905 unsigned long flags;
906
907 spin_lock_irqsave(&cache->lock, flags);
908 __cell_defer(cache, cell, holder);
909 spin_unlock_irqrestore(&cache->lock, flags);
910
911 wake_worker(cache);
912}
913
914static void free_io_migration(struct dm_cache_migration *mg)
915{
916 dec_io_migrations(mg->cache);
917 free_migration(mg);
918}
919
920static void migration_failure(struct dm_cache_migration *mg)
921{
922 struct cache *cache = mg->cache;
923
924 if (mg->writeback) {
925 DMWARN_LIMIT("writeback failed; couldn't copy block");
926 set_dirty(cache, mg->old_oblock, mg->cblock);
927 cell_defer(cache, mg->old_ocell, false);
928
929 } else if (mg->demote) {
930 DMWARN_LIMIT("demotion failed; couldn't copy block");
931 policy_force_mapping(cache->policy, mg->new_oblock, mg->old_oblock);
932
933 cell_defer(cache, mg->old_ocell, mg->promote ? false : true);
934 if (mg->promote)
935 cell_defer(cache, mg->new_ocell, true);
936 } else {
937 DMWARN_LIMIT("promotion failed; couldn't copy block");
938 policy_remove_mapping(cache->policy, mg->new_oblock);
939 cell_defer(cache, mg->new_ocell, true);
940 }
941
942 free_io_migration(mg);
943}
944
945static void migration_success_pre_commit(struct dm_cache_migration *mg)
946{
947 unsigned long flags;
948 struct cache *cache = mg->cache;
949
950 if (mg->writeback) {
951 clear_dirty(cache, mg->old_oblock, mg->cblock);
952 cell_defer(cache, mg->old_ocell, false);
953 free_io_migration(mg);
954 return;
955
956 } else if (mg->demote) {
957 if (dm_cache_remove_mapping(cache->cmd, mg->cblock)) {
958 DMWARN_LIMIT("demotion failed; couldn't update on disk metadata");
959 policy_force_mapping(cache->policy, mg->new_oblock,
960 mg->old_oblock);
961 if (mg->promote)
962 cell_defer(cache, mg->new_ocell, true);
963 free_io_migration(mg);
964 return;
965 }
966 } else {
967 if (dm_cache_insert_mapping(cache->cmd, mg->cblock, mg->new_oblock)) {
968 DMWARN_LIMIT("promotion failed; couldn't update on disk metadata");
969 policy_remove_mapping(cache->policy, mg->new_oblock);
970 free_io_migration(mg);
971 return;
972 }
973 }
974
975 spin_lock_irqsave(&cache->lock, flags);
976 list_add_tail(&mg->list, &cache->need_commit_migrations);
977 cache->commit_requested = true;
978 spin_unlock_irqrestore(&cache->lock, flags);
979}
980
981static void migration_success_post_commit(struct dm_cache_migration *mg)
982{
983 unsigned long flags;
984 struct cache *cache = mg->cache;
985
986 if (mg->writeback) {
987 DMWARN("writeback unexpectedly triggered commit");
988 return;
989
990 } else if (mg->demote) {
991 cell_defer(cache, mg->old_ocell, mg->promote ? false : true);
992
993 if (mg->promote) {
994 mg->demote = false;
995
996 spin_lock_irqsave(&cache->lock, flags);
997 list_add_tail(&mg->list, &cache->quiesced_migrations);
998 spin_unlock_irqrestore(&cache->lock, flags);
999
1000 } else {
1001 if (mg->invalidate)
1002 policy_remove_mapping(cache->policy, mg->old_oblock);
1003 free_io_migration(mg);
1004 }
1005
1006 } else {
1007 if (mg->requeue_holder) {
1008 clear_dirty(cache, mg->new_oblock, mg->cblock);
1009 cell_defer(cache, mg->new_ocell, true);
1010 } else {
1011
1012
1013
1014 set_dirty(cache, mg->new_oblock, mg->cblock);
1015 bio_endio(mg->new_ocell->holder, 0);
1016 cell_defer(cache, mg->new_ocell, false);
1017 }
1018 free_io_migration(mg);
1019 }
1020}
1021
1022static void copy_complete(int read_err, unsigned long write_err, void *context)
1023{
1024 unsigned long flags;
1025 struct dm_cache_migration *mg = (struct dm_cache_migration *) context;
1026 struct cache *cache = mg->cache;
1027
1028 if (read_err || write_err)
1029 mg->err = true;
1030
1031 spin_lock_irqsave(&cache->lock, flags);
1032 list_add_tail(&mg->list, &cache->completed_migrations);
1033 spin_unlock_irqrestore(&cache->lock, flags);
1034
1035 wake_worker(cache);
1036}
1037
1038static void issue_copy(struct dm_cache_migration *mg)
1039{
1040 int r;
1041 struct dm_io_region o_region, c_region;
1042 struct cache *cache = mg->cache;
1043 sector_t cblock = from_cblock(mg->cblock);
1044
1045 o_region.bdev = cache->origin_dev->bdev;
1046 o_region.count = cache->sectors_per_block;
1047
1048 c_region.bdev = cache->cache_dev->bdev;
1049 c_region.sector = cblock * cache->sectors_per_block;
1050 c_region.count = cache->sectors_per_block;
1051
1052 if (mg->writeback || mg->demote) {
1053
1054 o_region.sector = from_oblock(mg->old_oblock) * cache->sectors_per_block;
1055 r = dm_kcopyd_copy(cache->copier, &c_region, 1, &o_region, 0, copy_complete, mg);
1056 } else {
1057
1058 o_region.sector = from_oblock(mg->new_oblock) * cache->sectors_per_block;
1059 r = dm_kcopyd_copy(cache->copier, &o_region, 1, &c_region, 0, copy_complete, mg);
1060 }
1061
1062 if (r < 0) {
1063 DMERR_LIMIT("issuing migration failed");
1064 migration_failure(mg);
1065 }
1066}
1067
1068static void overwrite_endio(struct bio *bio, int err)
1069{
1070 struct dm_cache_migration *mg = bio->bi_private;
1071 struct cache *cache = mg->cache;
1072 size_t pb_data_size = get_per_bio_data_size(cache);
1073 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
1074 unsigned long flags;
1075
1076 dm_unhook_bio(&pb->hook_info, bio);
1077
1078 if (err)
1079 mg->err = true;
1080
1081 mg->requeue_holder = false;
1082
1083 spin_lock_irqsave(&cache->lock, flags);
1084 list_add_tail(&mg->list, &cache->completed_migrations);
1085 spin_unlock_irqrestore(&cache->lock, flags);
1086
1087 wake_worker(cache);
1088}
1089
1090static void issue_overwrite(struct dm_cache_migration *mg, struct bio *bio)
1091{
1092 size_t pb_data_size = get_per_bio_data_size(mg->cache);
1093 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
1094
1095 dm_hook_bio(&pb->hook_info, bio, overwrite_endio, mg);
1096 remap_to_cache_dirty(mg->cache, bio, mg->new_oblock, mg->cblock);
1097
1098
1099
1100
1101
1102 generic_make_request(bio);
1103}
1104
1105static bool bio_writes_complete_block(struct cache *cache, struct bio *bio)
1106{
1107 return (bio_data_dir(bio) == WRITE) &&
1108 (bio->bi_iter.bi_size == (cache->sectors_per_block << SECTOR_SHIFT));
1109}
1110
1111static void avoid_copy(struct dm_cache_migration *mg)
1112{
1113 atomic_inc(&mg->cache->stats.copies_avoided);
1114 migration_success_pre_commit(mg);
1115}
1116
1117static void calc_discard_block_range(struct cache *cache, struct bio *bio,
1118 dm_dblock_t *b, dm_dblock_t *e)
1119{
1120 sector_t sb = bio->bi_iter.bi_sector;
1121 sector_t se = bio_end_sector(bio);
1122
1123 *b = to_dblock(dm_sector_div_up(sb, cache->discard_block_size));
1124
1125 if (se - sb < cache->discard_block_size)
1126 *e = *b;
1127 else
1128 *e = to_dblock(block_div(se, cache->discard_block_size));
1129}
1130
1131static void issue_discard(struct dm_cache_migration *mg)
1132{
1133 dm_dblock_t b, e;
1134 struct bio *bio = mg->new_ocell->holder;
1135
1136 calc_discard_block_range(mg->cache, bio, &b, &e);
1137 while (b != e) {
1138 set_discard(mg->cache, b);
1139 b = to_dblock(from_dblock(b) + 1);
1140 }
1141
1142 bio_endio(bio, 0);
1143 cell_defer(mg->cache, mg->new_ocell, false);
1144 free_migration(mg);
1145}
1146
1147static void issue_copy_or_discard(struct dm_cache_migration *mg)
1148{
1149 bool avoid;
1150 struct cache *cache = mg->cache;
1151
1152 if (mg->discard) {
1153 issue_discard(mg);
1154 return;
1155 }
1156
1157 if (mg->writeback || mg->demote)
1158 avoid = !is_dirty(cache, mg->cblock) ||
1159 is_discarded_oblock(cache, mg->old_oblock);
1160 else {
1161 struct bio *bio = mg->new_ocell->holder;
1162
1163 avoid = is_discarded_oblock(cache, mg->new_oblock);
1164
1165 if (writeback_mode(&cache->features) &&
1166 !avoid && bio_writes_complete_block(cache, bio)) {
1167 issue_overwrite(mg, bio);
1168 return;
1169 }
1170 }
1171
1172 avoid ? avoid_copy(mg) : issue_copy(mg);
1173}
1174
1175static void complete_migration(struct dm_cache_migration *mg)
1176{
1177 if (mg->err)
1178 migration_failure(mg);
1179 else
1180 migration_success_pre_commit(mg);
1181}
1182
1183static void process_migrations(struct cache *cache, struct list_head *head,
1184 void (*fn)(struct dm_cache_migration *))
1185{
1186 unsigned long flags;
1187 struct list_head list;
1188 struct dm_cache_migration *mg, *tmp;
1189
1190 INIT_LIST_HEAD(&list);
1191 spin_lock_irqsave(&cache->lock, flags);
1192 list_splice_init(head, &list);
1193 spin_unlock_irqrestore(&cache->lock, flags);
1194
1195 list_for_each_entry_safe(mg, tmp, &list, list)
1196 fn(mg);
1197}
1198
1199static void __queue_quiesced_migration(struct dm_cache_migration *mg)
1200{
1201 list_add_tail(&mg->list, &mg->cache->quiesced_migrations);
1202}
1203
1204static void queue_quiesced_migration(struct dm_cache_migration *mg)
1205{
1206 unsigned long flags;
1207 struct cache *cache = mg->cache;
1208
1209 spin_lock_irqsave(&cache->lock, flags);
1210 __queue_quiesced_migration(mg);
1211 spin_unlock_irqrestore(&cache->lock, flags);
1212
1213 wake_worker(cache);
1214}
1215
1216static void queue_quiesced_migrations(struct cache *cache, struct list_head *work)
1217{
1218 unsigned long flags;
1219 struct dm_cache_migration *mg, *tmp;
1220
1221 spin_lock_irqsave(&cache->lock, flags);
1222 list_for_each_entry_safe(mg, tmp, work, list)
1223 __queue_quiesced_migration(mg);
1224 spin_unlock_irqrestore(&cache->lock, flags);
1225
1226 wake_worker(cache);
1227}
1228
1229static void check_for_quiesced_migrations(struct cache *cache,
1230 struct per_bio_data *pb)
1231{
1232 struct list_head work;
1233
1234 if (!pb->all_io_entry)
1235 return;
1236
1237 INIT_LIST_HEAD(&work);
1238 dm_deferred_entry_dec(pb->all_io_entry, &work);
1239
1240 if (!list_empty(&work))
1241 queue_quiesced_migrations(cache, &work);
1242}
1243
1244static void quiesce_migration(struct dm_cache_migration *mg)
1245{
1246 if (!dm_deferred_set_add_work(mg->cache->all_io_ds, &mg->list))
1247 queue_quiesced_migration(mg);
1248}
1249
1250static void promote(struct cache *cache, struct prealloc *structs,
1251 dm_oblock_t oblock, dm_cblock_t cblock,
1252 struct dm_bio_prison_cell *cell)
1253{
1254 struct dm_cache_migration *mg = prealloc_get_migration(structs);
1255
1256 mg->err = false;
1257 mg->discard = false;
1258 mg->writeback = false;
1259 mg->demote = false;
1260 mg->promote = true;
1261 mg->requeue_holder = true;
1262 mg->invalidate = false;
1263 mg->cache = cache;
1264 mg->new_oblock = oblock;
1265 mg->cblock = cblock;
1266 mg->old_ocell = NULL;
1267 mg->new_ocell = cell;
1268 mg->start_jiffies = jiffies;
1269
1270 inc_io_migrations(cache);
1271 quiesce_migration(mg);
1272}
1273
1274static void writeback(struct cache *cache, struct prealloc *structs,
1275 dm_oblock_t oblock, dm_cblock_t cblock,
1276 struct dm_bio_prison_cell *cell)
1277{
1278 struct dm_cache_migration *mg = prealloc_get_migration(structs);
1279
1280 mg->err = false;
1281 mg->discard = false;
1282 mg->writeback = true;
1283 mg->demote = false;
1284 mg->promote = false;
1285 mg->requeue_holder = true;
1286 mg->invalidate = false;
1287 mg->cache = cache;
1288 mg->old_oblock = oblock;
1289 mg->cblock = cblock;
1290 mg->old_ocell = cell;
1291 mg->new_ocell = NULL;
1292 mg->start_jiffies = jiffies;
1293
1294 inc_io_migrations(cache);
1295 quiesce_migration(mg);
1296}
1297
1298static void demote_then_promote(struct cache *cache, struct prealloc *structs,
1299 dm_oblock_t old_oblock, dm_oblock_t new_oblock,
1300 dm_cblock_t cblock,
1301 struct dm_bio_prison_cell *old_ocell,
1302 struct dm_bio_prison_cell *new_ocell)
1303{
1304 struct dm_cache_migration *mg = prealloc_get_migration(structs);
1305
1306 mg->err = false;
1307 mg->discard = false;
1308 mg->writeback = false;
1309 mg->demote = true;
1310 mg->promote = true;
1311 mg->requeue_holder = true;
1312 mg->invalidate = false;
1313 mg->cache = cache;
1314 mg->old_oblock = old_oblock;
1315 mg->new_oblock = new_oblock;
1316 mg->cblock = cblock;
1317 mg->old_ocell = old_ocell;
1318 mg->new_ocell = new_ocell;
1319 mg->start_jiffies = jiffies;
1320
1321 inc_io_migrations(cache);
1322 quiesce_migration(mg);
1323}
1324
1325
1326
1327
1328
1329static void invalidate(struct cache *cache, struct prealloc *structs,
1330 dm_oblock_t oblock, dm_cblock_t cblock,
1331 struct dm_bio_prison_cell *cell)
1332{
1333 struct dm_cache_migration *mg = prealloc_get_migration(structs);
1334
1335 mg->err = false;
1336 mg->discard = false;
1337 mg->writeback = false;
1338 mg->demote = true;
1339 mg->promote = false;
1340 mg->requeue_holder = true;
1341 mg->invalidate = true;
1342 mg->cache = cache;
1343 mg->old_oblock = oblock;
1344 mg->cblock = cblock;
1345 mg->old_ocell = cell;
1346 mg->new_ocell = NULL;
1347 mg->start_jiffies = jiffies;
1348
1349 inc_io_migrations(cache);
1350 quiesce_migration(mg);
1351}
1352
1353static void discard(struct cache *cache, struct prealloc *structs,
1354 struct dm_bio_prison_cell *cell)
1355{
1356 struct dm_cache_migration *mg = prealloc_get_migration(structs);
1357
1358 mg->err = false;
1359 mg->discard = true;
1360 mg->writeback = false;
1361 mg->demote = false;
1362 mg->promote = false;
1363 mg->requeue_holder = false;
1364 mg->invalidate = false;
1365 mg->cache = cache;
1366 mg->old_ocell = NULL;
1367 mg->new_ocell = cell;
1368 mg->start_jiffies = jiffies;
1369
1370 quiesce_migration(mg);
1371}
1372
1373
1374
1375
1376static void defer_bio(struct cache *cache, struct bio *bio)
1377{
1378 unsigned long flags;
1379
1380 spin_lock_irqsave(&cache->lock, flags);
1381 bio_list_add(&cache->deferred_bios, bio);
1382 spin_unlock_irqrestore(&cache->lock, flags);
1383
1384 wake_worker(cache);
1385}
1386
1387static void process_flush_bio(struct cache *cache, struct bio *bio)
1388{
1389 size_t pb_data_size = get_per_bio_data_size(cache);
1390 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
1391
1392 BUG_ON(bio->bi_iter.bi_size);
1393 if (!pb->req_nr)
1394 remap_to_origin(cache, bio);
1395 else
1396 remap_to_cache(cache, bio, 0);
1397
1398
1399
1400
1401
1402
1403 issue(cache, bio);
1404}
1405
1406static void process_discard_bio(struct cache *cache, struct prealloc *structs,
1407 struct bio *bio)
1408{
1409 int r;
1410 dm_dblock_t b, e;
1411 struct dm_bio_prison_cell *cell_prealloc, *new_ocell;
1412
1413 calc_discard_block_range(cache, bio, &b, &e);
1414 if (b == e) {
1415 bio_endio(bio, 0);
1416 return;
1417 }
1418
1419 cell_prealloc = prealloc_get_cell(structs);
1420 r = bio_detain_range(cache, dblock_to_oblock(cache, b), dblock_to_oblock(cache, e), bio, cell_prealloc,
1421 (cell_free_fn) prealloc_put_cell,
1422 structs, &new_ocell);
1423 if (r > 0)
1424 return;
1425
1426 discard(cache, structs, new_ocell);
1427}
1428
1429static bool spare_migration_bandwidth(struct cache *cache)
1430{
1431 sector_t current_volume = (atomic_read(&cache->nr_io_migrations) + 1) *
1432 cache->sectors_per_block;
1433 return current_volume < cache->migration_threshold;
1434}
1435
1436static void inc_hit_counter(struct cache *cache, struct bio *bio)
1437{
1438 atomic_inc(bio_data_dir(bio) == READ ?
1439 &cache->stats.read_hit : &cache->stats.write_hit);
1440}
1441
1442static void inc_miss_counter(struct cache *cache, struct bio *bio)
1443{
1444 atomic_inc(bio_data_dir(bio) == READ ?
1445 &cache->stats.read_miss : &cache->stats.write_miss);
1446}
1447
1448static void process_bio(struct cache *cache, struct prealloc *structs,
1449 struct bio *bio)
1450{
1451 int r;
1452 bool release_cell = true;
1453 dm_oblock_t block = get_bio_block(cache, bio);
1454 struct dm_bio_prison_cell *cell_prealloc, *old_ocell, *new_ocell;
1455 struct policy_result lookup_result;
1456 bool passthrough = passthrough_mode(&cache->features);
1457 bool discarded_block, can_migrate;
1458
1459
1460
1461
1462 cell_prealloc = prealloc_get_cell(structs);
1463 r = bio_detain(cache, block, bio, cell_prealloc,
1464 (cell_free_fn) prealloc_put_cell,
1465 structs, &new_ocell);
1466 if (r > 0)
1467 return;
1468
1469 discarded_block = is_discarded_oblock(cache, block);
1470 can_migrate = !passthrough && (discarded_block || spare_migration_bandwidth(cache));
1471
1472 r = policy_map(cache->policy, block, true, can_migrate, discarded_block,
1473 bio, &lookup_result);
1474
1475 if (r == -EWOULDBLOCK)
1476
1477 lookup_result.op = POLICY_MISS;
1478
1479 switch (lookup_result.op) {
1480 case POLICY_HIT:
1481 if (passthrough) {
1482 inc_miss_counter(cache, bio);
1483
1484
1485
1486
1487
1488
1489
1490 if (bio_data_dir(bio) == WRITE) {
1491 atomic_inc(&cache->stats.demotion);
1492 invalidate(cache, structs, block, lookup_result.cblock, new_ocell);
1493 release_cell = false;
1494
1495 } else {
1496
1497 remap_to_origin_clear_discard(cache, bio, block);
1498 inc_and_issue(cache, bio, new_ocell);
1499 }
1500 } else {
1501 inc_hit_counter(cache, bio);
1502
1503 if (bio_data_dir(bio) == WRITE &&
1504 writethrough_mode(&cache->features) &&
1505 !is_dirty(cache, lookup_result.cblock)) {
1506 remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
1507 inc_and_issue(cache, bio, new_ocell);
1508
1509 } else {
1510 remap_to_cache_dirty(cache, bio, block, lookup_result.cblock);
1511 inc_and_issue(cache, bio, new_ocell);
1512 }
1513 }
1514
1515 break;
1516
1517 case POLICY_MISS:
1518 inc_miss_counter(cache, bio);
1519 remap_to_origin_clear_discard(cache, bio, block);
1520 inc_and_issue(cache, bio, new_ocell);
1521 break;
1522
1523 case POLICY_NEW:
1524 atomic_inc(&cache->stats.promotion);
1525 promote(cache, structs, block, lookup_result.cblock, new_ocell);
1526 release_cell = false;
1527 break;
1528
1529 case POLICY_REPLACE:
1530 cell_prealloc = prealloc_get_cell(structs);
1531 r = bio_detain(cache, lookup_result.old_oblock, bio, cell_prealloc,
1532 (cell_free_fn) prealloc_put_cell,
1533 structs, &old_ocell);
1534 if (r > 0) {
1535
1536
1537
1538
1539
1540 policy_force_mapping(cache->policy, block,
1541 lookup_result.old_oblock);
1542 atomic_inc(&cache->stats.cache_cell_clash);
1543 break;
1544 }
1545 atomic_inc(&cache->stats.demotion);
1546 atomic_inc(&cache->stats.promotion);
1547
1548 demote_then_promote(cache, structs, lookup_result.old_oblock,
1549 block, lookup_result.cblock,
1550 old_ocell, new_ocell);
1551 release_cell = false;
1552 break;
1553
1554 default:
1555 DMERR_LIMIT("%s: erroring bio, unknown policy op: %u", __func__,
1556 (unsigned) lookup_result.op);
1557 bio_io_error(bio);
1558 }
1559
1560 if (release_cell)
1561 cell_defer(cache, new_ocell, false);
1562}
1563
1564static int need_commit_due_to_time(struct cache *cache)
1565{
1566 return !time_in_range(jiffies, cache->last_commit_jiffies,
1567 cache->last_commit_jiffies + COMMIT_PERIOD);
1568}
1569
1570static int commit_if_needed(struct cache *cache)
1571{
1572 int r = 0;
1573
1574 if ((cache->commit_requested || need_commit_due_to_time(cache)) &&
1575 dm_cache_changed_this_transaction(cache->cmd)) {
1576 atomic_inc(&cache->stats.commit_count);
1577 cache->commit_requested = false;
1578 r = dm_cache_commit(cache->cmd, false);
1579 cache->last_commit_jiffies = jiffies;
1580 }
1581
1582 return r;
1583}
1584
1585static void process_deferred_bios(struct cache *cache)
1586{
1587 unsigned long flags;
1588 struct bio_list bios;
1589 struct bio *bio;
1590 struct prealloc structs;
1591
1592 memset(&structs, 0, sizeof(structs));
1593 bio_list_init(&bios);
1594
1595 spin_lock_irqsave(&cache->lock, flags);
1596 bio_list_merge(&bios, &cache->deferred_bios);
1597 bio_list_init(&cache->deferred_bios);
1598 spin_unlock_irqrestore(&cache->lock, flags);
1599
1600 while (!bio_list_empty(&bios)) {
1601
1602
1603
1604
1605
1606 if (prealloc_data_structs(cache, &structs)) {
1607 spin_lock_irqsave(&cache->lock, flags);
1608 bio_list_merge(&cache->deferred_bios, &bios);
1609 spin_unlock_irqrestore(&cache->lock, flags);
1610 break;
1611 }
1612
1613 bio = bio_list_pop(&bios);
1614
1615 if (bio->bi_rw & REQ_FLUSH)
1616 process_flush_bio(cache, bio);
1617 else if (bio->bi_rw & REQ_DISCARD)
1618 process_discard_bio(cache, &structs, bio);
1619 else
1620 process_bio(cache, &structs, bio);
1621 }
1622
1623 prealloc_free_structs(cache, &structs);
1624}
1625
1626static void process_deferred_flush_bios(struct cache *cache, bool submit_bios)
1627{
1628 unsigned long flags;
1629 struct bio_list bios;
1630 struct bio *bio;
1631
1632 bio_list_init(&bios);
1633
1634 spin_lock_irqsave(&cache->lock, flags);
1635 bio_list_merge(&bios, &cache->deferred_flush_bios);
1636 bio_list_init(&cache->deferred_flush_bios);
1637 spin_unlock_irqrestore(&cache->lock, flags);
1638
1639
1640
1641
1642 while ((bio = bio_list_pop(&bios)))
1643 submit_bios ? generic_make_request(bio) : bio_io_error(bio);
1644}
1645
1646static void process_deferred_writethrough_bios(struct cache *cache)
1647{
1648 unsigned long flags;
1649 struct bio_list bios;
1650 struct bio *bio;
1651
1652 bio_list_init(&bios);
1653
1654 spin_lock_irqsave(&cache->lock, flags);
1655 bio_list_merge(&bios, &cache->deferred_writethrough_bios);
1656 bio_list_init(&cache->deferred_writethrough_bios);
1657 spin_unlock_irqrestore(&cache->lock, flags);
1658
1659
1660
1661
1662 while ((bio = bio_list_pop(&bios)))
1663 generic_make_request(bio);
1664}
1665
1666static void writeback_some_dirty_blocks(struct cache *cache)
1667{
1668 int r = 0;
1669 dm_oblock_t oblock;
1670 dm_cblock_t cblock;
1671 struct prealloc structs;
1672 struct dm_bio_prison_cell *old_ocell;
1673
1674 memset(&structs, 0, sizeof(structs));
1675
1676 while (spare_migration_bandwidth(cache)) {
1677 if (prealloc_data_structs(cache, &structs))
1678 break;
1679
1680 r = policy_writeback_work(cache->policy, &oblock, &cblock);
1681 if (r)
1682 break;
1683
1684 r = get_cell(cache, oblock, &structs, &old_ocell);
1685 if (r) {
1686 policy_set_dirty(cache->policy, oblock);
1687 break;
1688 }
1689
1690 writeback(cache, &structs, oblock, cblock, old_ocell);
1691 }
1692
1693 prealloc_free_structs(cache, &structs);
1694}
1695
1696
1697
1698
1699
1700
1701static void process_invalidation_request(struct cache *cache, struct invalidation_request *req)
1702{
1703 int r = 0;
1704 uint64_t begin = from_cblock(req->cblocks->begin);
1705 uint64_t end = from_cblock(req->cblocks->end);
1706
1707 while (begin != end) {
1708 r = policy_remove_cblock(cache->policy, to_cblock(begin));
1709 if (!r) {
1710 r = dm_cache_remove_mapping(cache->cmd, to_cblock(begin));
1711 if (r)
1712 break;
1713
1714 } else if (r == -ENODATA) {
1715
1716 r = 0;
1717
1718 } else {
1719 DMERR("policy_remove_cblock failed");
1720 break;
1721 }
1722
1723 begin++;
1724 }
1725
1726 cache->commit_requested = true;
1727
1728 req->err = r;
1729 atomic_set(&req->complete, 1);
1730
1731 wake_up(&req->result_wait);
1732}
1733
1734static void process_invalidation_requests(struct cache *cache)
1735{
1736 struct list_head list;
1737 struct invalidation_request *req, *tmp;
1738
1739 INIT_LIST_HEAD(&list);
1740 spin_lock(&cache->invalidation_lock);
1741 list_splice_init(&cache->invalidation_requests, &list);
1742 spin_unlock(&cache->invalidation_lock);
1743
1744 list_for_each_entry_safe (req, tmp, &list, list)
1745 process_invalidation_request(cache, req);
1746}
1747
1748
1749
1750
1751static bool is_quiescing(struct cache *cache)
1752{
1753 return atomic_read(&cache->quiescing);
1754}
1755
1756static void ack_quiescing(struct cache *cache)
1757{
1758 if (is_quiescing(cache)) {
1759 atomic_inc(&cache->quiescing_ack);
1760 wake_up(&cache->quiescing_wait);
1761 }
1762}
1763
1764static void wait_for_quiescing_ack(struct cache *cache)
1765{
1766 wait_event(cache->quiescing_wait, atomic_read(&cache->quiescing_ack));
1767}
1768
1769static void start_quiescing(struct cache *cache)
1770{
1771 atomic_inc(&cache->quiescing);
1772 wait_for_quiescing_ack(cache);
1773}
1774
1775static void stop_quiescing(struct cache *cache)
1776{
1777 atomic_set(&cache->quiescing, 0);
1778 atomic_set(&cache->quiescing_ack, 0);
1779}
1780
1781static void wait_for_migrations(struct cache *cache)
1782{
1783 wait_event(cache->migration_wait, !atomic_read(&cache->nr_allocated_migrations));
1784}
1785
1786static void stop_worker(struct cache *cache)
1787{
1788 cancel_delayed_work(&cache->waker);
1789 flush_workqueue(cache->wq);
1790}
1791
1792static void requeue_deferred_io(struct cache *cache)
1793{
1794 struct bio *bio;
1795 struct bio_list bios;
1796
1797 bio_list_init(&bios);
1798 bio_list_merge(&bios, &cache->deferred_bios);
1799 bio_list_init(&cache->deferred_bios);
1800
1801 while ((bio = bio_list_pop(&bios)))
1802 bio_endio(bio, DM_ENDIO_REQUEUE);
1803}
1804
1805static int more_work(struct cache *cache)
1806{
1807 if (is_quiescing(cache))
1808 return !list_empty(&cache->quiesced_migrations) ||
1809 !list_empty(&cache->completed_migrations) ||
1810 !list_empty(&cache->need_commit_migrations);
1811 else
1812 return !bio_list_empty(&cache->deferred_bios) ||
1813 !bio_list_empty(&cache->deferred_flush_bios) ||
1814 !bio_list_empty(&cache->deferred_writethrough_bios) ||
1815 !list_empty(&cache->quiesced_migrations) ||
1816 !list_empty(&cache->completed_migrations) ||
1817 !list_empty(&cache->need_commit_migrations) ||
1818 cache->invalidate;
1819}
1820
1821static void do_worker(struct work_struct *ws)
1822{
1823 struct cache *cache = container_of(ws, struct cache, worker);
1824
1825 do {
1826 if (!is_quiescing(cache)) {
1827 writeback_some_dirty_blocks(cache);
1828 process_deferred_writethrough_bios(cache);
1829 process_deferred_bios(cache);
1830 process_invalidation_requests(cache);
1831 }
1832
1833 process_migrations(cache, &cache->quiesced_migrations, issue_copy_or_discard);
1834 process_migrations(cache, &cache->completed_migrations, complete_migration);
1835
1836 if (commit_if_needed(cache)) {
1837 process_deferred_flush_bios(cache, false);
1838 process_migrations(cache, &cache->need_commit_migrations, migration_failure);
1839
1840
1841
1842
1843
1844 } else {
1845 process_deferred_flush_bios(cache, true);
1846 process_migrations(cache, &cache->need_commit_migrations,
1847 migration_success_post_commit);
1848 }
1849
1850 ack_quiescing(cache);
1851
1852 } while (more_work(cache));
1853}
1854
1855
1856
1857
1858
1859static void do_waker(struct work_struct *ws)
1860{
1861 struct cache *cache = container_of(to_delayed_work(ws), struct cache, waker);
1862 policy_tick(cache->policy);
1863 wake_worker(cache);
1864 queue_delayed_work(cache->wq, &cache->waker, COMMIT_PERIOD);
1865}
1866
1867
1868
1869static int is_congested(struct dm_dev *dev, int bdi_bits)
1870{
1871 struct request_queue *q = bdev_get_queue(dev->bdev);
1872 return bdi_congested(&q->backing_dev_info, bdi_bits);
1873}
1874
1875static int cache_is_congested(struct dm_target_callbacks *cb, int bdi_bits)
1876{
1877 struct cache *cache = container_of(cb, struct cache, callbacks);
1878
1879 return is_congested(cache->origin_dev, bdi_bits) ||
1880 is_congested(cache->cache_dev, bdi_bits);
1881}
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891static void destroy(struct cache *cache)
1892{
1893 unsigned i;
1894
1895 if (cache->migration_pool)
1896 mempool_destroy(cache->migration_pool);
1897
1898 if (cache->all_io_ds)
1899 dm_deferred_set_destroy(cache->all_io_ds);
1900
1901 if (cache->prison)
1902 dm_bio_prison_destroy(cache->prison);
1903
1904 if (cache->wq)
1905 destroy_workqueue(cache->wq);
1906
1907 if (cache->dirty_bitset)
1908 free_bitset(cache->dirty_bitset);
1909
1910 if (cache->discard_bitset)
1911 free_bitset(cache->discard_bitset);
1912
1913 if (cache->copier)
1914 dm_kcopyd_client_destroy(cache->copier);
1915
1916 if (cache->cmd)
1917 dm_cache_metadata_close(cache->cmd);
1918
1919 if (cache->metadata_dev)
1920 dm_put_device(cache->ti, cache->metadata_dev);
1921
1922 if (cache->origin_dev)
1923 dm_put_device(cache->ti, cache->origin_dev);
1924
1925 if (cache->cache_dev)
1926 dm_put_device(cache->ti, cache->cache_dev);
1927
1928 if (cache->policy)
1929 dm_cache_policy_destroy(cache->policy);
1930
1931 for (i = 0; i < cache->nr_ctr_args ; i++)
1932 kfree(cache->ctr_args[i]);
1933 kfree(cache->ctr_args);
1934
1935 kfree(cache);
1936}
1937
1938static void cache_dtr(struct dm_target *ti)
1939{
1940 struct cache *cache = ti->private;
1941
1942 destroy(cache);
1943}
1944
1945static sector_t get_dev_size(struct dm_dev *dev)
1946{
1947 return i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT;
1948}
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981struct cache_args {
1982 struct dm_target *ti;
1983
1984 struct dm_dev *metadata_dev;
1985
1986 struct dm_dev *cache_dev;
1987 sector_t cache_sectors;
1988
1989 struct dm_dev *origin_dev;
1990 sector_t origin_sectors;
1991
1992 uint32_t block_size;
1993
1994 const char *policy_name;
1995 int policy_argc;
1996 const char **policy_argv;
1997
1998 struct cache_features features;
1999};
2000
2001static void destroy_cache_args(struct cache_args *ca)
2002{
2003 if (ca->metadata_dev)
2004 dm_put_device(ca->ti, ca->metadata_dev);
2005
2006 if (ca->cache_dev)
2007 dm_put_device(ca->ti, ca->cache_dev);
2008
2009 if (ca->origin_dev)
2010 dm_put_device(ca->ti, ca->origin_dev);
2011
2012 kfree(ca);
2013}
2014
2015static bool at_least_one_arg(struct dm_arg_set *as, char **error)
2016{
2017 if (!as->argc) {
2018 *error = "Insufficient args";
2019 return false;
2020 }
2021
2022 return true;
2023}
2024
2025static int parse_metadata_dev(struct cache_args *ca, struct dm_arg_set *as,
2026 char **error)
2027{
2028 int r;
2029 sector_t metadata_dev_size;
2030 char b[BDEVNAME_SIZE];
2031
2032 if (!at_least_one_arg(as, error))
2033 return -EINVAL;
2034
2035 r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
2036 &ca->metadata_dev);
2037 if (r) {
2038 *error = "Error opening metadata device";
2039 return r;
2040 }
2041
2042 metadata_dev_size = get_dev_size(ca->metadata_dev);
2043 if (metadata_dev_size > DM_CACHE_METADATA_MAX_SECTORS_WARNING)
2044 DMWARN("Metadata device %s is larger than %u sectors: excess space will not be used.",
2045 bdevname(ca->metadata_dev->bdev, b), THIN_METADATA_MAX_SECTORS);
2046
2047 return 0;
2048}
2049
2050static int parse_cache_dev(struct cache_args *ca, struct dm_arg_set *as,
2051 char **error)
2052{
2053 int r;
2054
2055 if (!at_least_one_arg(as, error))
2056 return -EINVAL;
2057
2058 r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
2059 &ca->cache_dev);
2060 if (r) {
2061 *error = "Error opening cache device";
2062 return r;
2063 }
2064 ca->cache_sectors = get_dev_size(ca->cache_dev);
2065
2066 return 0;
2067}
2068
2069static int parse_origin_dev(struct cache_args *ca, struct dm_arg_set *as,
2070 char **error)
2071{
2072 int r;
2073
2074 if (!at_least_one_arg(as, error))
2075 return -EINVAL;
2076
2077 r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
2078 &ca->origin_dev);
2079 if (r) {
2080 *error = "Error opening origin device";
2081 return r;
2082 }
2083
2084 ca->origin_sectors = get_dev_size(ca->origin_dev);
2085 if (ca->ti->len > ca->origin_sectors) {
2086 *error = "Device size larger than cached device";
2087 return -EINVAL;
2088 }
2089
2090 return 0;
2091}
2092
2093static int parse_block_size(struct cache_args *ca, struct dm_arg_set *as,
2094 char **error)
2095{
2096 unsigned long block_size;
2097
2098 if (!at_least_one_arg(as, error))
2099 return -EINVAL;
2100
2101 if (kstrtoul(dm_shift_arg(as), 10, &block_size) || !block_size ||
2102 block_size < DATA_DEV_BLOCK_SIZE_MIN_SECTORS ||
2103 block_size > DATA_DEV_BLOCK_SIZE_MAX_SECTORS ||
2104 block_size & (DATA_DEV_BLOCK_SIZE_MIN_SECTORS - 1)) {
2105 *error = "Invalid data block size";
2106 return -EINVAL;
2107 }
2108
2109 if (block_size > ca->cache_sectors) {
2110 *error = "Data block size is larger than the cache device";
2111 return -EINVAL;
2112 }
2113
2114 ca->block_size = block_size;
2115
2116 return 0;
2117}
2118
2119static void init_features(struct cache_features *cf)
2120{
2121 cf->mode = CM_WRITE;
2122 cf->io_mode = CM_IO_WRITEBACK;
2123}
2124
2125static int parse_features(struct cache_args *ca, struct dm_arg_set *as,
2126 char **error)
2127{
2128 static struct dm_arg _args[] = {
2129 {0, 1, "Invalid number of cache feature arguments"},
2130 };
2131
2132 int r;
2133 unsigned argc;
2134 const char *arg;
2135 struct cache_features *cf = &ca->features;
2136
2137 init_features(cf);
2138
2139 r = dm_read_arg_group(_args, as, &argc, error);
2140 if (r)
2141 return -EINVAL;
2142
2143 while (argc--) {
2144 arg = dm_shift_arg(as);
2145
2146 if (!strcasecmp(arg, "writeback"))
2147 cf->io_mode = CM_IO_WRITEBACK;
2148
2149 else if (!strcasecmp(arg, "writethrough"))
2150 cf->io_mode = CM_IO_WRITETHROUGH;
2151
2152 else if (!strcasecmp(arg, "passthrough"))
2153 cf->io_mode = CM_IO_PASSTHROUGH;
2154
2155 else {
2156 *error = "Unrecognised cache feature requested";
2157 return -EINVAL;
2158 }
2159 }
2160
2161 return 0;
2162}
2163
2164static int parse_policy(struct cache_args *ca, struct dm_arg_set *as,
2165 char **error)
2166{
2167 static struct dm_arg _args[] = {
2168 {0, 1024, "Invalid number of policy arguments"},
2169 };
2170
2171 int r;
2172
2173 if (!at_least_one_arg(as, error))
2174 return -EINVAL;
2175
2176 ca->policy_name = dm_shift_arg(as);
2177
2178 r = dm_read_arg_group(_args, as, &ca->policy_argc, error);
2179 if (r)
2180 return -EINVAL;
2181
2182 ca->policy_argv = (const char **)as->argv;
2183 dm_consume_args(as, ca->policy_argc);
2184
2185 return 0;
2186}
2187
2188static int parse_cache_args(struct cache_args *ca, int argc, char **argv,
2189 char **error)
2190{
2191 int r;
2192 struct dm_arg_set as;
2193
2194 as.argc = argc;
2195 as.argv = argv;
2196
2197 r = parse_metadata_dev(ca, &as, error);
2198 if (r)
2199 return r;
2200
2201 r = parse_cache_dev(ca, &as, error);
2202 if (r)
2203 return r;
2204
2205 r = parse_origin_dev(ca, &as, error);
2206 if (r)
2207 return r;
2208
2209 r = parse_block_size(ca, &as, error);
2210 if (r)
2211 return r;
2212
2213 r = parse_features(ca, &as, error);
2214 if (r)
2215 return r;
2216
2217 r = parse_policy(ca, &as, error);
2218 if (r)
2219 return r;
2220
2221 return 0;
2222}
2223
2224
2225
2226static struct kmem_cache *migration_cache;
2227
2228#define NOT_CORE_OPTION 1
2229
2230static int process_config_option(struct cache *cache, const char *key, const char *value)
2231{
2232 unsigned long tmp;
2233
2234 if (!strcasecmp(key, "migration_threshold")) {
2235 if (kstrtoul(value, 10, &tmp))
2236 return -EINVAL;
2237
2238 cache->migration_threshold = tmp;
2239 return 0;
2240 }
2241
2242 return NOT_CORE_OPTION;
2243}
2244
2245static int set_config_value(struct cache *cache, const char *key, const char *value)
2246{
2247 int r = process_config_option(cache, key, value);
2248
2249 if (r == NOT_CORE_OPTION)
2250 r = policy_set_config_value(cache->policy, key, value);
2251
2252 if (r)
2253 DMWARN("bad config value for %s: %s", key, value);
2254
2255 return r;
2256}
2257
2258static int set_config_values(struct cache *cache, int argc, const char **argv)
2259{
2260 int r = 0;
2261
2262 if (argc & 1) {
2263 DMWARN("Odd number of policy arguments given but they should be <key> <value> pairs.");
2264 return -EINVAL;
2265 }
2266
2267 while (argc) {
2268 r = set_config_value(cache, argv[0], argv[1]);
2269 if (r)
2270 break;
2271
2272 argc -= 2;
2273 argv += 2;
2274 }
2275
2276 return r;
2277}
2278
2279static int create_cache_policy(struct cache *cache, struct cache_args *ca,
2280 char **error)
2281{
2282 struct dm_cache_policy *p = dm_cache_policy_create(ca->policy_name,
2283 cache->cache_size,
2284 cache->origin_sectors,
2285 cache->sectors_per_block);
2286 if (IS_ERR(p)) {
2287 *error = "Error creating cache's policy";
2288 return PTR_ERR(p);
2289 }
2290 cache->policy = p;
2291
2292 return 0;
2293}
2294
2295
2296
2297
2298
2299#define MAX_DISCARD_BLOCKS (1 << 14)
2300
2301static bool too_many_discard_blocks(sector_t discard_block_size,
2302 sector_t origin_size)
2303{
2304 (void) sector_div(origin_size, discard_block_size);
2305
2306 return origin_size > MAX_DISCARD_BLOCKS;
2307}
2308
2309static sector_t calculate_discard_block_size(sector_t cache_block_size,
2310 sector_t origin_size)
2311{
2312 sector_t discard_block_size = cache_block_size;
2313
2314 if (origin_size)
2315 while (too_many_discard_blocks(discard_block_size, origin_size))
2316 discard_block_size *= 2;
2317
2318 return discard_block_size;
2319}
2320
2321static void set_cache_size(struct cache *cache, dm_cblock_t size)
2322{
2323 dm_block_t nr_blocks = from_cblock(size);
2324
2325 if (nr_blocks > (1 << 20) && cache->cache_size != size)
2326 DMWARN_LIMIT("You have created a cache device with a lot of individual cache blocks (%llu)\n"
2327 "All these mappings can consume a lot of kernel memory, and take some time to read/write.\n"
2328 "Please consider increasing the cache block size to reduce the overall cache block count.",
2329 (unsigned long long) nr_blocks);
2330
2331 cache->cache_size = size;
2332}
2333
2334#define DEFAULT_MIGRATION_THRESHOLD 2048
2335
2336static int cache_create(struct cache_args *ca, struct cache **result)
2337{
2338 int r = 0;
2339 char **error = &ca->ti->error;
2340 struct cache *cache;
2341 struct dm_target *ti = ca->ti;
2342 dm_block_t origin_blocks;
2343 struct dm_cache_metadata *cmd;
2344 bool may_format = ca->features.mode == CM_WRITE;
2345
2346 cache = kzalloc(sizeof(*cache), GFP_KERNEL);
2347 if (!cache)
2348 return -ENOMEM;
2349
2350 cache->ti = ca->ti;
2351 ti->private = cache;
2352 ti->num_flush_bios = 2;
2353 ti->flush_supported = true;
2354
2355 ti->num_discard_bios = 1;
2356 ti->discards_supported = true;
2357 ti->discard_zeroes_data_unsupported = true;
2358 ti->split_discard_bios = false;
2359
2360 cache->features = ca->features;
2361 ti->per_bio_data_size = get_per_bio_data_size(cache);
2362
2363 cache->callbacks.congested_fn = cache_is_congested;
2364 dm_table_add_target_callbacks(ti->table, &cache->callbacks);
2365
2366 cache->metadata_dev = ca->metadata_dev;
2367 cache->origin_dev = ca->origin_dev;
2368 cache->cache_dev = ca->cache_dev;
2369
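 /*
  * The cache has taken over the three dm_dev references; clear them in
  * the args struct so destroy_cache_args() doesn't release them.
  */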
2370 ca->metadata_dev = ca->origin_dev = ca->cache_dev = NULL;
2371
2372
2373 origin_blocks = cache->origin_sectors = ca->origin_sectors;
2374 origin_blocks = block_div(origin_blocks, ca->block_size);
2375 cache->origin_blocks = to_oblock(origin_blocks);
2376
2377 cache->sectors_per_block = ca->block_size;
2378 if (dm_set_target_max_io_len(ti, cache->sectors_per_block)) {
2379 r = -EINVAL;
2380 goto bad;
2381 }
2382
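 /*
  * If the block size is not a power of two we cannot use shifts;
  * a sectors_per_block_shift of -1 flags that divisions are needed
  * instead.
  */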
2383 if (ca->block_size & (ca->block_size - 1)) {
2384 dm_block_t cache_size = ca->cache_sectors;
2385
2386 cache->sectors_per_block_shift = -1;
2387 cache_size = block_div(cache_size, ca->block_size);
2388 set_cache_size(cache, to_cblock(cache_size));
2389 } else {
2390 cache->sectors_per_block_shift = __ffs(ca->block_size);
2391 set_cache_size(cache, to_cblock(ca->cache_sectors >> cache->sectors_per_block_shift));
2392 }
2393
2394 r = create_cache_policy(cache, ca, error);
2395 if (r)
2396 goto bad;
2397
2398 cache->policy_nr_args = ca->policy_argc;
2399 cache->migration_threshold = DEFAULT_MIGRATION_THRESHOLD;
2400
2401 r = set_config_values(cache, ca->policy_argc, ca->policy_argv);
2402 if (r) {
2403 *error = "Error setting cache policy's config values";
2404 goto bad;
2405 }
2406
2407 cmd = dm_cache_metadata_open(cache->metadata_dev->bdev,
2408 ca->block_size, may_format,
2409 dm_cache_policy_get_hint_size(cache->policy));
2410 if (IS_ERR(cmd)) {
2411 *error = "Error creating metadata object";
2412 r = PTR_ERR(cmd);
2413 goto bad;
2414 }
2415 cache->cmd = cmd;
2416
2417 if (passthrough_mode(&cache->features)) {
2418 bool all_clean;
2419
2420 r = dm_cache_metadata_all_clean(cache->cmd, &all_clean);
2421 if (r) {
2422 *error = "dm_cache_metadata_all_clean() failed";
2423 goto bad;
2424 }
2425
2426 if (!all_clean) {
2427 *error = "Cannot enter passthrough mode unless all blocks are clean";
2428 r = -EINVAL;
2429 goto bad;
2430 }
2431 }
2432
2433 spin_lock_init(&cache->lock);
2434 bio_list_init(&cache->deferred_bios);
2435 bio_list_init(&cache->deferred_flush_bios);
2436 bio_list_init(&cache->deferred_writethrough_bios);
2437 INIT_LIST_HEAD(&cache->quiesced_migrations);
2438 INIT_LIST_HEAD(&cache->completed_migrations);
2439 INIT_LIST_HEAD(&cache->need_commit_migrations);
2440 atomic_set(&cache->nr_allocated_migrations, 0);
2441 atomic_set(&cache->nr_io_migrations, 0);
2442 init_waitqueue_head(&cache->migration_wait);
2443
2444 init_waitqueue_head(&cache->quiescing_wait);
2445 atomic_set(&cache->quiescing, 0);
2446 atomic_set(&cache->quiescing_ack, 0);
2447
2448 r = -ENOMEM;
2449 atomic_set(&cache->nr_dirty, 0);
2450 cache->dirty_bitset = alloc_bitset(from_cblock(cache->cache_size));
2451 if (!cache->dirty_bitset) {
2452 *error = "could not allocate dirty bitset";
2453 goto bad;
2454 }
2455 clear_bitset(cache->dirty_bitset, from_cblock(cache->cache_size));
2456
2457 cache->discard_block_size =
2458 calculate_discard_block_size(cache->sectors_per_block,
2459 cache->origin_sectors);
2460 cache->discard_nr_blocks = to_dblock(dm_sector_div_up(cache->origin_sectors,
2461 cache->discard_block_size));
2462 cache->discard_bitset = alloc_bitset(from_dblock(cache->discard_nr_blocks));
2463 if (!cache->discard_bitset) {
2464 *error = "could not allocate discard bitset";
2465 goto bad;
2466 }
2467 clear_bitset(cache->discard_bitset, from_dblock(cache->discard_nr_blocks));
2468
2469 cache->copier = dm_kcopyd_client_create(&dm_kcopyd_throttle);
2470 if (IS_ERR(cache->copier)) {
2471 *error = "could not create kcopyd client";
2472 r = PTR_ERR(cache->copier);
2473 goto bad;
2474 }
2475
2476 cache->wq = alloc_ordered_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM);
2477 if (!cache->wq) {
2478 *error = "could not create workqueue for metadata object";
2479 goto bad;
2480 }
2481 INIT_WORK(&cache->worker, do_worker);
2482 INIT_DELAYED_WORK(&cache->waker, do_waker);
2483 cache->last_commit_jiffies = jiffies;
2484
2485 cache->prison = dm_bio_prison_create();
2486 if (!cache->prison) {
2487 *error = "could not create bio prison";
2488 goto bad;
2489 }
2490
2491 cache->all_io_ds = dm_deferred_set_create();
2492 if (!cache->all_io_ds) {
2493 *error = "could not create all_io deferred set";
2494 goto bad;
2495 }
2496
2497 cache->migration_pool = mempool_create_slab_pool(MIGRATION_POOL_SIZE,
2498 migration_cache);
2499 if (!cache->migration_pool) {
2500 *error = "Error creating cache's migration mempool";
2501 goto bad;
2502 }
2503
2504 cache->need_tick_bio = true;
2505 cache->sized = false;
2506 cache->invalidate = false;
2507 cache->commit_requested = false;
2508 cache->loaded_mappings = false;
2509 cache->loaded_discards = false;
2510
2511 load_stats(cache);
2512
2513 atomic_set(&cache->stats.demotion, 0);
2514 atomic_set(&cache->stats.promotion, 0);
2515 atomic_set(&cache->stats.copies_avoided, 0);
2516 atomic_set(&cache->stats.cache_cell_clash, 0);
2517 atomic_set(&cache->stats.commit_count, 0);
2518 atomic_set(&cache->stats.discard_count, 0);
2519
2520 spin_lock_init(&cache->invalidation_lock);
2521 INIT_LIST_HEAD(&cache->invalidation_requests);
2522
2523 *result = cache;
2524 return 0;
2525
2526bad:
2527 destroy(cache);
2528 return r;
2529}
2530
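/*
 * Keep a copy of the constructor arguments so cache_status() can
 * reproduce the table line (STATUSTYPE_TABLE) later on.
 */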
2531static int copy_ctr_args(struct cache *cache, int argc, const char **argv)
2532{
2533 unsigned i;
2534 const char **copy;
2535
2536 copy = kcalloc(argc, sizeof(*copy), GFP_KERNEL);
2537 if (!copy)
2538 return -ENOMEM;
2539 for (i = 0; i < argc; i++) {
2540 copy[i] = kstrdup(argv[i], GFP_KERNEL);
2541 if (!copy[i]) {
2542 while (i--)
2543 kfree(copy[i]);
2544 kfree(copy);
2545 return -ENOMEM;
2546 }
2547 }
2548
2549 cache->nr_ctr_args = argc;
2550 cache->ctr_args = copy;
2551
2552 return 0;
2553}
2554
2555static int cache_ctr(struct dm_target *ti, unsigned argc, char **argv)
2556{
2557 int r = -EINVAL;
2558 struct cache_args *ca;
2559 struct cache *cache = NULL;
2560
2561 ca = kzalloc(sizeof(*ca), GFP_KERNEL);
2562 if (!ca) {
2563 ti->error = "Error allocating memory for cache";
2564 return -ENOMEM;
2565 }
2566 ca->ti = ti;
2567
2568 r = parse_cache_args(ca, argc, argv, &ti->error);
2569 if (r)
2570 goto out;
2571
2572 r = cache_create(ca, &cache);
2573 if (r)
2574 goto out;
2575
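 /*
  * The first three arguments (the metadata, cache and origin devices)
  * are emitted separately by cache_status(), so only the remainder of
  * the table line is copied here.
  */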
2576 r = copy_ctr_args(cache, argc - 3, (const char **)argv + 3);
2577 if (r) {
2578 destroy(cache);
2579 goto out;
2580 }
2581
2582 ti->private = cache;
2583
2584out:
2585 destroy_cache_args(ca);
2586 return r;
2587}
2588
2589static int __cache_map(struct cache *cache, struct bio *bio, struct dm_bio_prison_cell **cell)
2590{
2591 int r;
2592 dm_oblock_t block = get_bio_block(cache, bio);
2593 size_t pb_data_size = get_per_bio_data_size(cache);
2594 bool can_migrate = false;
2595 bool discarded_block;
2596 struct policy_result lookup_result;
2597 struct per_bio_data *pb = init_per_bio_data(bio, pb_data_size);
2598
2599 if (unlikely(from_oblock(block) >= from_oblock(cache->origin_blocks))) {
2600
2601
2602
2603
2604
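 /*
  * The bio is beyond the last cache-managed block, which can only
  * happen for a partial block at the end of the origin device.  Such
  * blocks are never cached, so remap straight to the origin.
  */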
2605 remap_to_origin(cache, bio);
2606 return DM_MAPIO_REMAPPED;
2607 }
2608
2609 if (bio->bi_rw & (REQ_FLUSH | REQ_FUA | REQ_DISCARD)) {
2610 defer_bio(cache, bio);
2611 return DM_MAPIO_SUBMITTED;
2612 }
2613
2614
2615
2616
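 /*
  * Detain the bio in a cell so it cannot race with a migration of this
  * block.  If no cell can be allocated, defer the bio to the worker.
  */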
2617 *cell = alloc_prison_cell(cache);
2618 if (!*cell) {
2619 defer_bio(cache, bio);
2620 return DM_MAPIO_SUBMITTED;
2621 }
2622
2623 r = bio_detain(cache, block, bio, *cell,
2624 (cell_free_fn) free_prison_cell,
2625 cache, cell);
2626 if (r) {
2627 if (r < 0)
2628 defer_bio(cache, bio);
2629
2630 return DM_MAPIO_SUBMITTED;
2631 }
2632
2633 discarded_block = is_discarded_oblock(cache, block);
2634
2635 r = policy_map(cache->policy, block, false, can_migrate, discarded_block,
2636 bio, &lookup_result);
2637 if (r == -EWOULDBLOCK) {
2638 cell_defer(cache, *cell, true);
2639 return DM_MAPIO_SUBMITTED;
2640
2641 } else if (r) {
2642 DMERR_LIMIT("Unexpected return from cache replacement policy: %d", r);
2643 cell_defer(cache, *cell, false);
2644 bio_io_error(bio);
2645 return DM_MAPIO_SUBMITTED;
2646 }
2647
2648 r = DM_MAPIO_REMAPPED;
2649 switch (lookup_result.op) {
2650 case POLICY_HIT:
2651 if (passthrough_mode(&cache->features)) {
2652 if (bio_data_dir(bio) == WRITE) {
2653
2654
2655
2656
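 /*
  * In passthrough mode a write means the cached copy must be
  * invalidated, which is the worker's job, so defer the bio.
  */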
2657 cell_defer(cache, *cell, true);
2658 r = DM_MAPIO_SUBMITTED;
2659
2660 } else {
2661 inc_miss_counter(cache, bio);
2662 remap_to_origin_clear_discard(cache, bio, block);
2663 }
2664
2665 } else {
2666 inc_hit_counter(cache, bio);
2667 if (bio_data_dir(bio) == WRITE && writethrough_mode(&cache->features) &&
2668 !is_dirty(cache, lookup_result.cblock))
2669 remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
2670 else
2671 remap_to_cache_dirty(cache, bio, block, lookup_result.cblock);
2672 }
2673 break;
2674
2675 case POLICY_MISS:
2676 inc_miss_counter(cache, bio);
2677 if (pb->req_nr != 0) {
2678
2679
2680
2681
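 /*
  * Only the first bio (req_nr 0) needs to reach the origin on a
  * miss; any duplicate is completed immediately.
  */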
2682 bio_endio(bio, 0);
2683 cell_defer(cache, *cell, false);
2684 r = DM_MAPIO_SUBMITTED;
2685
2686 } else
2687 remap_to_origin_clear_discard(cache, bio, block);
2688
2689 break;
2690
2691 default:
2692 DMERR_LIMIT("%s: erroring bio: unknown policy op: %u", __func__,
2693 (unsigned) lookup_result.op);
2694 cell_defer(cache, *cell, false);
2695 bio_io_error(bio);
2696 r = DM_MAPIO_SUBMITTED;
2697 }
2698
2699 return r;
2700}
2701
2702static int cache_map(struct dm_target *ti, struct bio *bio)
2703{
2704 int r;
2705 struct dm_bio_prison_cell *cell = NULL;
2706 struct cache *cache = ti->private;
2707
2708 r = __cache_map(cache, bio, &cell);
2709 if (r == DM_MAPIO_REMAPPED && cell) {
2710 inc_ds(cache, bio, cell);
2711 cell_defer(cache, cell, false);
2712 }
2713
2714 return r;
2715}
2716
2717static int cache_end_io(struct dm_target *ti, struct bio *bio, int error)
2718{
2719 struct cache *cache = ti->private;
2720 unsigned long flags;
2721 size_t pb_data_size = get_per_bio_data_size(cache);
2722 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
2723
2724 if (pb->tick) {
2725 policy_tick(cache->policy);
2726
2727 spin_lock_irqsave(&cache->lock, flags);
2728 cache->need_tick_bio = true;
2729 spin_unlock_irqrestore(&cache->lock, flags);
2730 }
2731
2732 check_for_quiesced_migrations(cache, pb);
2733
2734 return 0;
2735}
2736
2737static int write_dirty_bitset(struct cache *cache)
2738{
2739 unsigned i, r;
2740
2741 for (i = 0; i < from_cblock(cache->cache_size); i++) {
2742 r = dm_cache_set_dirty(cache->cmd, to_cblock(i),
2743 is_dirty(cache, to_cblock(i)));
2744 if (r)
2745 return r;
2746 }
2747
2748 return 0;
2749}
2750
2751static int write_discard_bitset(struct cache *cache)
2752{
2753 unsigned i, r;
2754
2755 r = dm_cache_discard_bitset_resize(cache->cmd, cache->discard_block_size,
2756 cache->discard_nr_blocks);
2757 if (r) {
2758 DMERR("could not resize on-disk discard bitset");
2759 return r;
2760 }
2761
2762 for (i = 0; i < from_dblock(cache->discard_nr_blocks); i++) {
2763 r = dm_cache_set_discard(cache->cmd, to_dblock(i),
2764 is_discarded(cache, to_dblock(i)));
2765 if (r)
2766 return r;
2767 }
2768
2769 return 0;
2770}
2771
2772
2773
2774
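/*
 * Flush all in-core state to the metadata device: dirty bits, the discard
 * bitset, statistics and the policy's hints, then commit.  Returns true
 * only if every step succeeded.
 */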
2775static bool sync_metadata(struct cache *cache)
2776{
2777 int r1, r2, r3, r4;
2778
2779 r1 = write_dirty_bitset(cache);
2780 if (r1)
2781 DMERR("could not write dirty bitset");
2782
2783 r2 = write_discard_bitset(cache);
2784 if (r2)
2785 DMERR("could not write discard bitset");
2786
2787 save_stats(cache);
2788
2789 r3 = dm_cache_write_hints(cache->cmd, cache->policy);
2790 if (r3)
2791 DMERR("could not write hints");
2792
2793
2794
2795
2796
2797
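 /*
  * Still commit even if some of the writes above failed, but only
  * claim a clean shutdown when everything succeeded, so that stale
  * dirty/hint information is not trusted on the next activation.
  */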
2798 r4 = dm_cache_commit(cache->cmd, !r1 && !r2 && !r3);
2799 if (r4)
2800 DMERR("could not write cache metadata. Data loss may occur.");
2801
2802 return !r1 && !r2 && !r3 && !r4;
2803}
2804
2805static void cache_postsuspend(struct dm_target *ti)
2806{
2807 struct cache *cache = ti->private;
2808
2809 start_quiescing(cache);
2810 wait_for_migrations(cache);
2811 stop_worker(cache);
2812 requeue_deferred_io(cache);
2813 stop_quiescing(cache);
2814
2815 (void) sync_metadata(cache);
2816}
2817
2818static int load_mapping(void *context, dm_oblock_t oblock, dm_cblock_t cblock,
2819 bool dirty, uint32_t hint, bool hint_valid)
2820{
2821 int r;
2822 struct cache *cache = context;
2823
2824 r = policy_load_mapping(cache->policy, oblock, cblock, hint, hint_valid);
2825 if (r)
2826 return r;
2827
2828 if (dirty)
2829 set_dirty(cache, oblock, cblock);
2830 else
2831 clear_dirty(cache, oblock, cblock);
2832
2833 return 0;
2834}
2835
2836
2837
2838
2839
2840
2841
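/*
 * The discard block size recorded in the metadata is not necessarily the
 * one currently in use, so runs of discarded blocks are accumulated in
 * metadata units and only converted when a run is closed, taking care to
 * mark whole blocks of the current size only.
 */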
2842struct discard_load_info {
2843 struct cache *cache;
2844
2845
2846
2847
2848
2849 dm_block_t block_size;
2850 dm_block_t discard_begin, discard_end;
2851};
2852
2853static void discard_load_info_init(struct cache *cache,
2854 struct discard_load_info *li)
2855{
2856 li->cache = cache;
2857 li->discard_begin = li->discard_end = 0;
2858}
2859
2860static void set_discard_range(struct discard_load_info *li)
2861{
2862 sector_t b, e;
2863
2864 if (li->discard_begin == li->discard_end)
2865 return;
2866
2867
2868
2869
2870 b = li->discard_begin * li->block_size;
2871 e = li->discard_end * li->block_size;
2872
2873
2874
2875
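 /*
  * Convert back to the current discard block size, rounding the start
  * up and the end down so only complete blocks are marked.
  */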
2876 b = dm_sector_div_up(b, li->cache->discard_block_size);
2877 sector_div(e, li->cache->discard_block_size);
2878
2879
2880
2881
2882
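 /*
  * The origin may have shrunk since the discards were recorded, so
  * clamp the end to the current number of discard blocks.
  */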
2883 if (e > from_dblock(li->cache->discard_nr_blocks))
2884 e = from_dblock(li->cache->discard_nr_blocks);
2885
2886 for (; b < e; b++)
2887 set_discard(li->cache, to_dblock(b));
2888}
2889
2890static int load_discard(void *context, sector_t discard_block_size,
2891 dm_dblock_t dblock, bool discard)
2892{
2893 struct discard_load_info *li = context;
2894
2895 li->block_size = discard_block_size;
2896
2897 if (discard) {
2898 if (from_dblock(dblock) == li->discard_end)
2899
2900
2901
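 /* Still inside a run of discarded blocks, just extend it. */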
2902 li->discard_end = li->discard_end + 1ULL;
2903
2904 else {
2905
2906
2907
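 /* Close off the previous run and start a new one at dblock. */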
2908 set_discard_range(li);
2909 li->discard_begin = from_dblock(dblock);
2910 li->discard_end = li->discard_begin + 1ULL;
2911 }
2912 } else {
2913 set_discard_range(li);
2914 li->discard_begin = li->discard_end = 0;
2915 }
2916
2917 return 0;
2918}
2919
2920static dm_cblock_t get_cache_dev_size(struct cache *cache)
2921{
2922 sector_t size = get_dev_size(cache->cache_dev);
2923 (void) sector_div(size, cache->sectors_per_block);
2924 return to_cblock(size);
2925}
2926
2927static bool can_resize(struct cache *cache, dm_cblock_t new_size)
2928{
2929 if (from_cblock(new_size) > from_cblock(cache->cache_size))
2930 return true;
2931
2932
2933
2934
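 /*
  * A shrink would drop every block from new_size upwards; refuse if
  * any of those blocks is still dirty.
  */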
2935 while (from_cblock(new_size) < from_cblock(cache->cache_size)) {
2936 if (is_dirty(cache, new_size)) {
2937 DMERR("unable to shrink cache; cache block %llu is dirty",
2938 (unsigned long long) from_cblock(new_size));
2939 return false;
2940 }
2941 new_size = to_cblock(from_cblock(new_size) + 1);
2942 }
2943
2944 return true;
2945}
2946
2947static int resize_cache_dev(struct cache *cache, dm_cblock_t new_size)
2948{
2949 int r;
2950
2951 r = dm_cache_resize(cache->cmd, new_size);
2952 if (r) {
2953 DMERR("could not resize cache metadata");
2954 return r;
2955 }
2956
2957 set_cache_size(cache, new_size);
2958
2959 return 0;
2960}
2961
2962static int cache_preresume(struct dm_target *ti)
2963{
2964 int r = 0;
2965 struct cache *cache = ti->private;
2966 dm_cblock_t csize = get_cache_dev_size(cache);
2967
2968
2969
2970
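 /*
  * Pick up any change in the size of the cache (fast) device before
  * the mappings and discards are loaded.
  */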
2971 if (!cache->sized) {
2972 r = resize_cache_dev(cache, csize);
2973 if (r)
2974 return r;
2975
2976 cache->sized = true;
2977
2978 } else if (csize != cache->cache_size) {
2979 if (!can_resize(cache, csize))
2980 return -EINVAL;
2981
2982 r = resize_cache_dev(cache, csize);
2983 if (r)
2984 return r;
2985 }
2986
2987 if (!cache->loaded_mappings) {
2988 r = dm_cache_load_mappings(cache->cmd, cache->policy,
2989 load_mapping, cache);
2990 if (r) {
2991 DMERR("could not load cache mappings");
2992 return r;
2993 }
2994
2995 cache->loaded_mappings = true;
2996 }
2997
2998 if (!cache->loaded_discards) {
2999 struct discard_load_info li;
3000
3001
3002
3003
3004
3005
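 /*
  * The discard bitset may have been resized, or the discard block
  * size changed, since the discards were committed.  Start from a
  * clear bitset and rebuild it from the metadata.
  */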
3006 clear_bitset(cache->discard_bitset, from_dblock(cache->discard_nr_blocks));
3007
3008 discard_load_info_init(cache, &li);
3009 r = dm_cache_load_discards(cache->cmd, load_discard, &li);
3010 if (r) {
3011 DMERR("could not load origin discards");
3012 return r;
3013 }
3014 set_discard_range(&li);
3015
3016 cache->loaded_discards = true;
3017 }
3018
3019 return r;
3020}
3021
3022static void cache_resume(struct dm_target *ti)
3023{
3024 struct cache *cache = ti->private;
3025
3026 cache->need_tick_bio = true;
3027 do_waker(&cache->waker.work);
3028}
3029
3030
3031
3032
3033
3034
3035
3036
3037
3038
3039
3040
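/*
 * Status format (STATUSTYPE_INFO):
 *
 * <metadata block size> <#used metadata blocks>/<#total metadata blocks>
 * <cache block size> <#used cache blocks>/<#total cache blocks>
 * <#read hits> <#read misses> <#write hits> <#write misses>
 * <#demotions> <#promotions> <#dirty>
 * <#features> <features>*
 * <#core args> <core args>*
 * <policy name> followed by the policy's config key/value pairs
 */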
3041static void cache_status(struct dm_target *ti, status_type_t type,
3042 unsigned status_flags, char *result, unsigned maxlen)
3043{
3044 int r = 0;
3045 unsigned i;
3046 ssize_t sz = 0;
3047 dm_block_t nr_free_blocks_metadata = 0;
3048 dm_block_t nr_blocks_metadata = 0;
3049 char buf[BDEVNAME_SIZE];
3050 struct cache *cache = ti->private;
3051 dm_cblock_t residency;
3052
3053 switch (type) {
3054 case STATUSTYPE_INFO:
3055
3056 if (!(status_flags & DM_STATUS_NOFLUSH_FLAG) && !dm_suspended(ti)) {
3057 r = dm_cache_commit(cache->cmd, false);
3058 if (r)
3059 DMERR("could not commit metadata for accurate status");
3060 }
3061
3062 r = dm_cache_get_free_metadata_block_count(cache->cmd,
3063 &nr_free_blocks_metadata);
3064 if (r) {
3065 DMERR("could not get metadata free block count");
3066 goto err;
3067 }
3068
3069 r = dm_cache_get_metadata_dev_size(cache->cmd, &nr_blocks_metadata);
3070 if (r) {
3071 DMERR("could not get metadata device size");
3072 goto err;
3073 }
3074
3075 residency = policy_residency(cache->policy);
3076
3077 DMEMIT("%u %llu/%llu %u %llu/%llu %u %u %u %u %u %u %lu ",
3078 (unsigned)DM_CACHE_METADATA_BLOCK_SIZE,
3079 (unsigned long long)(nr_blocks_metadata - nr_free_blocks_metadata),
3080 (unsigned long long)nr_blocks_metadata,
3081 cache->sectors_per_block,
3082 (unsigned long long) from_cblock(residency),
3083 (unsigned long long) from_cblock(cache->cache_size),
3084 (unsigned) atomic_read(&cache->stats.read_hit),
3085 (unsigned) atomic_read(&cache->stats.read_miss),
3086 (unsigned) atomic_read(&cache->stats.write_hit),
3087 (unsigned) atomic_read(&cache->stats.write_miss),
3088 (unsigned) atomic_read(&cache->stats.demotion),
3089 (unsigned) atomic_read(&cache->stats.promotion),
3090 (unsigned long) atomic_read(&cache->nr_dirty));
3091
3092 if (writethrough_mode(&cache->features))
3093 DMEMIT("1 writethrough ");
3094
3095 else if (passthrough_mode(&cache->features))
3096 DMEMIT("1 passthrough ");
3097
3098 else if (writeback_mode(&cache->features))
3099 DMEMIT("1 writeback ");
3100
3101 else {
3102 DMERR("internal error: unknown io mode: %d", (int) cache->features.io_mode);
3103 goto err;
3104 }
3105
3106 DMEMIT("2 migration_threshold %llu ", (unsigned long long) cache->migration_threshold);
3107
3108 DMEMIT("%s ", dm_cache_policy_get_name(cache->policy));
3109 if (sz < maxlen) {
3110 r = policy_emit_config_values(cache->policy, result + sz, maxlen - sz);
3111 if (r)
3112 DMERR("policy_emit_config_values returned %d", r);
3113 }
3114
3115 break;
3116
3117 case STATUSTYPE_TABLE:
3118 format_dev_t(buf, cache->metadata_dev->bdev->bd_dev);
3119 DMEMIT("%s ", buf);
3120 format_dev_t(buf, cache->cache_dev->bdev->bd_dev);
3121 DMEMIT("%s ", buf);
3122 format_dev_t(buf, cache->origin_dev->bdev->bd_dev);
3123 DMEMIT("%s", buf);
3124
3125 for (i = 0; i < cache->nr_ctr_args - 1; i++)
3126 DMEMIT(" %s", cache->ctr_args[i]);
3127 if (cache->nr_ctr_args)
3128 DMEMIT(" %s", cache->ctr_args[cache->nr_ctr_args - 1]);
3129 }
3130
3131 return;
3132
3133err:
3134 DMEMIT("Error");
3135}
3136
3137
3138
3139
3140
3141
3142
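/*
 * A cblock range is either a single block, e.g. "3456", or a half-open
 * begin-end pair, e.g. "123-234" (the end block is exclusive).
 */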
3143static int parse_cblock_range(struct cache *cache, const char *str,
3144 struct cblock_range *result)
3145{
3146 char dummy;
3147 uint64_t b, e;
3148 int r;
3149
3150
3151
3152
3153 r = sscanf(str, "%llu-%llu%c", &b, &e, &dummy);
3154 if (r < 0)
3155 return r;
3156
3157 if (r == 2) {
3158 result->begin = to_cblock(b);
3159 result->end = to_cblock(e);
3160 return 0;
3161 }
3162
3163
3164
3165
3166 r = sscanf(str, "%llu%c", &b, &dummy);
3167 if (r < 0)
3168 return r;
3169
3170 if (r == 1) {
3171 result->begin = to_cblock(b);
3172 result->end = to_cblock(from_cblock(result->begin) + 1u);
3173 return 0;
3174 }
3175
3176 DMERR("invalid cblock range '%s'", str);
3177 return -EINVAL;
3178}
3179
3180static int validate_cblock_range(struct cache *cache, struct cblock_range *range)
3181{
3182 uint64_t b = from_cblock(range->begin);
3183 uint64_t e = from_cblock(range->end);
3184 uint64_t n = from_cblock(cache->cache_size);
3185
3186 if (b >= n) {
3187 DMERR("begin cblock out of range: %llu >= %llu", b, n);
3188 return -EINVAL;
3189 }
3190
3191 if (e > n) {
3192 DMERR("end cblock out of range: %llu > %llu", e, n);
3193 return -EINVAL;
3194 }
3195
3196 if (b >= e) {
3197 DMERR("invalid cblock range: %llu >= %llu", b, e);
3198 return -EINVAL;
3199 }
3200
3201 return 0;
3202}
3203
3204static int request_invalidation(struct cache *cache, struct cblock_range *range)
3205{
3206 struct invalidation_request req;
3207
3208 INIT_LIST_HEAD(&req.list);
3209 req.cblocks = range;
3210 atomic_set(&req.complete, 0);
3211 req.err = 0;
3212 init_waitqueue_head(&req.result_wait);
3213
3214 spin_lock(&cache->invalidation_lock);
3215 list_add(&req.list, &cache->invalidation_requests);
3216 spin_unlock(&cache->invalidation_lock);
3217 wake_worker(cache);
3218
3219 wait_event(req.result_wait, atomic_read(&req.complete));
3220 return req.err;
3221}
3222
3223static int process_invalidate_cblocks_message(struct cache *cache, unsigned count,
3224 const char **cblock_ranges)
3225{
3226 int r = 0;
3227 unsigned i;
3228 struct cblock_range range;
3229
3230 if (!passthrough_mode(&cache->features)) {
3231 DMERR("cache has to be in passthrough mode for invalidation");
3232 return -EPERM;
3233 }
3234
3235 for (i = 0; i < count; i++) {
3236 r = parse_cblock_range(cache, cblock_ranges[i], &range);
3237 if (r)
3238 break;
3239
3240 r = validate_cblock_range(cache, &range);
3241 if (r)
3242 break;
3243
3244
3245
3246
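 /*
  * Hand the range to the worker and wait for it to signal
  * completion.
  */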
3247 r = request_invalidation(cache, &range);
3248 if (r)
3249 break;
3250 }
3251
3252 return r;
3253}
3254
3255
3256
3257
3258
3259
3260
3261
3262
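/*
 * Messages:
 *
 *   invalidate_cblocks [<cblock>|<cblock begin>-<cblock end>]*
 *   <key> <value>   (migration_threshold, or a policy specific key)
 */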
3263static int cache_message(struct dm_target *ti, unsigned argc, char **argv)
3264{
3265 struct cache *cache = ti->private;
3266
3267 if (!argc)
3268 return -EINVAL;
3269
3270 if (!strcasecmp(argv[0], "invalidate_cblocks"))
3271 return process_invalidate_cblocks_message(cache, argc - 1, (const char **) argv + 1);
3272
3273 if (argc != 2)
3274 return -EINVAL;
3275
3276 return set_config_value(cache, argv[0], argv[1]);
3277}
3278
3279static int cache_iterate_devices(struct dm_target *ti,
3280 iterate_devices_callout_fn fn, void *data)
3281{
3282 int r = 0;
3283 struct cache *cache = ti->private;
3284
3285 r = fn(ti, cache->cache_dev, 0, get_dev_size(cache->cache_dev), data);
3286 if (!r)
3287 r = fn(ti, cache->origin_dev, 0, ti->len, data);
3288
3289 return r;
3290}
3291
3292
3293
3294
3295
3296
3297
3298static int cache_bvec_merge(struct dm_target *ti,
3299 struct bvec_merge_data *bvm,
3300 struct bio_vec *biovec, int max_size)
3301{
3302 struct cache *cache = ti->private;
3303 struct request_queue *q = bdev_get_queue(cache->origin_dev->bdev);
3304
3305 if (!q->merge_bvec_fn)
3306 return max_size;
3307
3308 bvm->bi_bdev = cache->origin_dev->bdev;
3309 return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
3310}
3311
3312static void set_discard_limits(struct cache *cache, struct queue_limits *limits)
3313{
3314
3315
3316
3317 limits->max_discard_sectors = min_t(sector_t, cache->discard_block_size * 1024,
3318 cache->origin_sectors);
3319 limits->discard_granularity = cache->discard_block_size << SECTOR_SHIFT;
3320}
3321
3322static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
3323{
3324 struct cache *cache = ti->private;
3325 uint64_t io_opt_sectors = limits->io_opt >> SECTOR_SHIFT;
3326
3327
3328
3329
3330
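 /*
  * If the stacked limits are already compatible with the cache's block
  * size (io_opt is a multiple of it), leave them alone; otherwise
  * advertise the cache block size as both io_min and io_opt.
  */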
3331 if (io_opt_sectors < cache->sectors_per_block ||
3332 do_div(io_opt_sectors, cache->sectors_per_block)) {
3333 blk_limits_io_min(limits, cache->sectors_per_block << SECTOR_SHIFT);
3334 blk_limits_io_opt(limits, cache->sectors_per_block << SECTOR_SHIFT);
3335 }
3336 set_discard_limits(cache, limits);
3337}
3338
3339
3340
3341static struct target_type cache_target = {
3342 .name = "cache",
3343 .version = {1, 6, 0},
3344 .module = THIS_MODULE,
3345 .ctr = cache_ctr,
3346 .dtr = cache_dtr,
3347 .map = cache_map,
3348 .end_io = cache_end_io,
3349 .postsuspend = cache_postsuspend,
3350 .preresume = cache_preresume,
3351 .resume = cache_resume,
3352 .status = cache_status,
3353 .message = cache_message,
3354 .iterate_devices = cache_iterate_devices,
3355 .merge = cache_bvec_merge,
3356 .io_hints = cache_io_hints,
3357};
3358
3359static int __init dm_cache_init(void)
3360{
3361 int r;
3362
3363 r = dm_register_target(&cache_target);
3364 if (r) {
3365 DMERR("cache target registration failed: %d", r);
3366 return r;
3367 }
3368
3369 migration_cache = KMEM_CACHE(dm_cache_migration, 0);
3370 if (!migration_cache) {
3371 dm_unregister_target(&cache_target);
3372 return -ENOMEM;
3373 }
3374
3375 return 0;
3376}
3377
3378static void __exit dm_cache_exit(void)
3379{
3380 dm_unregister_target(&cache_target);
3381 kmem_cache_destroy(migration_cache);
3382}
3383
3384module_init(dm_cache_init);
3385module_exit(dm_cache_exit);
3386
3387MODULE_DESCRIPTION(DM_NAME " cache target");
3388MODULE_AUTHOR("Joe Thornber <ejt@redhat.com>");
3389MODULE_LICENSE("GPL");
3390