/*
 * Copyright (C) 2012 Red Hat. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include "dm.h"
#include "dm-bio-prison-v2.h"
#include "dm-bio-record.h"
#include "dm-cache-metadata.h"

#include <linux/dm-io.h>
#include <linux/dm-kcopyd.h>
#include <linux/jiffies.h>
#include <linux/init.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/rwsem.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

#define DM_MSG_PREFIX "cache"

DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(cache_copy_throttle,
	"A percentage of time allocated for copying to and/or from cache");

/*----------------------------------------------------------------*/

/*
 * Glossary:
 *
 * oblock: index of an origin block
 * cblock: index of a cache block
 * promotion: movement of a block from origin to cache
 * demotion: movement of a block from cache to origin
 * migration: movement of a block between the origin and cache device,
 *	      either direction
 */

/*----------------------------------------------------------------*/

struct io_tracker {
	spinlock_t lock;

	/*
	 * Sectors of in-flight IO.
	 */
	sector_t in_flight;

	/*
	 * The time, in jiffies, when this device became idle (if it is
	 * indeed idle).
	 */
	unsigned long idle_time;
	unsigned long last_update_time;
};

static void iot_init(struct io_tracker *iot)
{
	spin_lock_init(&iot->lock);
	iot->in_flight = 0ul;
	iot->idle_time = 0ul;
	iot->last_update_time = jiffies;
}

static bool __iot_idle_for(struct io_tracker *iot, unsigned long jifs)
{
	if (iot->in_flight)
		return false;

	return time_after(jiffies, iot->idle_time + jifs);
}

static bool iot_idle_for(struct io_tracker *iot, unsigned long jifs)
{
	bool r;
	unsigned long flags;

	spin_lock_irqsave(&iot->lock, flags);
	r = __iot_idle_for(iot, jifs);
	spin_unlock_irqrestore(&iot->lock, flags);

	return r;
}

static void iot_io_begin(struct io_tracker *iot, sector_t len)
{
	unsigned long flags;

	spin_lock_irqsave(&iot->lock, flags);
	iot->in_flight += len;
	spin_unlock_irqrestore(&iot->lock, flags);
}

static void __iot_io_end(struct io_tracker *iot, sector_t len)
{
	if (!len)
		return;

	iot->in_flight -= len;
	if (!iot->in_flight)
		iot->idle_time = jiffies;
}

static void iot_io_end(struct io_tracker *iot, sector_t len)
{
	unsigned long flags;

	spin_lock_irqsave(&iot->lock, flags);
	__iot_io_end(iot, len);
	spin_unlock_irqrestore(&iot->lock, flags);
}
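
/*
 * Illustrative usage of the tracker (a sketch; these calls appear in the
 * accounting helpers further down this file):
 *
 *	iot_io_begin(&cache->tracker, bio_sectors(bio));  // IO submitted
 *	...
 *	iot_io_end(&cache->tracker, pb->len);             // IO completed
 *	idle = iot_idle_for(&cache->tracker, HZ);         // idle for a second?
 */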

/*----------------------------------------------------------------*/

/*
 * Represents a chunk of future work.  'input' allows continuations to pass
 * values between themselves (typically error values).
 */
struct continuation {
	struct work_struct ws;
	blk_status_t input;
};

static inline void init_continuation(struct continuation *k,
				     void (*fn)(struct work_struct *))
{
	INIT_WORK(&k->ws, fn);
	k->input = 0;
}

static inline void queue_continuation(struct workqueue_struct *wq,
				      struct continuation *k)
{
	queue_work(wq, &k->ws);
}
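
/*
 * Example of the pattern (a sketch; see the migration code below): a
 * migration embeds a continuation, arranges the next step, then queues it
 * from a completion callback:
 *
 *	init_continuation(&mg->k, mg_success);
 *	...
 *	queue_continuation(cache->wq, &mg->k);
 */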

/*----------------------------------------------------------------*/

/*
 * The batcher collects together pieces of work that need a particular
 * operation to occur before they can proceed (typically a commit).
 */
struct batcher {
	/*
	 * The operation that everyone is waiting for.
	 */
	blk_status_t (*commit_op)(void *context);
	void *commit_context;

	/*
	 * This is how bios should be issued once the commit op is complete
	 * (accounted_request).
	 */
	void (*issue_op)(struct bio *bio, void *context);
	void *issue_context;

	/*
	 * Queued work gets put on here after commit.
	 */
	struct workqueue_struct *wq;

	spinlock_t lock;
	struct list_head work_items;
	struct bio_list bios;
	struct work_struct commit_work;

	bool commit_scheduled;
};

static void __commit(struct work_struct *_ws)
{
	struct batcher *b = container_of(_ws, struct batcher, commit_work);
	blk_status_t r;
	unsigned long flags;
	struct list_head work_items;
	struct work_struct *ws, *tmp;
	struct continuation *k;
	struct bio *bio;
	struct bio_list bios;

	INIT_LIST_HEAD(&work_items);
	bio_list_init(&bios);

	/*
	 * We have to grab these before the commit_op to avoid a race
	 * condition.
	 */
	spin_lock_irqsave(&b->lock, flags);
	list_splice_init(&b->work_items, &work_items);
	bio_list_merge(&bios, &b->bios);
	bio_list_init(&b->bios);
	b->commit_scheduled = false;
	spin_unlock_irqrestore(&b->lock, flags);

	r = b->commit_op(b->commit_context);

	list_for_each_entry_safe(ws, tmp, &work_items, entry) {
		k = container_of(ws, struct continuation, ws);
		k->input = r;
		INIT_LIST_HEAD(&ws->entry); /* to avoid a WARN_ON */
		queue_work(b->wq, ws);
	}

	while ((bio = bio_list_pop(&bios))) {
		if (r) {
			bio->bi_status = r;
			bio_endio(bio);
		} else
			b->issue_op(bio, b->issue_context);
	}
}

static void batcher_init(struct batcher *b,
			 blk_status_t (*commit_op)(void *),
			 void *commit_context,
			 void (*issue_op)(struct bio *bio, void *),
			 void *issue_context,
			 struct workqueue_struct *wq)
{
	b->commit_op = commit_op;
	b->commit_context = commit_context;
	b->issue_op = issue_op;
	b->issue_context = issue_context;
	b->wq = wq;

	spin_lock_init(&b->lock);
	INIT_LIST_HEAD(&b->work_items);
	bio_list_init(&b->bios);
	INIT_WORK(&b->commit_work, __commit);
	b->commit_scheduled = false;
}

static void async_commit(struct batcher *b)
{
	queue_work(b->wq, &b->commit_work);
}

static void continue_after_commit(struct batcher *b, struct continuation *k)
{
	unsigned long flags;
	bool commit_scheduled;

	spin_lock_irqsave(&b->lock, flags);
	commit_scheduled = b->commit_scheduled;
	list_add_tail(&k->ws.entry, &b->work_items);
	spin_unlock_irqrestore(&b->lock, flags);

	if (commit_scheduled)
		async_commit(b);
}

/*
 * Bios are errored if commit failed.
 */
static void issue_after_commit(struct batcher *b, struct bio *bio)
{
	unsigned long flags;
	bool commit_scheduled;

	spin_lock_irqsave(&b->lock, flags);
	commit_scheduled = b->commit_scheduled;
	bio_list_add(&b->bios, bio);
	spin_unlock_irqrestore(&b->lock, flags);

	if (commit_scheduled)
		async_commit(b);
}

/*
 * Call this if some urgent work is waiting for the commit to complete.
 */
static void schedule_commit(struct batcher *b)
{
	bool immediate;
	unsigned long flags;

	spin_lock_irqsave(&b->lock, flags);
	immediate = !list_empty(&b->work_items) || !bio_list_empty(&b->bios);
	b->commit_scheduled = true;
	spin_unlock_irqrestore(&b->lock, flags);

	if (immediate)
		async_commit(b);
}
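
/*
 * Putting the batcher together (a sketch of the pattern used below): work
 * and bios that must wait for a commit are queued first, then a commit is
 * scheduled.  A single __commit() then services the whole batch:
 *
 *	continue_after_commit(&cache->committer, &mg->k);	// or ...
 *	issue_after_commit(&cache->committer, bio);
 *	schedule_commit(&cache->committer);
 */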

/*
 * There are a couple of places where we let a bio run, but want to do some
 * work before calling its endio function.  We do this by temporarily
 * changing the endio fn.
 */
struct dm_hook_info {
	bio_end_io_t *bi_end_io;
};

static void dm_hook_bio(struct dm_hook_info *h, struct bio *bio,
			bio_end_io_t *bi_end_io, void *bi_private)
{
	h->bi_end_io = bio->bi_end_io;

	bio->bi_end_io = bi_end_io;
	bio->bi_private = bi_private;
}

static void dm_unhook_bio(struct dm_hook_info *h, struct bio *bio)
{
	bio->bi_end_io = h->bi_end_io;
}
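
/*
 * For example (as done in overwrite()/overwrite_endio() below):
 *
 *	dm_hook_bio(&pb->hook_info, bio, overwrite_endio, mg);	// divert endio
 *	...
 *	dm_unhook_bio(&pb->hook_info, bio);	// restore it before completing
 */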

/*----------------------------------------------------------------*/

#define MIGRATION_POOL_SIZE 128
#define COMMIT_PERIOD HZ
#define MIGRATION_COUNT_WINDOW 10

/*
 * The block size of the device holding cache data must be
 * between 32KB and 1GB.
 */
#define DATA_DEV_BLOCK_SIZE_MIN_SECTORS (32 * 1024 >> SECTOR_SHIFT)
#define DATA_DEV_BLOCK_SIZE_MAX_SECTORS (1024 * 1024 * 1024 >> SECTOR_SHIFT)

enum cache_metadata_mode {
	CM_WRITE,		/* metadata may be changed */
	CM_READ_ONLY,		/* metadata may not be changed */
	CM_FAIL
};

enum cache_io_mode {
	/*
	 * Data is written to cached blocks only.  These blocks are marked
	 * dirty.  If you lose the cache device you will lose data.
	 * Potential performance increase for both reads and writes.
	 */
	CM_IO_WRITEBACK,

	/*
	 * Data is written to both cache and origin.  Blocks are never
	 * dirty.  Potential performance benefit for reads only.
	 */
	CM_IO_WRITETHROUGH,

	/*
	 * A degraded mode useful for various cache coherency situations
	 * (eg, rolling back snapshots).  Reads and writes always go to the
	 * origin.  If a write goes to a cached oblock, then the cache
	 * block is invalidated.
	 */
	CM_IO_PASSTHROUGH
};

struct cache_features {
	enum cache_metadata_mode mode;
	enum cache_io_mode io_mode;
	unsigned metadata_version;
	bool discard_passdown:1;
};

struct cache_stats {
	atomic_t read_hit;
	atomic_t read_miss;
	atomic_t write_hit;
	atomic_t write_miss;
	atomic_t demotion;
	atomic_t promotion;
	atomic_t writeback;
	atomic_t copies_avoided;
	atomic_t cache_cell_clash;
	atomic_t commit_count;
	atomic_t discard_count;
};

struct cache {
	struct dm_target *ti;
	spinlock_t lock;

	/*
	 * Fields for converting from sectors to blocks.
	 */
	int sectors_per_block_shift;
	sector_t sectors_per_block;

	struct dm_cache_metadata *cmd;

	/*
	 * Metadata is written to this device.
	 */
	struct dm_dev *metadata_dev;

	/*
	 * The slower of the two data devices.  Typically a spindle.
	 */
	struct dm_dev *origin_dev;

	/*
	 * The faster of the two data devices.  Typically an SSD.
	 */
	struct dm_dev *cache_dev;

	/*
	 * Size of the origin device in _complete_ blocks and native sectors.
	 */
	dm_oblock_t origin_blocks;
	sector_t origin_sectors;

	/*
	 * Size of the cache device in blocks.
	 */
	dm_cblock_t cache_size;

	/*
	 * Invalidation fields.
	 */
	spinlock_t invalidation_lock;
	struct list_head invalidation_requests;

	sector_t migration_threshold;
	wait_queue_head_t migration_wait;
	atomic_t nr_allocated_migrations;

	/*
	 * The number of in flight migrations that are performing
	 * background io.  eg, promotion, writeback.
	 */
	atomic_t nr_io_migrations;

	struct bio_list deferred_bios;

	struct rw_semaphore quiesce_lock;

	struct dm_target_callbacks callbacks;

	/*
	 * origin_blocks entries, discarded if set.
	 */
	dm_dblock_t discard_nr_blocks;
	unsigned long *discard_bitset;
	uint32_t discard_block_size; /* a power of 2 times sectors per block */

	/*
	 * Rather than reconstructing the table line for the status we just
	 * save it and regurgitate.
	 */
	unsigned nr_ctr_args;
	const char **ctr_args;

	struct dm_kcopyd_client *copier;
	struct work_struct deferred_bio_worker;
	struct work_struct migration_worker;
	struct workqueue_struct *wq;
	struct delayed_work waker;
	struct dm_bio_prison_v2 *prison;

	/*
	 * cache_size entries, dirty if set.
	 */
	unsigned long *dirty_bitset;
	atomic_t nr_dirty;

	unsigned policy_nr_args;
	struct dm_cache_policy *policy;

	/*
	 * Cache features such as write-through.
	 */
	struct cache_features features;

	struct cache_stats stats;

	bool need_tick_bio:1;
	bool sized:1;
	bool invalidate:1;
	bool commit_requested:1;
	bool loaded_mappings:1;
	bool loaded_discards:1;

	struct rw_semaphore background_work_lock;

	struct batcher committer;
	struct work_struct commit_ws;

	struct io_tracker tracker;

	mempool_t migration_pool;

	struct bio_set bs;
};

struct per_bio_data {
	bool tick:1;
	unsigned req_nr:2;
	struct dm_bio_prison_cell_v2 *cell;
	struct dm_hook_info hook_info;
	sector_t len;
};

struct dm_cache_migration {
	struct continuation k;
	struct cache *cache;

	struct policy_work *op;
	struct bio *overwrite_bio;
	struct dm_bio_prison_cell_v2 *cell;

	dm_cblock_t invalidate_cblock;
	dm_oblock_t invalidate_oblock;
};

/*----------------------------------------------------------------*/

static bool writethrough_mode(struct cache *cache)
{
	return cache->features.io_mode == CM_IO_WRITETHROUGH;
}

static bool writeback_mode(struct cache *cache)
{
	return cache->features.io_mode == CM_IO_WRITEBACK;
}

static inline bool passthrough_mode(struct cache *cache)
{
	return unlikely(cache->features.io_mode == CM_IO_PASSTHROUGH);
}

/*----------------------------------------------------------------*/

static void wake_deferred_bio_worker(struct cache *cache)
{
	queue_work(cache->wq, &cache->deferred_bio_worker);
}

static void wake_migration_worker(struct cache *cache)
{
	if (passthrough_mode(cache))
		return;

	queue_work(cache->wq, &cache->migration_worker);
}

/*----------------------------------------------------------------*/

static struct dm_bio_prison_cell_v2 *alloc_prison_cell(struct cache *cache)
{
	return dm_bio_prison_alloc_cell_v2(cache->prison, GFP_NOWAIT);
}

static void free_prison_cell(struct cache *cache, struct dm_bio_prison_cell_v2 *cell)
{
	dm_bio_prison_free_cell_v2(cache->prison, cell);
}

static struct dm_cache_migration *alloc_migration(struct cache *cache)
{
	struct dm_cache_migration *mg;

	mg = mempool_alloc(&cache->migration_pool, GFP_NOWAIT);
	if (!mg)
		return NULL;

	memset(mg, 0, sizeof(*mg));

	mg->cache = cache;
	atomic_inc(&cache->nr_allocated_migrations);

	return mg;
}

static void free_migration(struct dm_cache_migration *mg)
{
	struct cache *cache = mg->cache;

	if (atomic_dec_and_test(&cache->nr_allocated_migrations))
		wake_up(&cache->migration_wait);

	mempool_free(mg, &cache->migration_pool);
}

/*----------------------------------------------------------------*/

static inline dm_oblock_t oblock_succ(dm_oblock_t b)
{
	return to_oblock(from_oblock(b) + 1ull);
}

static void build_key(dm_oblock_t begin, dm_oblock_t end, struct dm_cell_key_v2 *key)
{
	key->virtual = 0;
	key->dev = 0;
	key->block_begin = from_oblock(begin);
	key->block_end = from_oblock(end);
}
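
/*
 * E.g. to describe a single origin block, as the migration code below does:
 *
 *	build_key(oblock, oblock_succ(oblock), &key);	// [oblock, oblock + 1)
 */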

/*
 * We have two lock levels.  Level 0, which is used to prevent WRITEs, and
 * level 1 which prevents *both* READs and WRITEs.
 */
#define WRITE_LOCK_LEVEL 0
#define READ_WRITE_LOCK_LEVEL 1

static unsigned lock_level(struct bio *bio)
{
	return bio_data_dir(bio) == WRITE ?
		WRITE_LOCK_LEVEL :
		READ_WRITE_LOCK_LEVEL;
}

/*----------------------------------------------------------------
 * Per bio data
 *--------------------------------------------------------------*/

static struct per_bio_data *get_per_bio_data(struct bio *bio)
{
	struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
	BUG_ON(!pb);
	return pb;
}

static struct per_bio_data *init_per_bio_data(struct bio *bio)
{
	struct per_bio_data *pb = get_per_bio_data(bio);

	pb->tick = false;
	pb->req_nr = dm_bio_get_target_bio_nr(bio);
	pb->cell = NULL;
	pb->len = 0;

	return pb;
}

/*----------------------------------------------------------------*/

static void defer_bio(struct cache *cache, struct bio *bio)
{
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);
	bio_list_add(&cache->deferred_bios, bio);
	spin_unlock_irqrestore(&cache->lock, flags);

	wake_deferred_bio_worker(cache);
}

static void defer_bios(struct cache *cache, struct bio_list *bios)
{
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);
	bio_list_merge(&cache->deferred_bios, bios);
	bio_list_init(bios);
	spin_unlock_irqrestore(&cache->lock, flags);

	wake_deferred_bio_worker(cache);
}

/*----------------------------------------------------------------*/

static bool bio_detain_shared(struct cache *cache, dm_oblock_t oblock, struct bio *bio)
{
	bool r;
	struct per_bio_data *pb;
	struct dm_cell_key_v2 key;
	dm_oblock_t end = to_oblock(from_oblock(oblock) + 1ULL);
	struct dm_bio_prison_cell_v2 *cell_prealloc, *cell;

	cell_prealloc = alloc_prison_cell(cache); /* may fail; defer the bio if so */
	if (!cell_prealloc) {
		defer_bio(cache, bio);
		return false;
	}

	build_key(oblock, end, &key);
	r = dm_cell_get_v2(cache->prison, &key, lock_level(bio), bio, cell_prealloc, &cell);
	if (!r) {
		/*
		 * Failed to get the lock.
		 */
		free_prison_cell(cache, cell_prealloc);
		return r;
	}

	if (cell != cell_prealloc)
		free_prison_cell(cache, cell_prealloc);

	pb = get_per_bio_data(bio);
	pb->cell = cell;

	return r;
}

/*----------------------------------------------------------------*/

static bool is_dirty(struct cache *cache, dm_cblock_t b)
{
	return test_bit(from_cblock(b), cache->dirty_bitset);
}

static void set_dirty(struct cache *cache, dm_cblock_t cblock)
{
	if (!test_and_set_bit(from_cblock(cblock), cache->dirty_bitset)) {
		atomic_inc(&cache->nr_dirty);
		policy_set_dirty(cache->policy, cblock);
	}
}

/*
 * These two are called when setting after migrations to force the policy
 * and dirty bitset to be in sync.
 */
static void force_set_dirty(struct cache *cache, dm_cblock_t cblock)
{
	if (!test_and_set_bit(from_cblock(cblock), cache->dirty_bitset))
		atomic_inc(&cache->nr_dirty);
	policy_set_dirty(cache->policy, cblock);
}

static void force_clear_dirty(struct cache *cache, dm_cblock_t cblock)
{
	if (test_and_clear_bit(from_cblock(cblock), cache->dirty_bitset)) {
		if (atomic_dec_return(&cache->nr_dirty) == 0)
			dm_table_event(cache->ti->table);
	}

	policy_clear_dirty(cache->policy, cblock);
}

/*----------------------------------------------------------------*/

static bool block_size_is_power_of_two(struct cache *cache)
{
	return cache->sectors_per_block_shift >= 0;
}

/* gcc on ARM generates spurious references to __udivdi3 and __umoddi3 */
#if defined(CONFIG_ARM) && __GNUC__ == 4 && __GNUC_MINOR__ <= 6
__always_inline
#endif
static dm_block_t block_div(dm_block_t b, uint32_t n)
{
	do_div(b, n);

	return b;
}
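
/*
 * do_div() divides in place and returns the remainder, so block_div()
 * returns the quotient; e.g. block_div(1000, 64) == 15 (the remainder of
 * 40 is discarded).
 */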

static dm_block_t oblocks_per_dblock(struct cache *cache)
{
	dm_block_t oblocks = cache->discard_block_size;

	if (block_size_is_power_of_two(cache))
		oblocks >>= cache->sectors_per_block_shift;
	else
		oblocks = block_div(oblocks, cache->sectors_per_block);

	return oblocks;
}

static dm_dblock_t oblock_to_dblock(struct cache *cache, dm_oblock_t oblock)
{
	return to_dblock(block_div(from_oblock(oblock),
				   oblocks_per_dblock(cache)));
}

static void set_discard(struct cache *cache, dm_dblock_t b)
{
	unsigned long flags;

	BUG_ON(from_dblock(b) >= from_dblock(cache->discard_nr_blocks));
	atomic_inc(&cache->stats.discard_count);

	spin_lock_irqsave(&cache->lock, flags);
	set_bit(from_dblock(b), cache->discard_bitset);
	spin_unlock_irqrestore(&cache->lock, flags);
}

static void clear_discard(struct cache *cache, dm_dblock_t b)
{
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);
	clear_bit(from_dblock(b), cache->discard_bitset);
	spin_unlock_irqrestore(&cache->lock, flags);
}

static bool is_discarded(struct cache *cache, dm_dblock_t b)
{
	int r;
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);
	r = test_bit(from_dblock(b), cache->discard_bitset);
	spin_unlock_irqrestore(&cache->lock, flags);

	return r;
}

static bool is_discarded_oblock(struct cache *cache, dm_oblock_t b)
{
	int r;
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);
	r = test_bit(from_dblock(oblock_to_dblock(cache, b)),
		     cache->discard_bitset);
	spin_unlock_irqrestore(&cache->lock, flags);

	return r;
}

/*----------------------------------------------------------------
 * Remapping
 *--------------------------------------------------------------*/
static void remap_to_origin(struct cache *cache, struct bio *bio)
{
	bio_set_dev(bio, cache->origin_dev->bdev);
}

static void remap_to_cache(struct cache *cache, struct bio *bio,
			   dm_cblock_t cblock)
{
	sector_t bi_sector = bio->bi_iter.bi_sector;
	sector_t block = from_cblock(cblock);

	bio_set_dev(bio, cache->cache_dev->bdev);
	if (!block_size_is_power_of_two(cache))
		bio->bi_iter.bi_sector =
			(block * cache->sectors_per_block) +
			sector_div(bi_sector, cache->sectors_per_block);
	else
		bio->bi_iter.bi_sector =
			(block << cache->sectors_per_block_shift) |
			(bi_sector & (cache->sectors_per_block - 1));
}

static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio)
{
	unsigned long flags;
	struct per_bio_data *pb;

	spin_lock_irqsave(&cache->lock, flags);
	if (cache->need_tick_bio && !op_is_flush(bio->bi_opf) &&
	    bio_op(bio) != REQ_OP_DISCARD) {
		pb = get_per_bio_data(bio);
		pb->tick = true;
		cache->need_tick_bio = false;
	}
	spin_unlock_irqrestore(&cache->lock, flags);
}

static void __remap_to_origin_clear_discard(struct cache *cache, struct bio *bio,
					    dm_oblock_t oblock, bool bio_has_pbd)
{
	if (bio_has_pbd)
		check_if_tick_bio_needed(cache, bio);
	remap_to_origin(cache, bio);
	if (bio_data_dir(bio) == WRITE)
		clear_discard(cache, oblock_to_dblock(cache, oblock));
}

static void remap_to_origin_clear_discard(struct cache *cache, struct bio *bio,
					  dm_oblock_t oblock)
{
	// FIXME: check_if_tick_bio_needed() is called way too much through this interface
	__remap_to_origin_clear_discard(cache, bio, oblock, true);
}
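
/*
 * Worked example for remap_to_cache(), with hypothetical values: given
 * sectors_per_block = 64 (shift 6), cblock = 3 and bi_sector = 130, the
 * power-of-two path yields (3 << 6) | (130 & 63) = 192 | 2 = 194.
 */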

static void remap_to_cache_dirty(struct cache *cache, struct bio *bio,
				 dm_oblock_t oblock, dm_cblock_t cblock)
{
	check_if_tick_bio_needed(cache, bio);
	remap_to_cache(cache, bio, cblock);
	if (bio_data_dir(bio) == WRITE) {
		set_dirty(cache, cblock);
		clear_discard(cache, oblock_to_dblock(cache, oblock));
	}
}

static dm_oblock_t get_bio_block(struct cache *cache, struct bio *bio)
{
	sector_t block_nr = bio->bi_iter.bi_sector;

	if (!block_size_is_power_of_two(cache))
		(void) sector_div(block_nr, cache->sectors_per_block);
	else
		block_nr >>= cache->sectors_per_block_shift;

	return to_oblock(block_nr);
}

static bool accountable_bio(struct cache *cache, struct bio *bio)
{
	return bio_op(bio) != REQ_OP_DISCARD;
}

static void accounted_begin(struct cache *cache, struct bio *bio)
{
	struct per_bio_data *pb;

	if (accountable_bio(cache, bio)) {
		pb = get_per_bio_data(bio);
		pb->len = bio_sectors(bio);
		iot_io_begin(&cache->tracker, pb->len);
	}
}

static void accounted_complete(struct cache *cache, struct bio *bio)
{
	struct per_bio_data *pb = get_per_bio_data(bio);

	iot_io_end(&cache->tracker, pb->len);
}

static void accounted_request(struct cache *cache, struct bio *bio)
{
	accounted_begin(cache, bio);
	generic_make_request(bio);
}

static void issue_op(struct bio *bio, void *context)
{
	struct cache *cache = context;
	accounted_request(cache, bio);
}

/*
 * When running in writethrough mode we need to send writes to clean blocks
 * to both the cache and origin devices.  In future we'd like to clone the
 * bio and send them in parallel, but for now we're doing them in
 * series as this is easier.
 */
static void remap_to_origin_and_cache(struct cache *cache, struct bio *bio,
				      dm_oblock_t oblock, dm_cblock_t cblock)
{
	struct bio *origin_bio = bio_clone_fast(bio, GFP_NOIO, &cache->bs);

	BUG_ON(!origin_bio);

	bio_chain(origin_bio, bio);
	/*
	 * Passing false to __remap_to_origin_clear_discard() skips
	 * all code that might grab the per bio data, because origin_bio
	 * is a new clone and doesn't have any.
	 */
	__remap_to_origin_clear_discard(cache, origin_bio, oblock, false);
	submit_bio(origin_bio);

	remap_to_cache(cache, bio, cblock);
}

/*----------------------------------------------------------------
 * Failure modes
 *--------------------------------------------------------------*/

static enum cache_metadata_mode get_cache_mode(struct cache *cache)
{
	return cache->features.mode;
}

static const char *cache_device_name(struct cache *cache)
{
	return dm_device_name(dm_table_get_md(cache->ti->table));
}

static void notify_mode_switch(struct cache *cache, enum cache_metadata_mode mode)
{
	const char *descs[] = {
		"write",
		"read-only",
		"fail"
	};

	dm_table_event(cache->ti->table);
	DMINFO("%s: switching cache to %s mode",
	       cache_device_name(cache), descs[(int)mode]);
}

static void set_cache_mode(struct cache *cache, enum cache_metadata_mode new_mode)
{
	bool needs_check;
	enum cache_metadata_mode old_mode = get_cache_mode(cache);

	if (dm_cache_metadata_needs_check(cache->cmd, &needs_check)) {
		DMERR("%s: unable to read needs_check flag, setting failure mode.",
		      cache_device_name(cache));
		new_mode = CM_FAIL;
	}

	if (new_mode == CM_WRITE && needs_check) {
		DMERR("%s: unable to switch cache to write mode until repaired.",
		      cache_device_name(cache));
		if (old_mode != new_mode)
			new_mode = old_mode;
		else
			new_mode = CM_READ_ONLY;
	}

	/* Never move out of fail mode */
	if (old_mode == CM_FAIL)
		new_mode = CM_FAIL;

	switch (new_mode) {
	case CM_FAIL:
	case CM_READ_ONLY:
		dm_cache_metadata_set_read_only(cache->cmd);
		break;

	case CM_WRITE:
		dm_cache_metadata_set_read_write(cache->cmd);
		break;
	}

	cache->features.mode = new_mode;

	if (new_mode != old_mode)
		notify_mode_switch(cache, new_mode);
}

static void abort_transaction(struct cache *cache)
{
	const char *dev_name = cache_device_name(cache);

	if (get_cache_mode(cache) >= CM_READ_ONLY)
		return;

	if (dm_cache_metadata_set_needs_check(cache->cmd)) {
		DMERR("%s: failed to set 'needs_check' flag in metadata", dev_name);
		set_cache_mode(cache, CM_FAIL);
	}

	DMERR_LIMIT("%s: aborting current metadata transaction", dev_name);
	if (dm_cache_metadata_abort(cache->cmd)) {
		DMERR("%s: failed to abort metadata transaction", dev_name);
		set_cache_mode(cache, CM_FAIL);
	}
}

static void metadata_operation_failed(struct cache *cache, const char *op, int r)
{
	DMERR_LIMIT("%s: metadata operation '%s' failed: error = %d",
		    cache_device_name(cache), op, r);
	abort_transaction(cache);
	set_cache_mode(cache, CM_READ_ONLY);
}

/*----------------------------------------------------------------*/

static void load_stats(struct cache *cache)
{
	struct dm_cache_statistics stats;

	dm_cache_metadata_get_stats(cache->cmd, &stats);
	atomic_set(&cache->stats.read_hit, stats.read_hits);
	atomic_set(&cache->stats.read_miss, stats.read_misses);
	atomic_set(&cache->stats.write_hit, stats.write_hits);
	atomic_set(&cache->stats.write_miss, stats.write_misses);
}

static void save_stats(struct cache *cache)
{
	struct dm_cache_statistics stats;

	if (get_cache_mode(cache) >= CM_READ_ONLY)
		return;

	stats.read_hits = atomic_read(&cache->stats.read_hit);
	stats.read_misses = atomic_read(&cache->stats.read_miss);
	stats.write_hits = atomic_read(&cache->stats.write_hit);
	stats.write_misses = atomic_read(&cache->stats.write_miss);

	dm_cache_metadata_set_stats(cache->cmd, &stats);
}

static void update_stats(struct cache_stats *stats, enum policy_operation op)
{
	switch (op) {
	case POLICY_PROMOTE:
		atomic_inc(&stats->promotion);
		break;

	case POLICY_DEMOTE:
		atomic_inc(&stats->demotion);
		break;

	case POLICY_WRITEBACK:
		atomic_inc(&stats->writeback);
		break;
	}
}

/*----------------------------------------------------------------
 * Migration processing
 *
 * Migration covers moving data from the origin device to the cache, or
 * vice versa.
 *--------------------------------------------------------------*/

static void inc_io_migrations(struct cache *cache)
{
	atomic_inc(&cache->nr_io_migrations);
}

static void dec_io_migrations(struct cache *cache)
{
	atomic_dec(&cache->nr_io_migrations);
}

static bool discard_or_flush(struct bio *bio)
{
	return bio_op(bio) == REQ_OP_DISCARD || op_is_flush(bio->bi_opf);
}

static void calc_discard_block_range(struct cache *cache, struct bio *bio,
				     dm_dblock_t *b, dm_dblock_t *e)
{
	sector_t sb = bio->bi_iter.bi_sector;
	sector_t se = bio_end_sector(bio);

	*b = to_dblock(dm_sector_div_up(sb, cache->discard_block_size));

	if (se - sb < cache->discard_block_size)
		*e = *b;
	else
		*e = to_dblock(block_div(se, cache->discard_block_size));
}
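
/*
 * Only whole discard blocks are recorded, so the range is rounded inwards.
 * E.g. (hypothetical numbers) with discard_block_size = 128, a discard of
 * sectors [100, 400) gives *b = 1 and *e = 3: blocks 1 and 2 are marked,
 * and the partial blocks at either end are not.
 */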

/*----------------------------------------------------------------*/

static void prevent_background_work(struct cache *cache)
{
	lockdep_off();
	down_write(&cache->background_work_lock);
	lockdep_on();
}

static void allow_background_work(struct cache *cache)
{
	lockdep_off();
	up_write(&cache->background_work_lock);
	lockdep_on();
}

static bool background_work_begin(struct cache *cache)
{
	bool r;

	lockdep_off();
	r = down_read_trylock(&cache->background_work_lock);
	lockdep_on();

	return r;
}

static void background_work_end(struct cache *cache)
{
	lockdep_off();
	up_read(&cache->background_work_lock);
	lockdep_on();
}

/*----------------------------------------------------------------*/

static bool bio_writes_complete_block(struct cache *cache, struct bio *bio)
{
	return (bio_data_dir(bio) == WRITE) &&
		(bio->bi_iter.bi_size == (cache->sectors_per_block << SECTOR_SHIFT));
}

static bool optimisable_bio(struct cache *cache, struct bio *bio, dm_oblock_t block)
{
	return writeback_mode(cache) &&
		(is_discarded_oblock(cache, block) || bio_writes_complete_block(cache, bio));
}

static void quiesce(struct dm_cache_migration *mg,
		    void (*continuation)(struct work_struct *))
{
	init_continuation(&mg->k, continuation);
	dm_cell_quiesce_v2(mg->cache->prison, mg->cell, &mg->k.ws);
}

static struct dm_cache_migration *ws_to_mg(struct work_struct *ws)
{
	struct continuation *k = container_of(ws, struct continuation, ws);
	return container_of(k, struct dm_cache_migration, k);
}

static void copy_complete(int read_err, unsigned long write_err, void *context)
{
	struct dm_cache_migration *mg = container_of(context, struct dm_cache_migration, k);

	if (read_err || write_err)
		mg->k.input = BLK_STS_IOERR;

	queue_continuation(mg->cache->wq, &mg->k);
}

static void copy(struct dm_cache_migration *mg, bool promote)
{
	struct dm_io_region o_region, c_region;
	struct cache *cache = mg->cache;

	o_region.bdev = cache->origin_dev->bdev;
	o_region.sector = from_oblock(mg->op->oblock) * cache->sectors_per_block;
	o_region.count = cache->sectors_per_block;

	c_region.bdev = cache->cache_dev->bdev;
	c_region.sector = from_cblock(mg->op->cblock) * cache->sectors_per_block;
	c_region.count = cache->sectors_per_block;

	if (promote)
		dm_kcopyd_copy(cache->copier, &o_region, 1, &c_region, 0, copy_complete, &mg->k);
	else
		dm_kcopyd_copy(cache->copier, &c_region, 1, &o_region, 0, copy_complete, &mg->k);
}

static void bio_drop_shared_lock(struct cache *cache, struct bio *bio)
{
	struct per_bio_data *pb = get_per_bio_data(bio);

	if (pb->cell && dm_cell_put_v2(cache->prison, pb->cell))
		free_prison_cell(cache, pb->cell);
	pb->cell = NULL;
}

static void overwrite_endio(struct bio *bio)
{
	struct dm_cache_migration *mg = bio->bi_private;
	struct cache *cache = mg->cache;
	struct per_bio_data *pb = get_per_bio_data(bio);

	dm_unhook_bio(&pb->hook_info, bio);

	if (bio->bi_status)
		mg->k.input = bio->bi_status;

	queue_continuation(cache->wq, &mg->k);
}

static void overwrite(struct dm_cache_migration *mg,
		      void (*continuation)(struct work_struct *))
{
	struct bio *bio = mg->overwrite_bio;
	struct per_bio_data *pb = get_per_bio_data(bio);

	dm_hook_bio(&pb->hook_info, bio, overwrite_endio, mg);

	/*
	 * The overwrite bio is part of the copy operation, as such it does
	 * not set/clear discard or dirty flags.
	 */
	if (mg->op->op == POLICY_PROMOTE)
		remap_to_cache(mg->cache, bio, mg->op->cblock);
	else
		remap_to_origin(mg->cache, bio);

	init_continuation(&mg->k, continuation);
	accounted_request(mg->cache, bio);
}

/*
 * Migrations proceed in stages: take a cell lock that excludes writers,
 * quiesce, copy the data (or issue the overwrite bio), upgrade the lock to
 * exclude readers too, quiesce again, then update and commit the metadata.
 */
static void mg_complete(struct dm_cache_migration *mg, bool success)
{
	struct bio_list bios;
	struct cache *cache = mg->cache;
	struct policy_work *op = mg->op;
	dm_cblock_t cblock = op->cblock;

	if (success)
		update_stats(&cache->stats, op->op);

	switch (op->op) {
	case POLICY_PROMOTE:
		clear_discard(cache, oblock_to_dblock(cache, op->oblock));
		policy_complete_background_work(cache->policy, op, success);

		if (mg->overwrite_bio) {
			if (success)
				force_set_dirty(cache, cblock);
			else if (mg->k.input)
				mg->overwrite_bio->bi_status = mg->k.input;
			else
				mg->overwrite_bio->bi_status = BLK_STS_IOERR;
			bio_endio(mg->overwrite_bio);
		} else {
			if (success)
				force_clear_dirty(cache, cblock);
			dec_io_migrations(cache);
		}
		break;

	case POLICY_DEMOTE:
		/*
		 * We clear dirty here to update the nr_dirty counter.
		 */
		if (success)
			force_clear_dirty(cache, cblock);
		policy_complete_background_work(cache->policy, op, success);
		dec_io_migrations(cache);
		break;

	case POLICY_WRITEBACK:
		if (success)
			force_clear_dirty(cache, cblock);
		policy_complete_background_work(cache->policy, op, success);
		dec_io_migrations(cache);
		break;
	}

	bio_list_init(&bios);
	if (mg->cell) {
		if (dm_cell_unlock_v2(cache->prison, mg->cell, &bios))
			free_prison_cell(cache, mg->cell);
	}

	free_migration(mg);
	defer_bios(cache, &bios);
	wake_migration_worker(cache);

	background_work_end(cache);
}

static void mg_success(struct work_struct *ws)
{
	struct dm_cache_migration *mg = ws_to_mg(ws);
	mg_complete(mg, mg->k.input == 0);
}

static void mg_update_metadata(struct work_struct *ws)
{
	int r;
	struct dm_cache_migration *mg = ws_to_mg(ws);
	struct cache *cache = mg->cache;
	struct policy_work *op = mg->op;

	switch (op->op) {
	case POLICY_PROMOTE:
		r = dm_cache_insert_mapping(cache->cmd, op->cblock, op->oblock);
		if (r) {
			DMERR_LIMIT("%s: migration failed; couldn't insert mapping",
				    cache_device_name(cache));
			metadata_operation_failed(cache, "dm_cache_insert_mapping", r);

			mg_complete(mg, false);
			return;
		}
		mg_complete(mg, true);
		break;

	case POLICY_DEMOTE:
		r = dm_cache_remove_mapping(cache->cmd, op->cblock);
		if (r) {
			DMERR_LIMIT("%s: migration failed; couldn't update on disk metadata",
				    cache_device_name(cache));
			metadata_operation_failed(cache, "dm_cache_remove_mapping", r);

			mg_complete(mg, false);
			return;
		}

		/*
		 * It would be nice if we only had to commit when a REQ_FLUSH
		 * comes through.  But there's one scenario that we have to
		 * look out for:
		 *
		 * - vblock x in a cache block
		 * - demotion occurs
		 * - the cache block gets reallocated and overwritten
		 * - crash
		 *
		 * When we recover, because there was no commit the cache will
		 * rollback to having the data for vblock x in the cache block.
		 * But the cache block has since been overwritten, so it'll end
		 * up pointing to data that was never in 'x' during the history
		 * of the device.
		 *
		 * To avoid this issue we require a commit as part of the
		 * demotion operation.
		 */
		init_continuation(&mg->k, mg_success);
		continue_after_commit(&cache->committer, &mg->k);
		schedule_commit(&cache->committer);
		break;

	case POLICY_WRITEBACK:
		mg_complete(mg, true);
		break;
	}
}

static void mg_update_metadata_after_copy(struct work_struct *ws)
{
	struct dm_cache_migration *mg = ws_to_mg(ws);

	/*
	 * Did the copy succeed?
	 */
	if (mg->k.input)
		mg_complete(mg, false);
	else
		mg_update_metadata(ws);
}

static void mg_upgrade_lock(struct work_struct *ws)
{
	int r;
	struct dm_cache_migration *mg = ws_to_mg(ws);

	/*
	 * Did the copy succeed?
	 */
	if (mg->k.input)
		mg_complete(mg, false);

	else {
		/*
		 * Now we want the lock to prevent both reads and writes.
		 */
		r = dm_cell_lock_promote_v2(mg->cache->prison, mg->cell,
					    READ_WRITE_LOCK_LEVEL);
		if (r < 0)
			mg_complete(mg, false);

		else if (r)
			quiesce(mg, mg_update_metadata);

		else
			mg_update_metadata(ws);
	}
}

static void mg_full_copy(struct work_struct *ws)
{
	struct dm_cache_migration *mg = ws_to_mg(ws);
	struct cache *cache = mg->cache;
	struct policy_work *op = mg->op;
	bool is_policy_promote = (op->op == POLICY_PROMOTE);

	if ((!is_policy_promote && !is_dirty(cache, op->cblock)) ||
	    is_discarded_oblock(cache, op->oblock)) {
		mg_upgrade_lock(ws);
		return;
	}

	init_continuation(&mg->k, mg_upgrade_lock);
	copy(mg, is_policy_promote);
}

static void mg_copy(struct work_struct *ws)
{
	struct dm_cache_migration *mg = ws_to_mg(ws);

	if (mg->overwrite_bio) {
		/*
		 * No exclusive lock was held when we last checked if the bio
		 * was optimisable.  So we have to check again in case things
		 * have changed (eg, the block may no longer be discarded).
		 */
		if (!optimisable_bio(mg->cache, mg->overwrite_bio, mg->op->oblock)) {
			/*
			 * Fallback to a real full copy after doing some tidying up.
			 */
			bool rb = bio_detain_shared(mg->cache, mg->op->oblock, mg->overwrite_bio);
			BUG_ON(rb); /* An exclusive lock must _not_ be held for this block */
			mg->overwrite_bio = NULL;
			inc_io_migrations(mg->cache);
			mg_full_copy(ws);
			return;
		}

		/*
		 * It's safe to do this here, even though it's new data
		 * because all IO has been locked out of the block.
		 *
		 * mg_lock_writes() already took READ_WRITE_LOCK_LEVEL
		 * so _not_ using mg_upgrade_lock() as the continuation.
		 */
		overwrite(mg, mg_update_metadata_after_copy);

	} else
		mg_full_copy(ws);
}

static int mg_lock_writes(struct dm_cache_migration *mg)
{
	int r;
	struct dm_cell_key_v2 key;
	struct cache *cache = mg->cache;
	struct dm_bio_prison_cell_v2 *prealloc;

	prealloc = alloc_prison_cell(cache);
	if (!prealloc) {
		DMERR_LIMIT("%s: alloc_prison_cell failed", cache_device_name(cache));
		mg_complete(mg, false);
		return -ENOMEM;
	}

	/*
	 * Prevent writes to the block, but allow reads to continue.
	 * Unless we're using an overwrite bio, in which case we lock
	 * everything.
	 */
	build_key(mg->op->oblock, oblock_succ(mg->op->oblock), &key);
	r = dm_cell_lock_v2(cache->prison, &key,
			    mg->overwrite_bio ? READ_WRITE_LOCK_LEVEL : WRITE_LOCK_LEVEL,
			    prealloc, &mg->cell);
	if (r < 0) {
		free_prison_cell(cache, prealloc);
		mg_complete(mg, false);
		return r;
	}

	if (mg->cell != prealloc)
		free_prison_cell(cache, prealloc);

	if (r == 0)
		mg_copy(&mg->k.ws);
	else
		quiesce(mg, mg_copy);

	return 0;
}

static int mg_start(struct cache *cache, struct policy_work *op, struct bio *bio)
{
	struct dm_cache_migration *mg;

	if (!background_work_begin(cache)) {
		policy_complete_background_work(cache->policy, op, false);
		return -EPERM;
	}

	mg = alloc_migration(cache);
	if (!mg) {
		policy_complete_background_work(cache->policy, op, false);
		background_work_end(cache);
		return -ENOMEM;
	}

	mg->op = op;
	mg->overwrite_bio = bio;

	if (!bio)
		inc_io_migrations(cache);

	return mg_lock_writes(mg);
}

/*----------------------------------------------------------------
 * invalidation processing
 *--------------------------------------------------------------*/

static void invalidate_complete(struct dm_cache_migration *mg, bool success)
{
	struct bio_list bios;
	struct cache *cache = mg->cache;

	bio_list_init(&bios);
	if (dm_cell_unlock_v2(cache->prison, mg->cell, &bios))
		free_prison_cell(cache, mg->cell);

	if (!success && mg->overwrite_bio)
		bio_io_error(mg->overwrite_bio);

	free_migration(mg);
	defer_bios(cache, &bios);

	background_work_end(cache);
}

static void invalidate_completed(struct work_struct *ws)
{
	struct dm_cache_migration *mg = ws_to_mg(ws);
	invalidate_complete(mg, !mg->k.input);
}

static int invalidate_cblock(struct cache *cache, dm_cblock_t cblock)
{
	int r = policy_invalidate_mapping(cache->policy, cblock);
	if (!r) {
		r = dm_cache_remove_mapping(cache->cmd, cblock);
		if (r) {
			DMERR_LIMIT("%s: invalidation failed; couldn't update on disk metadata",
				    cache_device_name(cache));
			metadata_operation_failed(cache, "dm_cache_remove_mapping", r);
		}

	} else if (r == -ENODATA) {
		/*
		 * Harmless, already unmapped.
		 */
		r = 0;

	} else
		DMERR("%s: policy_invalidate_mapping failed", cache_device_name(cache));

	return r;
}

static void invalidate_remove(struct work_struct *ws)
{
	int r;
	struct dm_cache_migration *mg = ws_to_mg(ws);
	struct cache *cache = mg->cache;

	r = invalidate_cblock(cache, mg->invalidate_cblock);
	if (r) {
		invalidate_complete(mg, false);
		return;
	}

	init_continuation(&mg->k, invalidate_completed);
	continue_after_commit(&cache->committer, &mg->k);
	remap_to_origin_clear_discard(cache, mg->overwrite_bio, mg->invalidate_oblock);
	mg->overwrite_bio = NULL;
	schedule_commit(&cache->committer);
}

static int invalidate_lock(struct dm_cache_migration *mg)
{
	int r;
	struct dm_cell_key_v2 key;
	struct cache *cache = mg->cache;
	struct dm_bio_prison_cell_v2 *prealloc;

	prealloc = alloc_prison_cell(cache);
	if (!prealloc) {
		invalidate_complete(mg, false);
		return -ENOMEM;
	}

	build_key(mg->invalidate_oblock, oblock_succ(mg->invalidate_oblock), &key);
	r = dm_cell_lock_v2(cache->prison, &key,
			    READ_WRITE_LOCK_LEVEL, prealloc, &mg->cell);
	if (r < 0) {
		free_prison_cell(cache, prealloc);
		invalidate_complete(mg, false);
		return r;
	}

	if (mg->cell != prealloc)
		free_prison_cell(cache, prealloc);

	if (r)
		quiesce(mg, invalidate_remove);

	else {
		/*
		 * We can't call invalidate_remove() directly here because we
		 * might still be in request context.
		 */
		init_continuation(&mg->k, invalidate_remove);
		queue_work(cache->wq, &mg->k.ws);
	}

	return 0;
}

static int invalidate_start(struct cache *cache, dm_cblock_t cblock,
			    dm_oblock_t oblock, struct bio *bio)
{
	struct dm_cache_migration *mg;

	if (!background_work_begin(cache))
		return -EPERM;

	mg = alloc_migration(cache);
	if (!mg) {
		background_work_end(cache);
		return -ENOMEM;
	}

	mg->overwrite_bio = bio;
	mg->invalidate_cblock = cblock;
	mg->invalidate_oblock = oblock;

	return invalidate_lock(mg);
}

/*----------------------------------------------------------------
 * bio processing
 *--------------------------------------------------------------*/

enum busy {
	IDLE,
	BUSY
};

static enum busy spare_migration_bandwidth(struct cache *cache)
{
	bool idle = iot_idle_for(&cache->tracker, HZ);
	sector_t current_volume = (atomic_read(&cache->nr_io_migrations) + 1) *
		cache->sectors_per_block;

	if (idle && current_volume <= cache->migration_threshold)
		return IDLE;
	else
		return BUSY;
}

static void inc_hit_counter(struct cache *cache, struct bio *bio)
{
	atomic_inc(bio_data_dir(bio) == READ ?
		   &cache->stats.read_hit : &cache->stats.write_hit);
}

static void inc_miss_counter(struct cache *cache, struct bio *bio)
{
	atomic_inc(bio_data_dir(bio) == READ ?
		   &cache->stats.read_miss : &cache->stats.write_miss);
}

/*----------------------------------------------------------------*/

static int map_bio(struct cache *cache, struct bio *bio, dm_oblock_t block,
		   bool *commit_needed)
{
	int r, data_dir;
	bool rb, background_queued;
	dm_cblock_t cblock;

	*commit_needed = false;

	rb = bio_detain_shared(cache, block, bio);
	if (!rb) {
		/*
		 * An exclusive lock was held for this block, so we have to
		 * wait.  We set the commit_needed flag so the current
		 * transaction will be committed asap, allowing this lock
		 * to be dropped sooner.
		 */
		*commit_needed = true;
		return DM_MAPIO_SUBMITTED;
	}

	data_dir = bio_data_dir(bio);

	if (optimisable_bio(cache, bio, block)) {
		struct policy_work *op = NULL;

		r = policy_lookup_with_work(cache->policy, block, &cblock, data_dir, true, &op);
		if (unlikely(r && r != -ENOENT)) {
			DMERR_LIMIT("%s: policy_lookup_with_work() failed with r = %d",
				    cache_device_name(cache), r);
			bio_io_error(bio);
			return DM_MAPIO_SUBMITTED;
		}

		if (r == -ENOENT && op) {
			bio_drop_shared_lock(cache, bio);
			BUG_ON(op->op != POLICY_PROMOTE);
			mg_start(cache, op, bio);
			return DM_MAPIO_SUBMITTED;
		}
	} else {
		r = policy_lookup(cache->policy, block, &cblock, data_dir, false, &background_queued);
		if (unlikely(r && r != -ENOENT)) {
			DMERR_LIMIT("%s: policy_lookup() failed with r = %d",
				    cache_device_name(cache), r);
			bio_io_error(bio);
			return DM_MAPIO_SUBMITTED;
		}

		if (background_queued)
			wake_migration_worker(cache);
	}

	if (r == -ENOENT) {
		struct per_bio_data *pb = get_per_bio_data(bio);

		/*
		 * Miss.
		 */
		inc_miss_counter(cache, bio);
		if (pb->req_nr == 0) {
			accounted_begin(cache, bio);
			remap_to_origin_clear_discard(cache, bio, block);
		} else {
			/*
			 * This is a duplicate writethrough io that is no
			 * longer needed because the block has been demoted.
			 */
			bio_endio(bio);
			return DM_MAPIO_SUBMITTED;
		}
	} else {
		/*
		 * Hit.
		 */
		inc_hit_counter(cache, bio);

		/*
		 * Passthrough always maps to the origin, invalidating any
		 * cache blocks that are written to.
		 */
		if (passthrough_mode(cache)) {
			if (bio_data_dir(bio) == WRITE) {
				bio_drop_shared_lock(cache, bio);
				atomic_inc(&cache->stats.demotion);
				invalidate_start(cache, cblock, block, bio);
			} else
				remap_to_origin_clear_discard(cache, bio, block);
		} else {
			if (bio_data_dir(bio) == WRITE && writethrough_mode(cache) &&
			    !is_dirty(cache, cblock)) {
				remap_to_origin_and_cache(cache, bio, block, cblock);
				accounted_begin(cache, bio);
			} else
				remap_to_cache_dirty(cache, bio, block, cblock);
		}
	}

	/*
	 * dm core turns FUA requests into a separate payload and FLUSH req.
	 */
	if (bio->bi_opf & REQ_FUA) {
		/*
		 * issue_after_commit will call accounted_begin a second time.
		 * So we call accounted_complete() to avoid double accounting.
		 */
		accounted_complete(cache, bio);
		issue_after_commit(&cache->committer, bio);
		*commit_needed = true;
		return DM_MAPIO_SUBMITTED;
	}

	return DM_MAPIO_REMAPPED;
}

static bool process_bio(struct cache *cache, struct bio *bio)
{
	bool commit_needed;

	if (map_bio(cache, bio, get_bio_block(cache, bio), &commit_needed) == DM_MAPIO_REMAPPED)
		generic_make_request(bio);

	return commit_needed;
}

/*
 * A non-zero return indicates read_only or fail_io mode.
 */
static int commit(struct cache *cache, bool clean_shutdown)
{
	int r;

	if (get_cache_mode(cache) >= CM_READ_ONLY)
		return -EINVAL;

	atomic_inc(&cache->stats.commit_count);
	r = dm_cache_commit(cache->cmd, clean_shutdown);
	if (r)
		metadata_operation_failed(cache, "dm_cache_commit", r);

	return r;
}

/*
 * Used by the batcher.
 */
static blk_status_t commit_op(void *context)
{
	struct cache *cache = context;

	if (dm_cache_changed_this_transaction(cache->cmd))
		return errno_to_blk_status(commit(cache, false));

	return 0;
}

/*----------------------------------------------------------------*/

static bool process_flush_bio(struct cache *cache, struct bio *bio)
{
	struct per_bio_data *pb = get_per_bio_data(bio);

	if (!pb->req_nr)
		remap_to_origin(cache, bio);
	else
		remap_to_cache(cache, bio, 0);

	issue_after_commit(&cache->committer, bio);
	return true;
}

static bool process_discard_bio(struct cache *cache, struct bio *bio)
{
	dm_dblock_t b, e;

	/*
	 * FIXME: do we need to lock the region, or can we assume the
	 * user won't issue discards concurrently with other IO?
	 */
	calc_discard_block_range(cache, bio, &b, &e);
	while (b != e) {
		set_discard(cache, b);
		b = to_dblock(from_dblock(b) + 1);
	}

	if (cache->features.discard_passdown) {
		remap_to_origin(cache, bio);
		generic_make_request(bio);
	} else
		bio_endio(bio);

	return false;
}

static void process_deferred_bios(struct work_struct *ws)
{
	struct cache *cache = container_of(ws, struct cache, deferred_bio_worker);

	unsigned long flags;
	bool commit_needed = false;
	struct bio_list bios;
	struct bio *bio;

	bio_list_init(&bios);

	spin_lock_irqsave(&cache->lock, flags);
	bio_list_merge(&bios, &cache->deferred_bios);
	bio_list_init(&cache->deferred_bios);
	spin_unlock_irqrestore(&cache->lock, flags);

	while ((bio = bio_list_pop(&bios))) {
		if (bio->bi_opf & REQ_PREFLUSH)
			commit_needed = process_flush_bio(cache, bio) || commit_needed;

		else if (bio_op(bio) == REQ_OP_DISCARD)
			commit_needed = process_discard_bio(cache, bio) || commit_needed;

		else
			commit_needed = process_bio(cache, bio) || commit_needed;
	}

	if (commit_needed)
		schedule_commit(&cache->committer);
}

/*----------------------------------------------------------------
 * Main worker loop
 *--------------------------------------------------------------*/

static void requeue_deferred_bios(struct cache *cache)
{
	struct bio *bio;
	struct bio_list bios;

	bio_list_init(&bios);
	bio_list_merge(&bios, &cache->deferred_bios);
	bio_list_init(&cache->deferred_bios);

	while ((bio = bio_list_pop(&bios))) {
		bio->bi_status = BLK_STS_DM_REQUEUE;
		bio_endio(bio);
	}
}

/*
 * We want to commit periodically so that not too much
 * unwritten metadata builds up.
 */
static void do_waker(struct work_struct *ws)
{
	struct cache *cache = container_of(to_delayed_work(ws), struct cache, waker);

	policy_tick(cache->policy, true);
	wake_migration_worker(cache);
	schedule_commit(&cache->committer);
	queue_delayed_work(cache->wq, &cache->waker, COMMIT_PERIOD);
}

static void check_migrations(struct work_struct *ws)
{
	int r;
	struct policy_work *op;
	struct cache *cache = container_of(ws, struct cache, migration_worker);
	enum busy b;

	for (;;) {
		b = spare_migration_bandwidth(cache);

		r = policy_get_background_work(cache->policy, b == IDLE, &op);
		if (r == -ENODATA)
			break;

		if (r) {
			DMERR_LIMIT("%s: policy_background_work failed",
				    cache_device_name(cache));
			break;
		}

		r = mg_start(cache, op, NULL);
		if (r)
			break;
	}
}

/*----------------------------------------------------------------
 * Target methods
 *--------------------------------------------------------------*/

/*
 * This function gets called on the error paths of the constructor, so we
 * have to cope with a partially initialised struct.
 */
static void destroy(struct cache *cache)
{
	unsigned i;

	mempool_exit(&cache->migration_pool);

	if (cache->prison)
		dm_bio_prison_destroy_v2(cache->prison);

	if (cache->wq)
		destroy_workqueue(cache->wq);

	if (cache->dirty_bitset)
		free_bitset(cache->dirty_bitset);

	if (cache->discard_bitset)
		free_bitset(cache->discard_bitset);

	if (cache->copier)
		dm_kcopyd_client_destroy(cache->copier);

	if (cache->cmd)
		dm_cache_metadata_close(cache->cmd);

	if (cache->metadata_dev)
		dm_put_device(cache->ti, cache->metadata_dev);

	if (cache->origin_dev)
		dm_put_device(cache->ti, cache->origin_dev);

	if (cache->cache_dev)
		dm_put_device(cache->ti, cache->cache_dev);

	if (cache->policy)
		dm_cache_policy_destroy(cache->policy);

	for (i = 0; i < cache->nr_ctr_args ; i++)
		kfree(cache->ctr_args[i]);
	kfree(cache->ctr_args);

	bioset_exit(&cache->bs);

	kfree(cache);
}

static void cache_dtr(struct dm_target *ti)
{
	struct cache *cache = ti->private;

	destroy(cache);
}

static sector_t get_dev_size(struct dm_dev *dev)
{
	return i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT;
}

/*----------------------------------------------------------------*/

/*
 * Construct a cache device mapping.
 *
 * cache <metadata dev> <cache dev> <origin dev> <block size>
 *       <#feature args> [<feature arg>]*
 *       <policy> <#policy args> [<policy arg>]*
 *
 * metadata dev    : fast device holding the persistent metadata
 * cache dev       : fast device holding cached data blocks
 * origin dev      : slow device holding original data blocks
 * block size      : cache unit size in sectors
 *
 * #feature args   : number of feature arguments passed
 * feature args    : writethrough (the default is writeback)
 *
 * policy          : the replacement policy to use
 * #policy args    : an even number of policy arguments corresponding
 *                   to key/value pairs passed to the policy
 * policy args     : key/value pairs passed to the policy
 *                   E.g. 'sequential_threshold 1024'
 *                   See cache-policies.txt for details.
 *
 * Optional feature arguments are:
 *   writethrough  : write through caching that prohibits cache block
 *                   content from being different from origin block content.
 *                   Without this argument, the default behaviour is to write
 *                   back cache block contents later for performance reasons,
 *                   so they may differ from the corresponding origin blocks.
 */
struct cache_args {
	struct dm_target *ti;

	struct dm_dev *metadata_dev;

	struct dm_dev *cache_dev;
	sector_t cache_sectors;

	struct dm_dev *origin_dev;
	sector_t origin_sectors;

	uint32_t block_size;

	const char *policy_name;
	int policy_argc;
	const char **policy_argv;

	struct cache_features features;
};

static void destroy_cache_args(struct cache_args *ca)
{
	if (ca->metadata_dev)
		dm_put_device(ca->ti, ca->metadata_dev);

	if (ca->cache_dev)
		dm_put_device(ca->ti, ca->cache_dev);

	if (ca->origin_dev)
		dm_put_device(ca->ti, ca->origin_dev);

	kfree(ca);
}

static bool at_least_one_arg(struct dm_arg_set *as, char **error)
{
	if (!as->argc) {
		*error = "Insufficient args";
		return false;
	}

	return true;
}

static int parse_metadata_dev(struct cache_args *ca, struct dm_arg_set *as,
			      char **error)
{
	int r;
	sector_t metadata_dev_size;
	char b[BDEVNAME_SIZE];

	if (!at_least_one_arg(as, error))
		return -EINVAL;

	r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
			  &ca->metadata_dev);
	if (r) {
		*error = "Error opening metadata device";
		return r;
	}

	metadata_dev_size = get_dev_size(ca->metadata_dev);
	if (metadata_dev_size > DM_CACHE_METADATA_MAX_SECTORS_WARNING)
		DMWARN("Metadata device %s is larger than %u sectors: excess space will not be used.",
		       bdevname(ca->metadata_dev->bdev, b), DM_CACHE_METADATA_MAX_SECTORS_WARNING);

	return 0;
}

static int parse_cache_dev(struct cache_args *ca, struct dm_arg_set *as,
			   char **error)
{
	int r;

	if (!at_least_one_arg(as, error))
		return -EINVAL;

	r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
			  &ca->cache_dev);
	if (r) {
		*error = "Error opening cache device";
		return r;
	}
	ca->cache_sectors = get_dev_size(ca->cache_dev);

	return 0;
}

static int parse_origin_dev(struct cache_args *ca, struct dm_arg_set *as,
			    char **error)
{
	int r;

	if (!at_least_one_arg(as, error))
		return -EINVAL;

	r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
			  &ca->origin_dev);
	if (r) {
		*error = "Error opening origin device";
		return r;
	}

	ca->origin_sectors = get_dev_size(ca->origin_dev);
	if (ca->ti->len > ca->origin_sectors) {
		*error = "Device size larger than cached device";
		return -EINVAL;
	}

	return 0;
}

static int parse_block_size(struct cache_args *ca, struct dm_arg_set *as,
			    char **error)
{
	unsigned long block_size;

	if (!at_least_one_arg(as, error))
		return -EINVAL;

	if (kstrtoul(dm_shift_arg(as), 10, &block_size) || !block_size ||
	    block_size < DATA_DEV_BLOCK_SIZE_MIN_SECTORS ||
	    block_size > DATA_DEV_BLOCK_SIZE_MAX_SECTORS ||
	    block_size & (DATA_DEV_BLOCK_SIZE_MIN_SECTORS - 1)) {
		*error = "Invalid data block size";
		return -EINVAL;
	}

	if (block_size > ca->cache_sectors) {
		*error = "Data block size is larger than the cache device";
		return -EINVAL;
	}

	ca->block_size = block_size;

	return 0;
}

static void init_features(struct cache_features *cf)
{
	cf->mode = CM_WRITE;
	cf->io_mode = CM_IO_WRITEBACK;
	cf->metadata_version = 1;
	cf->discard_passdown = true;
}

static int parse_features(struct cache_args *ca, struct dm_arg_set *as,
			  char **error)
{
	static const struct dm_arg _args[] = {
		{0, 3, "Invalid number of cache feature arguments"},
	};

	int r, mode_ctr = 0;
	unsigned argc;
	const char *arg;
	struct cache_features *cf = &ca->features;

	init_features(cf);

	r = dm_read_arg_group(_args, as, &argc, error);
	if (r)
		return -EINVAL;

	while (argc--) {
		arg = dm_shift_arg(as);

		if (!strcasecmp(arg, "writeback")) {
			cf->io_mode = CM_IO_WRITEBACK;
			mode_ctr++;
		}

		else if (!strcasecmp(arg, "writethrough")) {
			cf->io_mode = CM_IO_WRITETHROUGH;
			mode_ctr++;
		}

		else if (!strcasecmp(arg, "passthrough")) {
			cf->io_mode = CM_IO_PASSTHROUGH;
			mode_ctr++;
		}

		else if (!strcasecmp(arg, "metadata2"))
			cf->metadata_version = 2;

		else if (!strcasecmp(arg, "no_discard_passdown"))
			cf->discard_passdown = false;

		else {
			*error = "Unrecognised cache feature requested";
			return -EINVAL;
		}
	}

	if (mode_ctr > 1) {
		*error = "Duplicate cache io_mode features requested";
		return -EINVAL;
	}

	return 0;
}

static int parse_policy(struct cache_args *ca, struct dm_arg_set *as,
			char **error)
{
	static const struct dm_arg _args[] = {
		{0, 1024, "Invalid number of policy arguments"},
	};

	int r;

	if (!at_least_one_arg(as, error))
		return -EINVAL;

	ca->policy_name = dm_shift_arg(as);

	r = dm_read_arg_group(_args, as, &ca->policy_argc, error);
	if (r)
		return -EINVAL;

	ca->policy_argv = (const char **)as->argv;
	dm_consume_args(as, ca->policy_argc);

	return 0;
}

static int parse_cache_args(struct cache_args *ca, int argc, char **argv,
			    char **error)
{
	int r;
	struct dm_arg_set as;

	as.argc = argc;
	as.argv = argv;

	r = parse_metadata_dev(ca, &as, error);
	if (r)
		return r;

	r = parse_cache_dev(ca, &as, error);
	if (r)
		return r;

	r = parse_origin_dev(ca, &as, error);
	if (r)
		return r;

	r = parse_block_size(ca, &as, error);
	if (r)
		return r;

	r = parse_features(ca, &as, error);
	if (r)
		return r;

	r = parse_policy(ca, &as, error);
	if (r)
		return r;

	return 0;
}
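
/*
 * An illustrative table line matching the parsing above (device names are
 * hypothetical):
 *
 *	0 1048576 cache /dev/mapper/meta /dev/mapper/ssd /dev/mapper/hdd \
 *	    512 1 writeback smq 0
 */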

/*----------------------------------------------------------------*/

static struct kmem_cache *migration_cache;

#define NOT_CORE_OPTION 1

static int process_config_option(struct cache *cache, const char *key, const char *value)
{
	unsigned long tmp;

	if (!strcasecmp(key, "migration_threshold")) {
		if (kstrtoul(value, 10, &tmp))
			return -EINVAL;

		cache->migration_threshold = tmp;
		return 0;
	}

	return NOT_CORE_OPTION;
}

static int set_config_value(struct cache *cache, const char *key, const char *value)
{
	int r = process_config_option(cache, key, value);

	if (r == NOT_CORE_OPTION)
		r = policy_set_config_value(cache->policy, key, value);

	if (r)
		DMWARN("bad config value for %s: %s", key, value);

	return r;
}

static int set_config_values(struct cache *cache, int argc, const char **argv)
{
	int r = 0;

	if (argc & 1) {
		DMWARN("Odd number of policy arguments given but they should be <key> <value> pairs.");
		return -EINVAL;
	}

	while (argc) {
		r = set_config_value(cache, argv[0], argv[1]);
		if (r)
			break;

		argc -= 2;
		argv += 2;
	}

	return r;
}

static int create_cache_policy(struct cache *cache, struct cache_args *ca,
			       char **error)
{
	struct dm_cache_policy *p = dm_cache_policy_create(ca->policy_name,
							   cache->cache_size,
							   cache->origin_sectors,
							   cache->sectors_per_block);
	if (IS_ERR(p)) {
		*error = "Error creating cache's policy";
		return PTR_ERR(p);
	}
	cache->policy = p;
	BUG_ON(!cache->policy);

	return 0;
}

/*
 * We want the discard block size to be at least the size of the cache
 * block size and have no more than 2^14 discard blocks across the origin.
 */
#define MAX_DISCARD_BLOCKS (1 << 14)

static bool too_many_discard_blocks(sector_t discard_block_size,
				    sector_t origin_size)
{
	(void) sector_div(origin_size, discard_block_size);

	return origin_size > MAX_DISCARD_BLOCKS;
}

static sector_t calculate_discard_block_size(sector_t cache_block_size,
					     sector_t origin_size)
{
	sector_t discard_block_size = cache_block_size;

	if (origin_size)
		while (too_many_discard_blocks(discard_block_size, origin_size))
			discard_block_size *= 2;

	return discard_block_size;
}
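
/*
 * E.g. (hypothetical sizes) with a 512 sector cache block and a 2^30
 * sector origin, 512 is doubled until origin_size / discard_block_size
 * drops to MAX_DISCARD_BLOCKS, giving a discard block size of 65536
 * sectors.
 */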

static void set_cache_size(struct cache *cache, dm_cblock_t size)
{
	dm_block_t nr_blocks = from_cblock(size);

	if (nr_blocks > (1 << 20) && cache->cache_size != size)
		DMWARN_LIMIT("You have created a cache device with a lot of individual cache blocks (%llu)\n"
			     "All these mappings can consume a lot of kernel memory, and take some time to read/write.\n"
			     "Please consider increasing the cache block size to reduce the overall cache block count.",
			     (unsigned long long) nr_blocks);

	cache->cache_size = size;
}

static int is_congested(struct dm_dev *dev, int bdi_bits)
{
	struct request_queue *q = bdev_get_queue(dev->bdev);
	return bdi_congested(q->backing_dev_info, bdi_bits);
}

static int cache_is_congested(struct dm_target_callbacks *cb, int bdi_bits)
{
	struct cache *cache = container_of(cb, struct cache, callbacks);

	return is_congested(cache->origin_dev, bdi_bits) ||
		is_congested(cache->cache_dev, bdi_bits);
}
2484
2485#define DEFAULT_MIGRATION_THRESHOLD 2048
2486
2487static int cache_create(struct cache_args *ca, struct cache **result)
2488{
2489 int r = 0;
2490 char **error = &ca->ti->error;
2491 struct cache *cache;
2492 struct dm_target *ti = ca->ti;
2493 dm_block_t origin_blocks;
2494 struct dm_cache_metadata *cmd;
2495 bool may_format = ca->features.mode == CM_WRITE;
2496
2497 cache = kzalloc(sizeof(*cache), GFP_KERNEL);
2498 if (!cache)
2499 return -ENOMEM;
2500
2501 cache->ti = ca->ti;
2502 ti->private = cache;
2503 ti->num_flush_bios = 2;
2504 ti->flush_supported = true;
2505
2506 ti->num_discard_bios = 1;
2507 ti->discards_supported = true;
2508
2509 ti->per_io_data_size = sizeof(struct per_bio_data);
2510
2511 cache->features = ca->features;
2512 if (writethrough_mode(cache)) {
2513
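		/* Create bioset for writethrough bios issued to origin */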
		r = bioset_init(&cache->bs, BIO_POOL_SIZE, 0, 0);
		if (r)
			goto bad;
	}

	cache->callbacks.congested_fn = cache_is_congested;
	dm_table_add_target_callbacks(ti->table, &cache->callbacks);

	cache->metadata_dev = ca->metadata_dev;
	cache->origin_dev = ca->origin_dev;
	cache->cache_dev = ca->cache_dev;

	ca->metadata_dev = ca->origin_dev = ca->cache_dev = NULL;

	origin_blocks = cache->origin_sectors = ca->origin_sectors;
	origin_blocks = block_div(origin_blocks, ca->block_size);
	cache->origin_blocks = to_oblock(origin_blocks);

	cache->sectors_per_block = ca->block_size;
	if (dm_set_target_max_io_len(ti, cache->sectors_per_block)) {
		r = -EINVAL;
		goto bad;
	}

	if (ca->block_size & (ca->block_size - 1)) {
		dm_block_t cache_size = ca->cache_sectors;

		cache->sectors_per_block_shift = -1;
		cache_size = block_div(cache_size, ca->block_size);
		set_cache_size(cache, to_cblock(cache_size));
	} else {
		cache->sectors_per_block_shift = __ffs(ca->block_size);
		set_cache_size(cache, to_cblock(ca->cache_sectors >> cache->sectors_per_block_shift));
	}

	r = create_cache_policy(cache, ca, error);
	if (r)
		goto bad;

	cache->policy_nr_args = ca->policy_argc;
	cache->migration_threshold = DEFAULT_MIGRATION_THRESHOLD;

	r = set_config_values(cache, ca->policy_argc, ca->policy_argv);
	if (r) {
		*error = "Error setting cache policy's config values";
		goto bad;
	}

	cmd = dm_cache_metadata_open(cache->metadata_dev->bdev,
				     ca->block_size, may_format,
				     dm_cache_policy_get_hint_size(cache->policy),
				     ca->features.metadata_version);
	if (IS_ERR(cmd)) {
		*error = "Error creating metadata object";
		r = PTR_ERR(cmd);
		goto bad;
	}
	cache->cmd = cmd;
	set_cache_mode(cache, CM_WRITE);
	if (get_cache_mode(cache) != CM_WRITE) {
		*error = "Unable to get write access to metadata, please check/repair metadata.";
		r = -EINVAL;
		goto bad;
	}

	if (passthrough_mode(cache)) {
		bool all_clean;

		r = dm_cache_metadata_all_clean(cache->cmd, &all_clean);
		if (r) {
			*error = "dm_cache_metadata_all_clean() failed";
			goto bad;
		}

		if (!all_clean) {
			*error = "Cannot enter passthrough mode unless all blocks are clean";
			r = -EINVAL;
			goto bad;
		}

		policy_allow_migrations(cache->policy, false);
	}

	spin_lock_init(&cache->lock);
	bio_list_init(&cache->deferred_bios);
	atomic_set(&cache->nr_allocated_migrations, 0);
	atomic_set(&cache->nr_io_migrations, 0);
	init_waitqueue_head(&cache->migration_wait);

	r = -ENOMEM;
	atomic_set(&cache->nr_dirty, 0);
	cache->dirty_bitset = alloc_bitset(from_cblock(cache->cache_size));
	if (!cache->dirty_bitset) {
		*error = "could not allocate dirty bitset";
		goto bad;
	}
	clear_bitset(cache->dirty_bitset, from_cblock(cache->cache_size));

	cache->discard_block_size =
		calculate_discard_block_size(cache->sectors_per_block,
					     cache->origin_sectors);
	cache->discard_nr_blocks = to_dblock(dm_sector_div_up(cache->origin_sectors,
							      cache->discard_block_size));
	cache->discard_bitset = alloc_bitset(from_dblock(cache->discard_nr_blocks));
	if (!cache->discard_bitset) {
		*error = "could not allocate discard bitset";
		goto bad;
	}
	clear_bitset(cache->discard_bitset, from_dblock(cache->discard_nr_blocks));

	cache->copier = dm_kcopyd_client_create(&dm_kcopyd_throttle);
	if (IS_ERR(cache->copier)) {
		*error = "could not create kcopyd client";
		r = PTR_ERR(cache->copier);
		goto bad;
	}

	cache->wq = alloc_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM, 0);
	if (!cache->wq) {
		*error = "could not create workqueue for metadata object";
		goto bad;
	}
	INIT_WORK(&cache->deferred_bio_worker, process_deferred_bios);
	INIT_WORK(&cache->migration_worker, check_migrations);
	INIT_DELAYED_WORK(&cache->waker, do_waker);

	cache->prison = dm_bio_prison_create_v2(cache->wq);
	if (!cache->prison) {
		*error = "could not create bio prison";
		goto bad;
	}

	r = mempool_init_slab_pool(&cache->migration_pool, MIGRATION_POOL_SIZE,
				   migration_cache);
	if (r) {
		*error = "Error creating cache's migration mempool";
		goto bad;
	}

	cache->need_tick_bio = true;
	cache->sized = false;
	cache->invalidate = false;
	cache->commit_requested = false;
	cache->loaded_mappings = false;
	cache->loaded_discards = false;

	load_stats(cache);

	atomic_set(&cache->stats.demotion, 0);
	atomic_set(&cache->stats.promotion, 0);
	atomic_set(&cache->stats.copies_avoided, 0);
	atomic_set(&cache->stats.cache_cell_clash, 0);
	atomic_set(&cache->stats.commit_count, 0);
	atomic_set(&cache->stats.discard_count, 0);

	spin_lock_init(&cache->invalidation_lock);
	INIT_LIST_HEAD(&cache->invalidation_requests);

	batcher_init(&cache->committer, commit_op, cache,
		     issue_op, cache, cache->wq);
	iot_init(&cache->tracker);

	init_rwsem(&cache->background_work_lock);
	prevent_background_work(cache);

	*result = cache;
	return 0;
bad:
	destroy(cache);
	return r;
}

static int copy_ctr_args(struct cache *cache, int argc, const char **argv)
{
	unsigned i;
	const char **copy;

	copy = kcalloc(argc, sizeof(*copy), GFP_KERNEL);
	if (!copy)
		return -ENOMEM;
	for (i = 0; i < argc; i++) {
		copy[i] = kstrdup(argv[i], GFP_KERNEL);
		if (!copy[i]) {
			while (i--)
				kfree(copy[i]);
			kfree(copy);
			return -ENOMEM;
		}
	}

	cache->nr_ctr_args = argc;
	cache->ctr_args = copy;

	return 0;
}

static int cache_ctr(struct dm_target *ti, unsigned argc, char **argv)
{
	int r = -EINVAL;
	struct cache_args *ca;
	struct cache *cache = NULL;

	ca = kzalloc(sizeof(*ca), GFP_KERNEL);
	if (!ca) {
		ti->error = "Error allocating memory for cache";
		return -ENOMEM;
	}
	ca->ti = ti;

	r = parse_cache_args(ca, argc, argv, &ti->error);
	if (r)
		goto out;

	r = cache_create(ca, &cache);
	if (r)
		goto out;

	r = copy_ctr_args(cache, argc - 3, (const char **)argv + 3);
	if (r) {
		destroy(cache);
		goto out;
	}

	ti->private = cache;
out:
	destroy_cache_args(ca);
	return r;
}

static int cache_map(struct dm_target *ti, struct bio *bio)
{
	struct cache *cache = ti->private;
	int r;
	bool commit_needed;
	dm_oblock_t block = get_bio_block(cache, bio);

	init_per_bio_data(bio);
	if (unlikely(from_oblock(block) >= from_oblock(cache->origin_blocks))) {
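		/*
		 * This can only occur if the io goes to a partial block at
		 * the end of the origin device.  We don't cache these.
		 * Just remap to the origin and carry on.
		 */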
		remap_to_origin(cache, bio);
		accounted_begin(cache, bio);
		return DM_MAPIO_REMAPPED;
	}

	if (discard_or_flush(bio)) {
		defer_bio(cache, bio);
		return DM_MAPIO_SUBMITTED;
	}

	r = map_bio(cache, bio, block, &commit_needed);
	if (commit_needed)
		schedule_commit(&cache->committer);

	return r;
}

static int cache_end_io(struct dm_target *ti, struct bio *bio, blk_status_t *error)
{
	struct cache *cache = ti->private;
	unsigned long flags;
	struct per_bio_data *pb = get_per_bio_data(bio);

	if (pb->tick) {
		policy_tick(cache->policy, false);

		spin_lock_irqsave(&cache->lock, flags);
		cache->need_tick_bio = true;
		spin_unlock_irqrestore(&cache->lock, flags);
	}

	bio_drop_shared_lock(cache, bio);
	accounted_complete(cache, bio);

	return DM_ENDIO_DONE;
}

static int write_dirty_bitset(struct cache *cache)
{
	int r;

	if (get_cache_mode(cache) >= CM_READ_ONLY)
		return -EINVAL;

	r = dm_cache_set_dirty_bits(cache->cmd, from_cblock(cache->cache_size), cache->dirty_bitset);
	if (r)
		metadata_operation_failed(cache, "dm_cache_set_dirty_bits", r);

	return r;
}

static int write_discard_bitset(struct cache *cache)
{
	int r;
	unsigned i;

	if (get_cache_mode(cache) >= CM_READ_ONLY)
		return -EINVAL;

	r = dm_cache_discard_bitset_resize(cache->cmd, cache->discard_block_size,
					   cache->discard_nr_blocks);
	if (r) {
		DMERR("%s: could not resize on-disk discard bitset", cache_device_name(cache));
		metadata_operation_failed(cache, "dm_cache_discard_bitset_resize", r);
		return r;
	}

	for (i = 0; i < from_dblock(cache->discard_nr_blocks); i++) {
		r = dm_cache_set_discard(cache->cmd, to_dblock(i),
					 is_discarded(cache, to_dblock(i)));
		if (r) {
			metadata_operation_failed(cache, "dm_cache_set_discard", r);
			return r;
		}
	}

	return 0;
}

static int write_hints(struct cache *cache)
{
	int r;

	if (get_cache_mode(cache) >= CM_READ_ONLY)
		return -EINVAL;

	r = dm_cache_write_hints(cache->cmd, cache->policy);
	if (r) {
		metadata_operation_failed(cache, "dm_cache_write_hints", r);
		return r;
	}

	return 0;
}

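/*
 * returns true on success
 */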
static bool sync_metadata(struct cache *cache)
{
	int r1, r2, r3, r4;

	r1 = write_dirty_bitset(cache);
	if (r1)
		DMERR("%s: could not write dirty bitset", cache_device_name(cache));

	r2 = write_discard_bitset(cache);
	if (r2)
		DMERR("%s: could not write discard bitset", cache_device_name(cache));

	save_stats(cache);

	r3 = write_hints(cache);
	if (r3)
		DMERR("%s: could not write hints", cache_device_name(cache));

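	/*
	 * If writing the above metadata failed, we still commit, but don't
	 * set the clean shutdown flag.  This will effectively force every
	 * dirty bit to be set on reload.
	 */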
	r4 = commit(cache, !r1 && !r2 && !r3);
	if (r4)
		DMERR("%s: could not write cache metadata", cache_device_name(cache));

	return !r1 && !r2 && !r3 && !r4;
}

static void cache_postsuspend(struct dm_target *ti)
{
	struct cache *cache = ti->private;

	prevent_background_work(cache);
	BUG_ON(atomic_read(&cache->nr_io_migrations));

	cancel_delayed_work(&cache->waker);
	flush_workqueue(cache->wq);
	WARN_ON(cache->tracker.in_flight);

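	/*
	 * If it's a flush suspend there won't be any deferred bios, so this
	 * call is harmless.
	 */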
	requeue_deferred_bios(cache);

	if (get_cache_mode(cache) == CM_WRITE)
		(void) sync_metadata(cache);
}

static int load_mapping(void *context, dm_oblock_t oblock, dm_cblock_t cblock,
			bool dirty, uint32_t hint, bool hint_valid)
{
	struct cache *cache = context;

	if (dirty) {
		set_bit(from_cblock(cblock), cache->dirty_bitset);
		atomic_inc(&cache->nr_dirty);
	} else
		clear_bit(from_cblock(cblock), cache->dirty_bitset);

	return policy_load_mapping(cache->policy, oblock, cblock, dirty, hint, hint_valid);
}

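/*
 * The discard block size in the on disk metadata is not
 * necessarily the same size as we're currently using.  So we have to
 * be careful to only set the discarded attribute if we know it
 * covers a complete block of the new size.
 */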
struct discard_load_info {
	struct cache *cache;

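	/*
	 * These blocks are sized using the on disk dblock size, rather
	 * than the current one.
	 */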
	dm_block_t block_size;
	dm_block_t discard_begin, discard_end;
};

static void discard_load_info_init(struct cache *cache,
				   struct discard_load_info *li)
{
	li->cache = cache;
	li->discard_begin = li->discard_end = 0;
}

static void set_discard_range(struct discard_load_info *li)
{
	sector_t b, e;

	if (li->discard_begin == li->discard_end)
		return;

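	/*
	 * Convert to sectors.
	 */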
	b = li->discard_begin * li->block_size;
	e = li->discard_end * li->block_size;

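	/*
	 * Then convert back to the current dblock size.
	 */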
	b = dm_sector_div_up(b, li->cache->discard_block_size);
	sector_div(e, li->cache->discard_block_size);

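	/*
	 * The origin may have shrunk, so we need to check we're still in
	 * bounds.
	 */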
	if (e > from_dblock(li->cache->discard_nr_blocks))
		e = from_dblock(li->cache->discard_nr_blocks);

	for (; b < e; b++)
		set_discard(li->cache, to_dblock(b));
}

static int load_discard(void *context, sector_t discard_block_size,
			dm_dblock_t dblock, bool discard)
{
	struct discard_load_info *li = context;

	li->block_size = discard_block_size;

	if (discard) {
		if (from_dblock(dblock) == li->discard_end)
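			/*
			 * We're already in a discard range, just extend it.
			 */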
			li->discard_end = li->discard_end + 1ULL;

		else {
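			/*
			 * Emit the old range and start a new one.
			 */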
			set_discard_range(li);
			li->discard_begin = from_dblock(dblock);
			li->discard_end = li->discard_begin + 1ULL;
		}
	} else {
		set_discard_range(li);
		li->discard_begin = li->discard_end = 0;
	}

	return 0;
}

static dm_cblock_t get_cache_dev_size(struct cache *cache)
{
	sector_t size = get_dev_size(cache->cache_dev);

	(void) sector_div(size, cache->sectors_per_block);
	return to_cblock(size);
}

static bool can_resize(struct cache *cache, dm_cblock_t new_size)
{
	if (from_cblock(new_size) > from_cblock(cache->cache_size)) {
		if (cache->sized) {
			DMERR("%s: unable to extend cache due to missing cache table reload",
			      cache_device_name(cache));
			return false;
		}
	}

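	/*
	 * We can't drop a dirty block when shrinking the cache.
	 */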
	while (from_cblock(new_size) < from_cblock(cache->cache_size)) {
		new_size = to_cblock(from_cblock(new_size) + 1);
		if (is_dirty(cache, new_size)) {
			DMERR("%s: unable to shrink cache; cache block %llu is dirty",
			      cache_device_name(cache),
			      (unsigned long long) from_cblock(new_size));
			return false;
		}
	}

	return true;
}

static int resize_cache_dev(struct cache *cache, dm_cblock_t new_size)
{
	int r;

	r = dm_cache_resize(cache->cmd, new_size);
	if (r) {
		DMERR("%s: could not resize cache metadata", cache_device_name(cache));
		metadata_operation_failed(cache, "dm_cache_resize", r);
		return r;
	}

	set_cache_size(cache, new_size);

	return 0;
}

static int cache_preresume(struct dm_target *ti)
{
	int r = 0;
	struct cache *cache = ti->private;
	dm_cblock_t csize = get_cache_dev_size(cache);

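	/*
	 * Check to see if the cache has resized.
	 */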
	if (!cache->sized) {
		r = resize_cache_dev(cache, csize);
		if (r)
			return r;

		cache->sized = true;

	} else if (csize != cache->cache_size) {
		if (!can_resize(cache, csize))
			return -EINVAL;

		r = resize_cache_dev(cache, csize);
		if (r)
			return r;
	}

	if (!cache->loaded_mappings) {
		r = dm_cache_load_mappings(cache->cmd, cache->policy,
					   load_mapping, cache);
		if (r) {
			DMERR("%s: could not load cache mappings", cache_device_name(cache));
			metadata_operation_failed(cache, "dm_cache_load_mappings", r);
			return r;
		}

		cache->loaded_mappings = true;
	}

	if (!cache->loaded_discards) {
		struct discard_load_info li;

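		/*
		 * The discard bitset could have been resized, or the
		 * discard block size changed.  To be safe we start by
		 * setting every dblock to not discarded.
		 */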
		clear_bitset(cache->discard_bitset, from_dblock(cache->discard_nr_blocks));

		discard_load_info_init(cache, &li);
		r = dm_cache_load_discards(cache->cmd, load_discard, &li);
		if (r) {
			DMERR("%s: could not load origin discards", cache_device_name(cache));
			metadata_operation_failed(cache, "dm_cache_load_discards", r);
			return r;
		}
		set_discard_range(&li);

		cache->loaded_discards = true;
	}

	return r;
}

static void cache_resume(struct dm_target *ti)
{
	struct cache *cache = ti->private;

	cache->need_tick_bio = true;
	allow_background_work(cache);
	do_waker(&cache->waker.work);
}

static void emit_flags(struct cache *cache, char *result,
		       unsigned maxlen, ssize_t *sz_ptr)
{
	ssize_t sz = *sz_ptr;
	struct cache_features *cf = &cache->features;
	unsigned count = (cf->metadata_version == 2) + !cf->discard_passdown + 1;

	DMEMIT("%u ", count);

	if (cf->metadata_version == 2)
		DMEMIT("metadata2 ");

	if (writethrough_mode(cache))
		DMEMIT("writethrough ");

	else if (passthrough_mode(cache))
		DMEMIT("passthrough ");

	else if (writeback_mode(cache))
		DMEMIT("writeback ");

	else {
		DMEMIT("unknown ");
		DMERR("%s: internal error: unknown io mode: %d",
		      cache_device_name(cache), (int) cf->io_mode);
	}

	if (!cf->discard_passdown)
		DMEMIT("no_discard_passdown ");

	*sz_ptr = sz;
}

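/*
 * Status format:
 *
 * <metadata block size> <#used metadata blocks>/<#total metadata blocks>
 * <cache block size> <#used cache blocks>/<#total cache blocks>
 * <#read hits> <#read misses> <#write hits> <#write misses>
 * <#demotions> <#promotions> <#dirty>
 * <#features> <features>*
 * <#core args> <core args>
 * <policy name> <#policy args> <policy args>* <cache metadata mode> <needs_check>
 */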
static void cache_status(struct dm_target *ti, status_type_t type,
			 unsigned status_flags, char *result, unsigned maxlen)
{
	int r = 0;
	unsigned i;
	ssize_t sz = 0;
	dm_block_t nr_free_blocks_metadata = 0;
	dm_block_t nr_blocks_metadata = 0;
	char buf[BDEVNAME_SIZE];
	struct cache *cache = ti->private;
	dm_cblock_t residency;
	bool needs_check;

	switch (type) {
	case STATUSTYPE_INFO:
		if (get_cache_mode(cache) == CM_FAIL) {
			DMEMIT("Fail");
			break;
		}

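		/* Commit to ensure statistics aren't out-of-date */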
		if (!(status_flags & DM_STATUS_NOFLUSH_FLAG) && !dm_suspended(ti))
			(void) commit(cache, false);

		r = dm_cache_get_free_metadata_block_count(cache->cmd, &nr_free_blocks_metadata);
		if (r) {
			DMERR("%s: dm_cache_get_free_metadata_block_count returned %d",
			      cache_device_name(cache), r);
			goto err;
		}

		r = dm_cache_get_metadata_dev_size(cache->cmd, &nr_blocks_metadata);
		if (r) {
			DMERR("%s: dm_cache_get_metadata_dev_size returned %d",
			      cache_device_name(cache), r);
			goto err;
		}

		residency = policy_residency(cache->policy);

		DMEMIT("%u %llu/%llu %llu %llu/%llu %u %u %u %u %u %u %lu ",
		       (unsigned) DM_CACHE_METADATA_BLOCK_SIZE,
		       (unsigned long long)(nr_blocks_metadata - nr_free_blocks_metadata),
		       (unsigned long long) nr_blocks_metadata,
		       (unsigned long long) cache->sectors_per_block,
		       (unsigned long long) from_cblock(residency),
		       (unsigned long long) from_cblock(cache->cache_size),
		       (unsigned) atomic_read(&cache->stats.read_hit),
		       (unsigned) atomic_read(&cache->stats.read_miss),
		       (unsigned) atomic_read(&cache->stats.write_hit),
		       (unsigned) atomic_read(&cache->stats.write_miss),
		       (unsigned) atomic_read(&cache->stats.demotion),
		       (unsigned) atomic_read(&cache->stats.promotion),
		       (unsigned long) atomic_read(&cache->nr_dirty));

		emit_flags(cache, result, maxlen, &sz);

		DMEMIT("2 migration_threshold %llu ", (unsigned long long) cache->migration_threshold);

		DMEMIT("%s ", dm_cache_policy_get_name(cache->policy));
		if (sz < maxlen) {
			r = policy_emit_config_values(cache->policy, result, maxlen, &sz);
			if (r)
				DMERR("%s: policy_emit_config_values returned %d",
				      cache_device_name(cache), r);
		}

		if (get_cache_mode(cache) == CM_READ_ONLY)
			DMEMIT("ro ");
		else
			DMEMIT("rw ");

		r = dm_cache_metadata_needs_check(cache->cmd, &needs_check);

		if (r || needs_check)
			DMEMIT("needs_check ");
		else
			DMEMIT("- ");

		break;

	case STATUSTYPE_TABLE:
		format_dev_t(buf, cache->metadata_dev->bdev->bd_dev);
		DMEMIT("%s ", buf);
		format_dev_t(buf, cache->cache_dev->bdev->bd_dev);
		DMEMIT("%s ", buf);
		format_dev_t(buf, cache->origin_dev->bdev->bd_dev);
		DMEMIT("%s", buf);

		for (i = 0; i < cache->nr_ctr_args - 1; i++)
			DMEMIT(" %s", cache->ctr_args[i]);
		if (cache->nr_ctr_args)
			DMEMIT(" %s", cache->ctr_args[cache->nr_ctr_args - 1]);
	}

	return;

err:
	DMEMIT("Error");
}

struct cblock_range {
	dm_cblock_t begin;
	dm_cblock_t end;
};

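/*
 * A cache block range can take two forms:
 *
 * i) A single cblock, eg. '3456'
 * ii) A begin and end cblock with a dash between, eg. 123-234
 */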
static int parse_cblock_range(struct cache *cache, const char *str,
			      struct cblock_range *result)
{
	char dummy;
	uint64_t b, e;
	int r;

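	/*
	 * Try and parse form (ii) first.
	 */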
	r = sscanf(str, "%llu-%llu%c", &b, &e, &dummy);
	if (r < 0)
		return r;

	if (r == 2) {
		result->begin = to_cblock(b);
		result->end = to_cblock(e);
		return 0;
	}

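	/*
	 * That didn't work, try form (i).
	 */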
	r = sscanf(str, "%llu%c", &b, &dummy);
	if (r < 0)
		return r;

	if (r == 1) {
		result->begin = to_cblock(b);
		result->end = to_cblock(from_cblock(result->begin) + 1u);
		return 0;
	}

	DMERR("%s: invalid cblock range '%s'", cache_device_name(cache), str);
	return -EINVAL;
}

static int validate_cblock_range(struct cache *cache, struct cblock_range *range)
{
	uint64_t b = from_cblock(range->begin);
	uint64_t e = from_cblock(range->end);
	uint64_t n = from_cblock(cache->cache_size);

	if (b >= n) {
		DMERR("%s: begin cblock out of range: %llu >= %llu",
		      cache_device_name(cache), b, n);
		return -EINVAL;
	}

	if (e > n) {
		DMERR("%s: end cblock out of range: %llu > %llu",
		      cache_device_name(cache), e, n);
		return -EINVAL;
	}

	if (b >= e) {
		DMERR("%s: invalid cblock range: %llu >= %llu",
		      cache_device_name(cache), b, e);
		return -EINVAL;
	}

	return 0;
}

static inline dm_cblock_t cblock_succ(dm_cblock_t b)
{
	return to_cblock(from_cblock(b) + 1);
}

static int request_invalidation(struct cache *cache, struct cblock_range *range)
{
	int r = 0;

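	/*
	 * We don't need to do any locking here because we know we're in
	 * passthrough mode.  There is potential for a race between an
	 * invalidation triggered by an io and an invalidation message.  This
	 * is harmless, we must not worry if the policy call fails.
	 */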
	while (range->begin != range->end) {
		r = invalidate_cblock(cache, range->begin);
		if (r)
			return r;

		range->begin = cblock_succ(range->begin);
	}

	cache->commit_requested = true;
	return r;
}

static int process_invalidate_cblocks_message(struct cache *cache, unsigned count,
					      const char **cblock_ranges)
{
	int r = 0;
	unsigned i;
	struct cblock_range range;

	if (!passthrough_mode(cache)) {
		DMERR("%s: cache has to be in passthrough mode for invalidation",
		      cache_device_name(cache));
		return -EPERM;
	}

	for (i = 0; i < count; i++) {
		r = parse_cblock_range(cache, cblock_ranges[i], &range);
		if (r)
			break;

		r = validate_cblock_range(cache, &range);
		if (r)
			break;

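		/*
		 * Invalidate each cblock in the range; a metadata
		 * commit is requested once all ranges are processed.
		 */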
		r = request_invalidation(cache, &range);
		if (r)
			break;
	}

	return r;
}

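/*
 * Supports
 *	"<key> <value>"
 * and
 *	"invalidate_cblocks [(<begin>)|(<begin>-<end>)]*"
 *
 * The key migration_threshold is supported by the cache target core.
 */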
static int cache_message(struct dm_target *ti, unsigned argc, char **argv,
			 char *result, unsigned maxlen)
{
	struct cache *cache = ti->private;

	if (!argc)
		return -EINVAL;

	if (get_cache_mode(cache) >= CM_READ_ONLY) {
		DMERR("%s: unable to service cache target messages in READ_ONLY or FAIL mode",
		      cache_device_name(cache));
		return -EOPNOTSUPP;
	}

	if (!strcasecmp(argv[0], "invalidate_cblocks"))
		return process_invalidate_cblocks_message(cache, argc - 1, (const char **) argv + 1);

	if (argc != 2)
		return -EINVAL;

	return set_config_value(cache, argv[0], argv[1]);
}

static int cache_iterate_devices(struct dm_target *ti,
				 iterate_devices_callout_fn fn, void *data)
{
	int r = 0;
	struct cache *cache = ti->private;

	r = fn(ti, cache->cache_dev, 0, get_dev_size(cache->cache_dev), data);
	if (!r)
		r = fn(ti, cache->origin_dev, 0, ti->len, data);

	return r;
}

static bool origin_dev_supports_discard(struct block_device *origin_bdev)
{
	struct request_queue *q = bdev_get_queue(origin_bdev);

	return q && blk_queue_discard(q);
}

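/*
 * If discard_passdown was enabled verify that the origin device
 * supports discards.  Disable discard_passdown if not.
 */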
static void disable_passdown_if_not_supported(struct cache *cache)
{
	struct block_device *origin_bdev = cache->origin_dev->bdev;
	struct queue_limits *origin_limits = &bdev_get_queue(origin_bdev)->limits;
	const char *reason = NULL;
	char buf[BDEVNAME_SIZE];

	if (!cache->features.discard_passdown)
		return;

	if (!origin_dev_supports_discard(origin_bdev))
		reason = "discard unsupported";

	else if (origin_limits->max_discard_sectors < cache->sectors_per_block)
		reason = "max discard sectors smaller than a block";

	if (reason) {
		DMWARN("Origin device (%s) %s: Disabling discard passdown.",
		       bdevname(origin_bdev, buf), reason);
		cache->features.discard_passdown = false;
	}
}

static void set_discard_limits(struct cache *cache, struct queue_limits *limits)
{
	struct block_device *origin_bdev = cache->origin_dev->bdev;
	struct queue_limits *origin_limits = &bdev_get_queue(origin_bdev)->limits;

	if (!cache->features.discard_passdown) {
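		/* No passdown is done so setting own virtual limits */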
		limits->max_discard_sectors = min_t(sector_t, cache->discard_block_size * 1024,
						    cache->origin_sectors);
		limits->discard_granularity = cache->discard_block_size << SECTOR_SHIFT;
		return;
	}

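	/*
	 * cache_iterate_devices() is stacking both origin and fast device limits
	 * but discards aren't passed to the fast device, so inherit origin's limits.
	 */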
	limits->max_discard_sectors = origin_limits->max_discard_sectors;
	limits->max_hw_discard_sectors = origin_limits->max_hw_discard_sectors;
	limits->discard_granularity = origin_limits->discard_granularity;
	limits->discard_alignment = origin_limits->discard_alignment;
	limits->discard_misaligned = origin_limits->discard_misaligned;
}

static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
{
	struct cache *cache = ti->private;
	uint64_t io_opt_sectors = limits->io_opt >> SECTOR_SHIFT;

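	/*
	 * If the system-determined stacked limits are compatible with the
	 * cache's blocksize (io_opt is a factor) do not override them.
	 */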
	if (io_opt_sectors < cache->sectors_per_block ||
	    do_div(io_opt_sectors, cache->sectors_per_block)) {
		blk_limits_io_min(limits, cache->sectors_per_block << SECTOR_SHIFT);
		blk_limits_io_opt(limits, cache->sectors_per_block << SECTOR_SHIFT);
	}

	disable_passdown_if_not_supported(cache);
	set_discard_limits(cache, limits);
}

static struct target_type cache_target = {
	.name = "cache",
	.version = {2, 1, 0},
	.module = THIS_MODULE,
	.ctr = cache_ctr,
	.dtr = cache_dtr,
	.map = cache_map,
	.end_io = cache_end_io,
	.postsuspend = cache_postsuspend,
	.preresume = cache_preresume,
	.resume = cache_resume,
	.status = cache_status,
	.message = cache_message,
	.iterate_devices = cache_iterate_devices,
	.io_hints = cache_io_hints,
};

static int __init dm_cache_init(void)
{
	int r;

	migration_cache = KMEM_CACHE(dm_cache_migration, 0);
	if (!migration_cache)
		return -ENOMEM;

	r = dm_register_target(&cache_target);
	if (r) {
		DMERR("cache target registration failed: %d", r);
		kmem_cache_destroy(migration_cache);
		return r;
	}

	return 0;
}

static void __exit dm_cache_exit(void)
{
	dm_unregister_target(&cache_target);
	kmem_cache_destroy(migration_cache);
}

module_init(dm_cache_init);
module_exit(dm_cache_exit);

MODULE_DESCRIPTION(DM_NAME " cache target");
MODULE_AUTHOR("Joe Thornber <ejt@redhat.com>");
MODULE_LICENSE("GPL");