/*
 * Copyright (C) 2012 Red Hat. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include "dm.h"
#include "dm-bio-prison-v2.h"
#include "dm-bio-record.h"
#include "dm-cache-metadata.h"

#include <linux/dm-io.h>
#include <linux/dm-kcopyd.h>
#include <linux/jiffies.h>
#include <linux/init.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/rwsem.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

#define DM_MSG_PREFIX "cache"

DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(cache_copy_throttle,
	"A percentage of time allocated for copying to and/or from cache");

/*----------------------------------------------------------------*/

/*
 * Glossary:
 *
 * oblock: index of an origin block
 * cblock: index of a cache block
 * promotion: movement of a block from origin to cache
 * demotion: movement of a block from cache to origin
 * migration: movement of a block between the origin and cache device,
 *	      either direction
 */

/*----------------------------------------------------------------*/

struct io_tracker {
	spinlock_t lock;

	/*
	 * Sectors of in-flight IO.
	 */
	sector_t in_flight;

	/*
	 * The time, in jiffies, when this device became idle (if it is
	 * indeed idle).
	 */
	unsigned long idle_time;
	unsigned long last_update_time;
};
57
58static void iot_init(struct io_tracker *iot)
59{
60 spin_lock_init(&iot->lock);
61 iot->in_flight = 0ul;
62 iot->idle_time = 0ul;
63 iot->last_update_time = jiffies;
64}
65
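/*
 * Returns true if there is no IO in flight and the device has been idle for
 * at least 'jifs' jiffies.  The caller must hold iot->lock.
 */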
66static bool __iot_idle_for(struct io_tracker *iot, unsigned long jifs)
67{
68 if (iot->in_flight)
69 return false;
70
71 return time_after(jiffies, iot->idle_time + jifs);
72}
73
74static bool iot_idle_for(struct io_tracker *iot, unsigned long jifs)
75{
76 bool r;
77
78 spin_lock_irq(&iot->lock);
79 r = __iot_idle_for(iot, jifs);
80 spin_unlock_irq(&iot->lock);
81
82 return r;
83}
84
85static void iot_io_begin(struct io_tracker *iot, sector_t len)
86{
87 spin_lock_irq(&iot->lock);
88 iot->in_flight += len;
89 spin_unlock_irq(&iot->lock);
90}
91
92static void __iot_io_end(struct io_tracker *iot, sector_t len)
93{
94 if (!len)
95 return;
96
97 iot->in_flight -= len;
98 if (!iot->in_flight)
99 iot->idle_time = jiffies;
100}
101
102static void iot_io_end(struct io_tracker *iot, sector_t len)
103{
104 unsigned long flags;
105
106 spin_lock_irqsave(&iot->lock, flags);
107 __iot_io_end(iot, len);
108 spin_unlock_irqrestore(&iot->lock, flags);
109}

/*----------------------------------------------------------------*/

/*
 * Represents a chunk of future work.  'input' allows continuations to pass
 * values between themselves, typically error values.
 */
struct continuation {
	struct work_struct ws;
	blk_status_t input;
};

static inline void init_continuation(struct continuation *k,
				     void (*fn)(struct work_struct *))
{
	INIT_WORK(&k->ws, fn);
	k->input = 0;
}

static inline void queue_continuation(struct workqueue_struct *wq,
				      struct continuation *k)
{
	queue_work(wq, &k->ws);
}

/*----------------------------------------------------------------*/

/*
 * The batcher collects together pieces of work that need a particular
 * operation to occur before they can proceed (typically a commit).
 */
struct batcher {
	/*
	 * The operation that everyone is waiting for.
	 */
	blk_status_t (*commit_op)(void *context);
	void *commit_context;

	/*
	 * This is how bios should be issued once the commit op is complete
	 * (accounted_request).
	 */
	void (*issue_op)(struct bio *bio, void *context);
	void *issue_context;

	/*
	 * Queued work gets put on here after commit.
	 */
	struct workqueue_struct *wq;

	spinlock_t lock;
	struct list_head work_items;
	struct bio_list bios;
	struct work_struct commit_work;

	bool commit_scheduled;
};
167
168static void __commit(struct work_struct *_ws)
169{
170 struct batcher *b = container_of(_ws, struct batcher, commit_work);
171 blk_status_t r;
172 struct list_head work_items;
173 struct work_struct *ws, *tmp;
174 struct continuation *k;
175 struct bio *bio;
176 struct bio_list bios;
177
178 INIT_LIST_HEAD(&work_items);
179 bio_list_init(&bios);

	/*
	 * We have to grab these before the commit_op to avoid a race
	 * condition.
	 */
185 spin_lock_irq(&b->lock);
186 list_splice_init(&b->work_items, &work_items);
187 bio_list_merge(&bios, &b->bios);
188 bio_list_init(&b->bios);
189 b->commit_scheduled = false;
190 spin_unlock_irq(&b->lock);
191
192 r = b->commit_op(b->commit_context);
193
194 list_for_each_entry_safe(ws, tmp, &work_items, entry) {
195 k = container_of(ws, struct continuation, ws);
196 k->input = r;
197 INIT_LIST_HEAD(&ws->entry);
198 queue_work(b->wq, ws);
199 }
200
201 while ((bio = bio_list_pop(&bios))) {
202 if (r) {
203 bio->bi_status = r;
204 bio_endio(bio);
205 } else
206 b->issue_op(bio, b->issue_context);
207 }
208}
209
210static void batcher_init(struct batcher *b,
211 blk_status_t (*commit_op)(void *),
212 void *commit_context,
213 void (*issue_op)(struct bio *bio, void *),
214 void *issue_context,
215 struct workqueue_struct *wq)
216{
217 b->commit_op = commit_op;
218 b->commit_context = commit_context;
219 b->issue_op = issue_op;
220 b->issue_context = issue_context;
221 b->wq = wq;
222
223 spin_lock_init(&b->lock);
224 INIT_LIST_HEAD(&b->work_items);
225 bio_list_init(&b->bios);
226 INIT_WORK(&b->commit_work, __commit);
227 b->commit_scheduled = false;
228}
229
230static void async_commit(struct batcher *b)
231{
232 queue_work(b->wq, &b->commit_work);
233}
234
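/*
 * Registers a continuation to be run (on the batcher's workqueue) once the
 * next commit completes.  If a commit has already been requested, kick the
 * commit work off now rather than waiting for the periodic commit.
 */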
235static void continue_after_commit(struct batcher *b, struct continuation *k)
236{
237 bool commit_scheduled;
238
239 spin_lock_irq(&b->lock);
240 commit_scheduled = b->commit_scheduled;
241 list_add_tail(&k->ws.entry, &b->work_items);
242 spin_unlock_irq(&b->lock);
243
244 if (commit_scheduled)
245 async_commit(b);
246}

/*
 * Bios are errored if commit failed.
 */
251static void issue_after_commit(struct batcher *b, struct bio *bio)
252{
253 bool commit_scheduled;
254
255 spin_lock_irq(&b->lock);
256 commit_scheduled = b->commit_scheduled;
257 bio_list_add(&b->bios, bio);
258 spin_unlock_irq(&b->lock);
259
260 if (commit_scheduled)
261 async_commit(b);
262}

/*
 * Call this if some urgent work is waiting for the commit to complete.
 */
267static void schedule_commit(struct batcher *b)
268{
269 bool immediate;
270
271 spin_lock_irq(&b->lock);
272 immediate = !list_empty(&b->work_items) || !bio_list_empty(&b->bios);
273 b->commit_scheduled = true;
274 spin_unlock_irq(&b->lock);
275
276 if (immediate)
277 async_commit(b);
278}

/*
 * There are a couple of places where we let a bio run, but want to do some
 * work before calling its endio function.  We do this by temporarily
 * changing the endio fn.
 */
285struct dm_hook_info {
286 bio_end_io_t *bi_end_io;
287};
288
289static void dm_hook_bio(struct dm_hook_info *h, struct bio *bio,
290 bio_end_io_t *bi_end_io, void *bi_private)
291{
292 h->bi_end_io = bio->bi_end_io;
293
294 bio->bi_end_io = bi_end_io;
295 bio->bi_private = bi_private;
296}
297
298static void dm_unhook_bio(struct dm_hook_info *h, struct bio *bio)
299{
300 bio->bi_end_io = h->bi_end_io;
301}

/*----------------------------------------------------------------*/

#define MIGRATION_POOL_SIZE 128
#define COMMIT_PERIOD HZ
#define MIGRATION_COUNT_WINDOW 10

/*
 * The block size of the device holding cache data must be
 * between 32KB and 1GB.
 */
#define DATA_DEV_BLOCK_SIZE_MIN_SECTORS (32 * 1024 >> SECTOR_SHIFT)
#define DATA_DEV_BLOCK_SIZE_MAX_SECTORS (1024 * 1024 * 1024 >> SECTOR_SHIFT)

enum cache_metadata_mode {
	CM_WRITE,		/* metadata may be changed */
	CM_READ_ONLY,		/* metadata may not be changed */
	CM_FAIL
};

enum cache_io_mode {
	/*
	 * Data is written to cached blocks only.  These blocks are marked
	 * dirty.  If you lose the cache device you will lose data.
	 * Potential performance increase for both reads and writes.
	 */
	CM_IO_WRITEBACK,

	/*
	 * Data is written to both cache and origin.  Blocks are never
	 * dirty.  Potential performance benefit for reads only.
	 */
	CM_IO_WRITETHROUGH,

	/*
	 * A degraded mode useful for various cache coherency situations
	 * (eg, rolling back snapshots).  Reads and writes always go to the
	 * origin.  If a write goes to a cached oblock, then the cache
	 * block is invalidated.
	 */
	CM_IO_PASSTHROUGH
};

struct cache_features {
	enum cache_metadata_mode mode;
	enum cache_io_mode io_mode;
	unsigned metadata_version;
	bool discard_passdown:1;
};

struct cache_stats {
	atomic_t read_hit;
	atomic_t read_miss;
	atomic_t write_hit;
	atomic_t write_miss;
	atomic_t demotion;
	atomic_t promotion;
	atomic_t writeback;
	atomic_t copies_avoided;
	atomic_t cache_cell_clash;
	atomic_t commit_count;
	atomic_t discard_count;
};

struct cache {
	struct dm_target *ti;
	spinlock_t lock;

	/*
	 * Fields for converting from sectors to blocks.
	 */
	int sectors_per_block_shift;
	sector_t sectors_per_block;

	struct dm_cache_metadata *cmd;

	/*
	 * Metadata is written to this device.
	 */
	struct dm_dev *metadata_dev;

	/*
	 * The slower of the two data devices.  Typically a spindle.
	 */
	struct dm_dev *origin_dev;

	/*
	 * The faster of the two data devices.  Typically an SSD.
	 */
	struct dm_dev *cache_dev;

	/*
	 * Size of the origin device in _complete_ blocks and native sectors.
	 */
	dm_oblock_t origin_blocks;
	sector_t origin_sectors;

	/*
	 * Size of the cache device in blocks.
	 */
	dm_cblock_t cache_size;

	/*
	 * Invalidation fields.
	 */
	spinlock_t invalidation_lock;
	struct list_head invalidation_requests;

	sector_t migration_threshold;
	wait_queue_head_t migration_wait;
	atomic_t nr_allocated_migrations;

	/*
	 * The number of in flight migrations that are performing
	 * background io. eg, promotion, writeback.
	 */
	atomic_t nr_io_migrations;

	struct bio_list deferred_bios;

	struct rw_semaphore quiesce_lock;

	/*
	 * origin_blocks entries, discarded if set.
	 */
	dm_dblock_t discard_nr_blocks;
	unsigned long *discard_bitset;
	uint32_t discard_block_size; /* a power of 2 times sectors per block */

	/*
	 * Rather than reconstructing the table line for the status we just
	 * save it and regurgitate.
	 */
	unsigned nr_ctr_args;
	const char **ctr_args;

	struct dm_kcopyd_client *copier;
	struct work_struct deferred_bio_worker;
	struct work_struct migration_worker;
	struct workqueue_struct *wq;
	struct delayed_work waker;
	struct dm_bio_prison_v2 *prison;

	/*
	 * cache_size entries, dirty if set
	 */
	unsigned long *dirty_bitset;
	atomic_t nr_dirty;

	unsigned policy_nr_args;
	struct dm_cache_policy *policy;

	/*
	 * Cache features such as write-through.
	 */
	struct cache_features features;

	struct cache_stats stats;

	bool need_tick_bio:1;
	bool sized:1;
	bool invalidate:1;
	bool commit_requested:1;
	bool loaded_mappings:1;
	bool loaded_discards:1;

	struct rw_semaphore background_work_lock;

	struct batcher committer;
	struct work_struct commit_ws;

	struct io_tracker tracker;

	mempool_t migration_pool;

	struct bio_set bs;
};
479
480struct per_bio_data {
481 bool tick:1;
482 unsigned req_nr:2;
483 struct dm_bio_prison_cell_v2 *cell;
484 struct dm_hook_info hook_info;
485 sector_t len;
486};
487
488struct dm_cache_migration {
489 struct continuation k;
490 struct cache *cache;
491
492 struct policy_work *op;
493 struct bio *overwrite_bio;
494 struct dm_bio_prison_cell_v2 *cell;
495
496 dm_cblock_t invalidate_cblock;
497 dm_oblock_t invalidate_oblock;
498};

/*----------------------------------------------------------------*/

502static bool writethrough_mode(struct cache *cache)
503{
504 return cache->features.io_mode == CM_IO_WRITETHROUGH;
505}
506
507static bool writeback_mode(struct cache *cache)
508{
509 return cache->features.io_mode == CM_IO_WRITEBACK;
510}
511
512static inline bool passthrough_mode(struct cache *cache)
513{
514 return unlikely(cache->features.io_mode == CM_IO_PASSTHROUGH);
515}

/*----------------------------------------------------------------*/

519static void wake_deferred_bio_worker(struct cache *cache)
520{
521 queue_work(cache->wq, &cache->deferred_bio_worker);
522}
523
524static void wake_migration_worker(struct cache *cache)
525{
526 if (passthrough_mode(cache))
527 return;
528
529 queue_work(cache->wq, &cache->migration_worker);
530}

/*----------------------------------------------------------------*/

534static struct dm_bio_prison_cell_v2 *alloc_prison_cell(struct cache *cache)
535{
536 return dm_bio_prison_alloc_cell_v2(cache->prison, GFP_NOIO);
537}
538
539static void free_prison_cell(struct cache *cache, struct dm_bio_prison_cell_v2 *cell)
540{
541 dm_bio_prison_free_cell_v2(cache->prison, cell);
542}
543
544static struct dm_cache_migration *alloc_migration(struct cache *cache)
545{
546 struct dm_cache_migration *mg;
547
548 mg = mempool_alloc(&cache->migration_pool, GFP_NOIO);
549
550 memset(mg, 0, sizeof(*mg));
551
552 mg->cache = cache;
553 atomic_inc(&cache->nr_allocated_migrations);
554
555 return mg;
556}
557
558static void free_migration(struct dm_cache_migration *mg)
559{
560 struct cache *cache = mg->cache;
561
562 if (atomic_dec_and_test(&cache->nr_allocated_migrations))
563 wake_up(&cache->migration_wait);
564
565 mempool_free(mg, &cache->migration_pool);
566}

/*----------------------------------------------------------------*/

570static inline dm_oblock_t oblock_succ(dm_oblock_t b)
571{
572 return to_oblock(from_oblock(b) + 1ull);
573}
574
575static void build_key(dm_oblock_t begin, dm_oblock_t end, struct dm_cell_key_v2 *key)
576{
577 key->virtual = 0;
578 key->dev = 0;
579 key->block_begin = from_oblock(begin);
580 key->block_end = from_oblock(end);
581}

/*
 * We have two lock levels.  Level 0, which is used to prevent WRITEs, and
 * level 1 which prevents *both* READs and WRITEs.
 */
587#define WRITE_LOCK_LEVEL 0
588#define READ_WRITE_LOCK_LEVEL 1
589
590static unsigned lock_level(struct bio *bio)
591{
592 return bio_data_dir(bio) == WRITE ?
593 WRITE_LOCK_LEVEL :
594 READ_WRITE_LOCK_LEVEL;
595}

/*----------------------------------------------------------------
 * Per bio data
 *--------------------------------------------------------------*/

601static struct per_bio_data *get_per_bio_data(struct bio *bio)
602{
603 struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
604 BUG_ON(!pb);
605 return pb;
606}
607
608static struct per_bio_data *init_per_bio_data(struct bio *bio)
609{
610 struct per_bio_data *pb = get_per_bio_data(bio);
611
612 pb->tick = false;
613 pb->req_nr = dm_bio_get_target_bio_nr(bio);
614 pb->cell = NULL;
615 pb->len = 0;
616
617 return pb;
618}

/*----------------------------------------------------------------*/

622static void defer_bio(struct cache *cache, struct bio *bio)
623{
624 spin_lock_irq(&cache->lock);
625 bio_list_add(&cache->deferred_bios, bio);
626 spin_unlock_irq(&cache->lock);
627
628 wake_deferred_bio_worker(cache);
629}
630
631static void defer_bios(struct cache *cache, struct bio_list *bios)
632{
633 spin_lock_irq(&cache->lock);
634 bio_list_merge(&cache->deferred_bios, bios);
635 bio_list_init(bios);
636 spin_unlock_irq(&cache->lock);
637
638 wake_deferred_bio_worker(cache);
639}

/*----------------------------------------------------------------*/

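/*
 * Takes a shared lock on the bio's origin block.  Returns true and stashes
 * the cell in the per bio data if the lock was granted; returns false if an
 * exclusive holder exists, in which case the bio has been queued on the cell
 * and will be deferred when that lock drops.
 */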
643static bool bio_detain_shared(struct cache *cache, dm_oblock_t oblock, struct bio *bio)
644{
645 bool r;
646 struct per_bio_data *pb;
647 struct dm_cell_key_v2 key;
648 dm_oblock_t end = to_oblock(from_oblock(oblock) + 1ULL);
649 struct dm_bio_prison_cell_v2 *cell_prealloc, *cell;
650
651 cell_prealloc = alloc_prison_cell(cache);
652
653 build_key(oblock, end, &key);
654 r = dm_cell_get_v2(cache->prison, &key, lock_level(bio), bio, cell_prealloc, &cell);
655 if (!r) {
		/*
		 * Failed to get the lock.
		 */
659 free_prison_cell(cache, cell_prealloc);
660 return r;
661 }
662
663 if (cell != cell_prealloc)
664 free_prison_cell(cache, cell_prealloc);
665
666 pb = get_per_bio_data(bio);
667 pb->cell = cell;
668
669 return r;
670}

/*----------------------------------------------------------------*/

674static bool is_dirty(struct cache *cache, dm_cblock_t b)
675{
676 return test_bit(from_cblock(b), cache->dirty_bitset);
677}
678
679static void set_dirty(struct cache *cache, dm_cblock_t cblock)
680{
681 if (!test_and_set_bit(from_cblock(cblock), cache->dirty_bitset)) {
682 atomic_inc(&cache->nr_dirty);
683 policy_set_dirty(cache->policy, cblock);
684 }
685}

/*
 * These two are called when setting after migrations to force the policy
 * and dirty bitset to be in sync.
 */
691static void force_set_dirty(struct cache *cache, dm_cblock_t cblock)
692{
693 if (!test_and_set_bit(from_cblock(cblock), cache->dirty_bitset))
694 atomic_inc(&cache->nr_dirty);
695 policy_set_dirty(cache->policy, cblock);
696}
697
698static void force_clear_dirty(struct cache *cache, dm_cblock_t cblock)
699{
700 if (test_and_clear_bit(from_cblock(cblock), cache->dirty_bitset)) {
701 if (atomic_dec_return(&cache->nr_dirty) == 0)
702 dm_table_event(cache->ti->table);
703 }
704
705 policy_clear_dirty(cache->policy, cblock);
706}

/*----------------------------------------------------------------*/

710static bool block_size_is_power_of_two(struct cache *cache)
711{
712 return cache->sectors_per_block_shift >= 0;
713}
714
715static dm_block_t block_div(dm_block_t b, uint32_t n)
716{
717 do_div(b, n);
718
719 return b;
720}
721
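/*
 * Number of origin/cache sized blocks covered by a single discard block.
 */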
722static dm_block_t oblocks_per_dblock(struct cache *cache)
723{
724 dm_block_t oblocks = cache->discard_block_size;
725
726 if (block_size_is_power_of_two(cache))
727 oblocks >>= cache->sectors_per_block_shift;
728 else
729 oblocks = block_div(oblocks, cache->sectors_per_block);
730
731 return oblocks;
732}
733
734static dm_dblock_t oblock_to_dblock(struct cache *cache, dm_oblock_t oblock)
735{
736 return to_dblock(block_div(from_oblock(oblock),
737 oblocks_per_dblock(cache)));
738}
739
740static void set_discard(struct cache *cache, dm_dblock_t b)
741{
742 BUG_ON(from_dblock(b) >= from_dblock(cache->discard_nr_blocks));
743 atomic_inc(&cache->stats.discard_count);
744
745 spin_lock_irq(&cache->lock);
746 set_bit(from_dblock(b), cache->discard_bitset);
747 spin_unlock_irq(&cache->lock);
748}
749
750static void clear_discard(struct cache *cache, dm_dblock_t b)
751{
752 spin_lock_irq(&cache->lock);
753 clear_bit(from_dblock(b), cache->discard_bitset);
754 spin_unlock_irq(&cache->lock);
755}
756
757static bool is_discarded(struct cache *cache, dm_dblock_t b)
758{
759 int r;
760 spin_lock_irq(&cache->lock);
761 r = test_bit(from_dblock(b), cache->discard_bitset);
762 spin_unlock_irq(&cache->lock);
763
764 return r;
765}
766
767static bool is_discarded_oblock(struct cache *cache, dm_oblock_t b)
768{
769 int r;
770 spin_lock_irq(&cache->lock);
771 r = test_bit(from_dblock(oblock_to_dblock(cache, b)),
772 cache->discard_bitset);
773 spin_unlock_irq(&cache->lock);
774
775 return r;
776}

/*----------------------------------------------------------------
 * Remapping
 *--------------------------------------------------------------*/
781static void remap_to_origin(struct cache *cache, struct bio *bio)
782{
783 bio_set_dev(bio, cache->origin_dev->bdev);
784}
785
786static void remap_to_cache(struct cache *cache, struct bio *bio,
787 dm_cblock_t cblock)
788{
789 sector_t bi_sector = bio->bi_iter.bi_sector;
790 sector_t block = from_cblock(cblock);
791
792 bio_set_dev(bio, cache->cache_dev->bdev);
793 if (!block_size_is_power_of_two(cache))
794 bio->bi_iter.bi_sector =
795 (block * cache->sectors_per_block) +
796 sector_div(bi_sector, cache->sectors_per_block);
797 else
798 bio->bi_iter.bi_sector =
799 (block << cache->sectors_per_block_shift) |
800 (bi_sector & (cache->sectors_per_block - 1));
801}
802
803static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio)
804{
805 struct per_bio_data *pb;
806
807 spin_lock_irq(&cache->lock);
808 if (cache->need_tick_bio && !op_is_flush(bio->bi_opf) &&
809 bio_op(bio) != REQ_OP_DISCARD) {
810 pb = get_per_bio_data(bio);
811 pb->tick = true;
812 cache->need_tick_bio = false;
813 }
814 spin_unlock_irq(&cache->lock);
815}
816
817static void __remap_to_origin_clear_discard(struct cache *cache, struct bio *bio,
818 dm_oblock_t oblock, bool bio_has_pbd)
819{
820 if (bio_has_pbd)
821 check_if_tick_bio_needed(cache, bio);
822 remap_to_origin(cache, bio);
823 if (bio_data_dir(bio) == WRITE)
824 clear_discard(cache, oblock_to_dblock(cache, oblock));
825}
826
827static void remap_to_origin_clear_discard(struct cache *cache, struct bio *bio,
828 dm_oblock_t oblock)
829{
830
831 __remap_to_origin_clear_discard(cache, bio, oblock, true);
832}
833
834static void remap_to_cache_dirty(struct cache *cache, struct bio *bio,
835 dm_oblock_t oblock, dm_cblock_t cblock)
836{
837 check_if_tick_bio_needed(cache, bio);
838 remap_to_cache(cache, bio, cblock);
839 if (bio_data_dir(bio) == WRITE) {
840 set_dirty(cache, cblock);
841 clear_discard(cache, oblock_to_dblock(cache, oblock));
842 }
843}
844
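/*
 * Maps a bio's start sector to the origin block it falls within.
 */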
845static dm_oblock_t get_bio_block(struct cache *cache, struct bio *bio)
846{
847 sector_t block_nr = bio->bi_iter.bi_sector;
848
849 if (!block_size_is_power_of_two(cache))
850 (void) sector_div(block_nr, cache->sectors_per_block);
851 else
852 block_nr >>= cache->sectors_per_block_shift;
853
854 return to_oblock(block_nr);
855}
856
857static bool accountable_bio(struct cache *cache, struct bio *bio)
858{
859 return bio_op(bio) != REQ_OP_DISCARD;
860}
861
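/*
 * IO accounting: everything except discards is counted in the io_tracker,
 * which is later used to decide whether the device is idle enough for
 * background migrations.
 */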
862static void accounted_begin(struct cache *cache, struct bio *bio)
863{
864 struct per_bio_data *pb;
865
866 if (accountable_bio(cache, bio)) {
867 pb = get_per_bio_data(bio);
868 pb->len = bio_sectors(bio);
869 iot_io_begin(&cache->tracker, pb->len);
870 }
871}
872
873static void accounted_complete(struct cache *cache, struct bio *bio)
874{
875 struct per_bio_data *pb = get_per_bio_data(bio);
876
877 iot_io_end(&cache->tracker, pb->len);
878}
879
880static void accounted_request(struct cache *cache, struct bio *bio)
881{
882 accounted_begin(cache, bio);
883 submit_bio_noacct(bio);
884}
885
886static void issue_op(struct bio *bio, void *context)
887{
888 struct cache *cache = context;
889 accounted_request(cache, bio);
890}

/*
 * When running in writethrough mode we need to send writes to clean blocks
 * to both the cache and origin devices.  Clone the bio and send them in parallel.
 */
896static void remap_to_origin_and_cache(struct cache *cache, struct bio *bio,
897 dm_oblock_t oblock, dm_cblock_t cblock)
898{
899 struct bio *origin_bio = bio_clone_fast(bio, GFP_NOIO, &cache->bs);
900
901 BUG_ON(!origin_bio);
902
903 bio_chain(origin_bio, bio);

	/*
	 * Passing false to __remap_to_origin_clear_discard() skips the tick
	 * accounting, which needs per bio data that origin_bio doesn't have.
	 */
908 __remap_to_origin_clear_discard(cache, origin_bio, oblock, false);
909 submit_bio(origin_bio);
910
911 remap_to_cache(cache, bio, cblock);
912}

/*----------------------------------------------------------------
 * Failure modes
 *--------------------------------------------------------------*/
917static enum cache_metadata_mode get_cache_mode(struct cache *cache)
918{
919 return cache->features.mode;
920}
921
922static const char *cache_device_name(struct cache *cache)
923{
924 return dm_table_device_name(cache->ti->table);
925}
926
927static void notify_mode_switch(struct cache *cache, enum cache_metadata_mode mode)
928{
929 const char *descs[] = {
930 "write",
931 "read-only",
932 "fail"
933 };
934
935 dm_table_event(cache->ti->table);
936 DMINFO("%s: switching cache to %s mode",
937 cache_device_name(cache), descs[(int)mode]);
938}
939
940static void set_cache_mode(struct cache *cache, enum cache_metadata_mode new_mode)
941{
942 bool needs_check;
943 enum cache_metadata_mode old_mode = get_cache_mode(cache);
944
945 if (dm_cache_metadata_needs_check(cache->cmd, &needs_check)) {
946 DMERR("%s: unable to read needs_check flag, setting failure mode.",
947 cache_device_name(cache));
948 new_mode = CM_FAIL;
949 }
950
951 if (new_mode == CM_WRITE && needs_check) {
952 DMERR("%s: unable to switch cache to write mode until repaired.",
953 cache_device_name(cache));
954 if (old_mode != new_mode)
955 new_mode = old_mode;
956 else
957 new_mode = CM_READ_ONLY;
958 }

	/* Never move out of fail mode */
961 if (old_mode == CM_FAIL)
962 new_mode = CM_FAIL;
963
964 switch (new_mode) {
965 case CM_FAIL:
966 case CM_READ_ONLY:
967 dm_cache_metadata_set_read_only(cache->cmd);
968 break;
969
970 case CM_WRITE:
971 dm_cache_metadata_set_read_write(cache->cmd);
972 break;
973 }
974
975 cache->features.mode = new_mode;
976
977 if (new_mode != old_mode)
978 notify_mode_switch(cache, new_mode);
979}
980
981static void abort_transaction(struct cache *cache)
982{
983 const char *dev_name = cache_device_name(cache);
984
985 if (get_cache_mode(cache) >= CM_READ_ONLY)
986 return;
987
988 if (dm_cache_metadata_set_needs_check(cache->cmd)) {
989 DMERR("%s: failed to set 'needs_check' flag in metadata", dev_name);
990 set_cache_mode(cache, CM_FAIL);
991 }
992
993 DMERR_LIMIT("%s: aborting current metadata transaction", dev_name);
994 if (dm_cache_metadata_abort(cache->cmd)) {
995 DMERR("%s: failed to abort metadata transaction", dev_name);
996 set_cache_mode(cache, CM_FAIL);
997 }
998}
999
1000static void metadata_operation_failed(struct cache *cache, const char *op, int r)
1001{
1002 DMERR_LIMIT("%s: metadata operation '%s' failed: error = %d",
1003 cache_device_name(cache), op, r);
1004 abort_transaction(cache);
1005 set_cache_mode(cache, CM_READ_ONLY);
1006}

/*----------------------------------------------------------------*/

1010static void load_stats(struct cache *cache)
1011{
1012 struct dm_cache_statistics stats;
1013
1014 dm_cache_metadata_get_stats(cache->cmd, &stats);
1015 atomic_set(&cache->stats.read_hit, stats.read_hits);
1016 atomic_set(&cache->stats.read_miss, stats.read_misses);
1017 atomic_set(&cache->stats.write_hit, stats.write_hits);
1018 atomic_set(&cache->stats.write_miss, stats.write_misses);
1019}
1020
1021static void save_stats(struct cache *cache)
1022{
1023 struct dm_cache_statistics stats;
1024
1025 if (get_cache_mode(cache) >= CM_READ_ONLY)
1026 return;
1027
1028 stats.read_hits = atomic_read(&cache->stats.read_hit);
1029 stats.read_misses = atomic_read(&cache->stats.read_miss);
1030 stats.write_hits = atomic_read(&cache->stats.write_hit);
1031 stats.write_misses = atomic_read(&cache->stats.write_miss);
1032
1033 dm_cache_metadata_set_stats(cache->cmd, &stats);
1034}
1035
1036static void update_stats(struct cache_stats *stats, enum policy_operation op)
1037{
1038 switch (op) {
1039 case POLICY_PROMOTE:
1040 atomic_inc(&stats->promotion);
1041 break;
1042
1043 case POLICY_DEMOTE:
1044 atomic_inc(&stats->demotion);
1045 break;
1046
1047 case POLICY_WRITEBACK:
1048 atomic_inc(&stats->writeback);
1049 break;
1050 }
1051}

/*----------------------------------------------------------------
 * Migration processing
 *
 * Migration covers moving data from the origin device to the cache, or
 * vice versa.
 *--------------------------------------------------------------*/

1060static void inc_io_migrations(struct cache *cache)
1061{
1062 atomic_inc(&cache->nr_io_migrations);
1063}
1064
1065static void dec_io_migrations(struct cache *cache)
1066{
1067 atomic_dec(&cache->nr_io_migrations);
1068}
1069
1070static bool discard_or_flush(struct bio *bio)
1071{
1072 return bio_op(bio) == REQ_OP_DISCARD || op_is_flush(bio->bi_opf);
1073}
1074
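/*
 * Rounds the bio's range inwards to whole discard blocks so that a partially
 * covered block is never marked as discarded.
 */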
1075static void calc_discard_block_range(struct cache *cache, struct bio *bio,
1076 dm_dblock_t *b, dm_dblock_t *e)
1077{
1078 sector_t sb = bio->bi_iter.bi_sector;
1079 sector_t se = bio_end_sector(bio);
1080
1081 *b = to_dblock(dm_sector_div_up(sb, cache->discard_block_size));
1082
1083 if (se - sb < cache->discard_block_size)
1084 *e = *b;
1085 else
1086 *e = to_dblock(block_div(se, cache->discard_block_size));
1087}

/*----------------------------------------------------------------*/

1091static void prevent_background_work(struct cache *cache)
1092{
1093 lockdep_off();
1094 down_write(&cache->background_work_lock);
1095 lockdep_on();
1096}
1097
1098static void allow_background_work(struct cache *cache)
1099{
1100 lockdep_off();
1101 up_write(&cache->background_work_lock);
1102 lockdep_on();
1103}
1104
1105static bool background_work_begin(struct cache *cache)
1106{
1107 bool r;
1108
1109 lockdep_off();
1110 r = down_read_trylock(&cache->background_work_lock);
1111 lockdep_on();
1112
1113 return r;
1114}
1115
1116static void background_work_end(struct cache *cache)
1117{
1118 lockdep_off();
1119 up_read(&cache->background_work_lock);
1120 lockdep_on();
1121}

/*----------------------------------------------------------------*/

1125static bool bio_writes_complete_block(struct cache *cache, struct bio *bio)
1126{
1127 return (bio_data_dir(bio) == WRITE) &&
1128 (bio->bi_iter.bi_size == (cache->sectors_per_block << SECTOR_SHIFT));
1129}
1130
1131static bool optimisable_bio(struct cache *cache, struct bio *bio, dm_oblock_t block)
1132{
1133 return writeback_mode(cache) &&
1134 (is_discarded_oblock(cache, block) || bio_writes_complete_block(cache, bio));
1135}
1136
1137static void quiesce(struct dm_cache_migration *mg,
1138 void (*continuation)(struct work_struct *))
1139{
1140 init_continuation(&mg->k, continuation);
1141 dm_cell_quiesce_v2(mg->cache->prison, mg->cell, &mg->k.ws);
1142}
1143
1144static struct dm_cache_migration *ws_to_mg(struct work_struct *ws)
1145{
1146 struct continuation *k = container_of(ws, struct continuation, ws);
1147 return container_of(k, struct dm_cache_migration, k);
1148}
1149
1150static void copy_complete(int read_err, unsigned long write_err, void *context)
1151{
1152 struct dm_cache_migration *mg = container_of(context, struct dm_cache_migration, k);
1153
1154 if (read_err || write_err)
1155 mg->k.input = BLK_STS_IOERR;
1156
1157 queue_continuation(mg->cache->wq, &mg->k);
1158}
1159
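/*
 * Copies a whole block between the origin and cache devices using kcopyd;
 * 'promote' selects the direction.  copy_complete() records any error and
 * re-queues the migration's continuation.
 */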
1160static void copy(struct dm_cache_migration *mg, bool promote)
1161{
1162 struct dm_io_region o_region, c_region;
1163 struct cache *cache = mg->cache;
1164
1165 o_region.bdev = cache->origin_dev->bdev;
1166 o_region.sector = from_oblock(mg->op->oblock) * cache->sectors_per_block;
1167 o_region.count = cache->sectors_per_block;
1168
1169 c_region.bdev = cache->cache_dev->bdev;
1170 c_region.sector = from_cblock(mg->op->cblock) * cache->sectors_per_block;
1171 c_region.count = cache->sectors_per_block;
1172
1173 if (promote)
1174 dm_kcopyd_copy(cache->copier, &o_region, 1, &c_region, 0, copy_complete, &mg->k);
1175 else
1176 dm_kcopyd_copy(cache->copier, &c_region, 1, &o_region, 0, copy_complete, &mg->k);
1177}
1178
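/*
 * Releases the shared cell lock taken in bio_detain_shared(), freeing the
 * cell if we were the last holder.
 */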
1179static void bio_drop_shared_lock(struct cache *cache, struct bio *bio)
1180{
1181 struct per_bio_data *pb = get_per_bio_data(bio);
1182
1183 if (pb->cell && dm_cell_put_v2(cache->prison, pb->cell))
1184 free_prison_cell(cache, pb->cell);
1185 pb->cell = NULL;
1186}
1187
1188static void overwrite_endio(struct bio *bio)
1189{
1190 struct dm_cache_migration *mg = bio->bi_private;
1191 struct cache *cache = mg->cache;
1192 struct per_bio_data *pb = get_per_bio_data(bio);
1193
1194 dm_unhook_bio(&pb->hook_info, bio);
1195
1196 if (bio->bi_status)
1197 mg->k.input = bio->bi_status;
1198
1199 queue_continuation(cache->wq, &mg->k);
1200}
1201
1202static void overwrite(struct dm_cache_migration *mg,
1203 void (*continuation)(struct work_struct *))
1204{
1205 struct bio *bio = mg->overwrite_bio;
1206 struct per_bio_data *pb = get_per_bio_data(bio);
1207
1208 dm_hook_bio(&pb->hook_info, bio, overwrite_endio, mg);

	/*
	 * The overwrite bio is part of the copy operation, as such it does
	 * not set/clear discard or dirty flags.
	 */
1214 if (mg->op->op == POLICY_PROMOTE)
1215 remap_to_cache(mg->cache, bio, mg->op->cblock);
1216 else
1217 remap_to_origin(mg->cache, bio);
1218
1219 init_continuation(&mg->k, continuation);
1220 accounted_request(mg->cache, bio);
1221}

/*
 * Migration functions
 *
 * A migration moves a block between the origin and cache devices (promotion,
 * demotion), or writes a dirty cache block back to the origin (writeback).
 * Each step runs as a continuation on the cache workqueue and finishes in
 * mg_complete(), which drops the cell lock and re-queues any blocked bios.
 */

1234static void mg_complete(struct dm_cache_migration *mg, bool success)
1235{
1236 struct bio_list bios;
1237 struct cache *cache = mg->cache;
1238 struct policy_work *op = mg->op;
1239 dm_cblock_t cblock = op->cblock;
1240
1241 if (success)
1242 update_stats(&cache->stats, op->op);
1243
1244 switch (op->op) {
1245 case POLICY_PROMOTE:
1246 clear_discard(cache, oblock_to_dblock(cache, op->oblock));
1247 policy_complete_background_work(cache->policy, op, success);
1248
1249 if (mg->overwrite_bio) {
1250 if (success)
1251 force_set_dirty(cache, cblock);
1252 else if (mg->k.input)
1253 mg->overwrite_bio->bi_status = mg->k.input;
1254 else
1255 mg->overwrite_bio->bi_status = BLK_STS_IOERR;
1256 bio_endio(mg->overwrite_bio);
1257 } else {
1258 if (success)
1259 force_clear_dirty(cache, cblock);
1260 dec_io_migrations(cache);
1261 }
1262 break;
1263
1264 case POLICY_DEMOTE:
		/*
		 * We clear dirty here to update the nr_dirty counter.
		 */
1268 if (success)
1269 force_clear_dirty(cache, cblock);
1270 policy_complete_background_work(cache->policy, op, success);
1271 dec_io_migrations(cache);
1272 break;
1273
1274 case POLICY_WRITEBACK:
1275 if (success)
1276 force_clear_dirty(cache, cblock);
1277 policy_complete_background_work(cache->policy, op, success);
1278 dec_io_migrations(cache);
1279 break;
1280 }
1281
1282 bio_list_init(&bios);
1283 if (mg->cell) {
1284 if (dm_cell_unlock_v2(cache->prison, mg->cell, &bios))
1285 free_prison_cell(cache, mg->cell);
1286 }
1287
1288 free_migration(mg);
1289 defer_bios(cache, &bios);
1290 wake_migration_worker(cache);
1291
1292 background_work_end(cache);
1293}
1294
1295static void mg_success(struct work_struct *ws)
1296{
1297 struct dm_cache_migration *mg = ws_to_mg(ws);
1298 mg_complete(mg, mg->k.input == 0);
1299}
1300
1301static void mg_update_metadata(struct work_struct *ws)
1302{
1303 int r;
1304 struct dm_cache_migration *mg = ws_to_mg(ws);
1305 struct cache *cache = mg->cache;
1306 struct policy_work *op = mg->op;
1307
1308 switch (op->op) {
1309 case POLICY_PROMOTE:
1310 r = dm_cache_insert_mapping(cache->cmd, op->cblock, op->oblock);
1311 if (r) {
1312 DMERR_LIMIT("%s: migration failed; couldn't insert mapping",
1313 cache_device_name(cache));
1314 metadata_operation_failed(cache, "dm_cache_insert_mapping", r);
1315
1316 mg_complete(mg, false);
1317 return;
1318 }
1319 mg_complete(mg, true);
1320 break;
1321
1322 case POLICY_DEMOTE:
1323 r = dm_cache_remove_mapping(cache->cmd, op->cblock);
1324 if (r) {
1325 DMERR_LIMIT("%s: migration failed; couldn't update on disk metadata",
1326 cache_device_name(cache));
1327 metadata_operation_failed(cache, "dm_cache_remove_mapping", r);
1328
1329 mg_complete(mg, false);
1330 return;
1331 }

		/*
		 * It would be nice if we only had to commit when a REQ_FLUSH
		 * comes through.  But there is a scenario we have to look out
		 * for:
		 *
		 * - an oblock is mapped to a cache block
		 * - the demotion occurs
		 * - the cache block gets reallocated and overwritten
		 * - crash
		 *
		 * On recovery, because there was no commit, the metadata rolls
		 * back to having the oblock mapped to that cache block.  But
		 * the cache block has since been overwritten, so it would end
		 * up pointing at data that was never written to the oblock.
		 *
		 * To avoid this we require a commit as part of the demotion
		 * operation.
		 */
1352 init_continuation(&mg->k, mg_success);
1353 continue_after_commit(&cache->committer, &mg->k);
1354 schedule_commit(&cache->committer);
1355 break;
1356
1357 case POLICY_WRITEBACK:
1358 mg_complete(mg, true);
1359 break;
1360 }
1361}
1362
1363static void mg_update_metadata_after_copy(struct work_struct *ws)
1364{
1365 struct dm_cache_migration *mg = ws_to_mg(ws);

	/*
	 * Did the copy succeed?
	 */
1370 if (mg->k.input)
1371 mg_complete(mg, false);
1372 else
1373 mg_update_metadata(ws);
1374}
1375
1376static void mg_upgrade_lock(struct work_struct *ws)
1377{
1378 int r;
1379 struct dm_cache_migration *mg = ws_to_mg(ws);

	/*
	 * Did the copy succeed?
	 */
1384 if (mg->k.input)
1385 mg_complete(mg, false);
1386
1387 else {
		/*
		 * Now we want the lock to prevent both reads and writes.
		 */
1391 r = dm_cell_lock_promote_v2(mg->cache->prison, mg->cell,
1392 READ_WRITE_LOCK_LEVEL);
1393 if (r < 0)
1394 mg_complete(mg, false);
1395
1396 else if (r)
1397 quiesce(mg, mg_update_metadata);
1398
1399 else
1400 mg_update_metadata(ws);
1401 }
1402}
1403
1404static void mg_full_copy(struct work_struct *ws)
1405{
1406 struct dm_cache_migration *mg = ws_to_mg(ws);
1407 struct cache *cache = mg->cache;
1408 struct policy_work *op = mg->op;
1409 bool is_policy_promote = (op->op == POLICY_PROMOTE);
1410
1411 if ((!is_policy_promote && !is_dirty(cache, op->cblock)) ||
1412 is_discarded_oblock(cache, op->oblock)) {
1413 mg_upgrade_lock(ws);
1414 return;
1415 }
1416
1417 init_continuation(&mg->k, mg_upgrade_lock);
1418 copy(mg, is_policy_promote);
1419}
1420
1421static void mg_copy(struct work_struct *ws)
1422{
1423 struct dm_cache_migration *mg = ws_to_mg(ws);
1424
1425 if (mg->overwrite_bio) {
		/*
		 * No exclusive lock was held when we last checked if the bio
		 * was optimisable.  So we have to check again in case things
		 * have changed (eg, the block may be no longer discarded).
		 */
1431 if (!optimisable_bio(mg->cache, mg->overwrite_bio, mg->op->oblock)) {
			/*
			 * Fallback to a real full copy after doing some tidying up.
			 */
1435 bool rb = bio_detain_shared(mg->cache, mg->op->oblock, mg->overwrite_bio);
1436 BUG_ON(rb);
1437 mg->overwrite_bio = NULL;
1438 inc_io_migrations(mg->cache);
1439 mg_full_copy(ws);
1440 return;
1441 }

		/*
		 * It's safe to issue the overwrite here, even though it's new
		 * data, because all other IO has been locked out of the block:
		 * mg_lock_writes() took a READ_WRITE level lock when an
		 * overwrite bio is present.
		 */
1450 overwrite(mg, mg_update_metadata_after_copy);
1451
1452 } else
1453 mg_full_copy(ws);
1454}
1455
1456static int mg_lock_writes(struct dm_cache_migration *mg)
1457{
1458 int r;
1459 struct dm_cell_key_v2 key;
1460 struct cache *cache = mg->cache;
1461 struct dm_bio_prison_cell_v2 *prealloc;
1462
1463 prealloc = alloc_prison_cell(cache);

	/*
	 * Prevent writes to the block, but allow reads to continue.
	 * Unless we're using an overwrite bio, in which case we lock
	 * everything.
	 */
1470 build_key(mg->op->oblock, oblock_succ(mg->op->oblock), &key);
1471 r = dm_cell_lock_v2(cache->prison, &key,
1472 mg->overwrite_bio ? READ_WRITE_LOCK_LEVEL : WRITE_LOCK_LEVEL,
1473 prealloc, &mg->cell);
1474 if (r < 0) {
1475 free_prison_cell(cache, prealloc);
1476 mg_complete(mg, false);
1477 return r;
1478 }
1479
1480 if (mg->cell != prealloc)
1481 free_prison_cell(cache, prealloc);
1482
1483 if (r == 0)
1484 mg_copy(&mg->k.ws);
1485 else
1486 quiesce(mg, mg_copy);
1487
1488 return 0;
1489}
1490
1491static int mg_start(struct cache *cache, struct policy_work *op, struct bio *bio)
1492{
1493 struct dm_cache_migration *mg;
1494
1495 if (!background_work_begin(cache)) {
1496 policy_complete_background_work(cache->policy, op, false);
1497 return -EPERM;
1498 }
1499
1500 mg = alloc_migration(cache);
1501
1502 mg->op = op;
1503 mg->overwrite_bio = bio;
1504
1505 if (!bio)
1506 inc_io_migrations(cache);
1507
1508 return mg_lock_writes(mg);
1509}

/*----------------------------------------------------------------
 * invalidation processing
 *--------------------------------------------------------------*/

1515static void invalidate_complete(struct dm_cache_migration *mg, bool success)
1516{
1517 struct bio_list bios;
1518 struct cache *cache = mg->cache;
1519
1520 bio_list_init(&bios);
1521 if (dm_cell_unlock_v2(cache->prison, mg->cell, &bios))
1522 free_prison_cell(cache, mg->cell);
1523
1524 if (!success && mg->overwrite_bio)
1525 bio_io_error(mg->overwrite_bio);
1526
1527 free_migration(mg);
1528 defer_bios(cache, &bios);
1529
1530 background_work_end(cache);
1531}
1532
1533static void invalidate_completed(struct work_struct *ws)
1534{
1535 struct dm_cache_migration *mg = ws_to_mg(ws);
1536 invalidate_complete(mg, !mg->k.input);
1537}
1538
1539static int invalidate_cblock(struct cache *cache, dm_cblock_t cblock)
1540{
1541 int r = policy_invalidate_mapping(cache->policy, cblock);
1542 if (!r) {
1543 r = dm_cache_remove_mapping(cache->cmd, cblock);
1544 if (r) {
1545 DMERR_LIMIT("%s: invalidation failed; couldn't update on disk metadata",
1546 cache_device_name(cache));
1547 metadata_operation_failed(cache, "dm_cache_remove_mapping", r);
1548 }
1549
1550 } else if (r == -ENODATA) {
		/*
		 * Harmless, already unmapped.
		 */
1554 r = 0;
1555
1556 } else
1557 DMERR("%s: policy_invalidate_mapping failed", cache_device_name(cache));
1558
1559 return r;
1560}
1561
1562static void invalidate_remove(struct work_struct *ws)
1563{
1564 int r;
1565 struct dm_cache_migration *mg = ws_to_mg(ws);
1566 struct cache *cache = mg->cache;
1567
1568 r = invalidate_cblock(cache, mg->invalidate_cblock);
1569 if (r) {
1570 invalidate_complete(mg, false);
1571 return;
1572 }
1573
1574 init_continuation(&mg->k, invalidate_completed);
1575 continue_after_commit(&cache->committer, &mg->k);
1576 remap_to_origin_clear_discard(cache, mg->overwrite_bio, mg->invalidate_oblock);
1577 mg->overwrite_bio = NULL;
1578 schedule_commit(&cache->committer);
1579}
1580
1581static int invalidate_lock(struct dm_cache_migration *mg)
1582{
1583 int r;
1584 struct dm_cell_key_v2 key;
1585 struct cache *cache = mg->cache;
1586 struct dm_bio_prison_cell_v2 *prealloc;
1587
1588 prealloc = alloc_prison_cell(cache);
1589
1590 build_key(mg->invalidate_oblock, oblock_succ(mg->invalidate_oblock), &key);
1591 r = dm_cell_lock_v2(cache->prison, &key,
1592 READ_WRITE_LOCK_LEVEL, prealloc, &mg->cell);
1593 if (r < 0) {
1594 free_prison_cell(cache, prealloc);
1595 invalidate_complete(mg, false);
1596 return r;
1597 }
1598
1599 if (mg->cell != prealloc)
1600 free_prison_cell(cache, prealloc);
1601
1602 if (r)
1603 quiesce(mg, invalidate_remove);
1604
1605 else {
		/*
		 * We can't call invalidate_remove() directly here because we
		 * might still be in request context.
		 */
1610 init_continuation(&mg->k, invalidate_remove);
1611 queue_work(cache->wq, &mg->k.ws);
1612 }
1613
1614 return 0;
1615}
1616
1617static int invalidate_start(struct cache *cache, dm_cblock_t cblock,
1618 dm_oblock_t oblock, struct bio *bio)
1619{
1620 struct dm_cache_migration *mg;
1621
1622 if (!background_work_begin(cache))
1623 return -EPERM;
1624
1625 mg = alloc_migration(cache);
1626
1627 mg->overwrite_bio = bio;
1628 mg->invalidate_cblock = cblock;
1629 mg->invalidate_oblock = oblock;
1630
1631 return invalidate_lock(mg);
1632}

/*----------------------------------------------------------------
 * bio processing
 *--------------------------------------------------------------*/

1638enum busy {
1639 IDLE,
1640 BUSY
1641};
1642
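/*
 * There is spare bandwidth for background work when user IO has been idle
 * for a second and the volume of in-flight migrations does not exceed
 * migration_threshold.
 */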
1643static enum busy spare_migration_bandwidth(struct cache *cache)
1644{
1645 bool idle = iot_idle_for(&cache->tracker, HZ);
1646 sector_t current_volume = (atomic_read(&cache->nr_io_migrations) + 1) *
1647 cache->sectors_per_block;
1648
1649 if (idle && current_volume <= cache->migration_threshold)
1650 return IDLE;
1651 else
1652 return BUSY;
1653}
1654
1655static void inc_hit_counter(struct cache *cache, struct bio *bio)
1656{
1657 atomic_inc(bio_data_dir(bio) == READ ?
1658 &cache->stats.read_hit : &cache->stats.write_hit);
1659}
1660
1661static void inc_miss_counter(struct cache *cache, struct bio *bio)
1662{
1663 atomic_inc(bio_data_dir(bio) == READ ?
1664 &cache->stats.read_miss : &cache->stats.write_miss);
1665}

/*----------------------------------------------------------------*/

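/*
 * Decides how to remap a bio: hit, miss, promotion via the policy, or
 * passthrough.  Sets *commit_needed when completing the bio depends on a
 * metadata commit (FUA, or waiting for an exclusive lock to drop).
 */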
1669static int map_bio(struct cache *cache, struct bio *bio, dm_oblock_t block,
1670 bool *commit_needed)
1671{
1672 int r, data_dir;
1673 bool rb, background_queued;
1674 dm_cblock_t cblock;
1675
1676 *commit_needed = false;
1677
1678 rb = bio_detain_shared(cache, block, bio);
1679 if (!rb) {
		/*
		 * An exclusive lock is held for this block, so we have to
		 * wait.  We set the commit_needed flag so the current
		 * transaction will be committed asap, allowing this lock
		 * to be dropped sooner.
		 */
1686 *commit_needed = true;
1687 return DM_MAPIO_SUBMITTED;
1688 }
1689
1690 data_dir = bio_data_dir(bio);
1691
1692 if (optimisable_bio(cache, bio, block)) {
1693 struct policy_work *op = NULL;
1694
1695 r = policy_lookup_with_work(cache->policy, block, &cblock, data_dir, true, &op);
1696 if (unlikely(r && r != -ENOENT)) {
1697 DMERR_LIMIT("%s: policy_lookup_with_work() failed with r = %d",
1698 cache_device_name(cache), r);
1699 bio_io_error(bio);
1700 return DM_MAPIO_SUBMITTED;
1701 }
1702
1703 if (r == -ENOENT && op) {
1704 bio_drop_shared_lock(cache, bio);
1705 BUG_ON(op->op != POLICY_PROMOTE);
1706 mg_start(cache, op, bio);
1707 return DM_MAPIO_SUBMITTED;
1708 }
1709 } else {
1710 r = policy_lookup(cache->policy, block, &cblock, data_dir, false, &background_queued);
1711 if (unlikely(r && r != -ENOENT)) {
1712 DMERR_LIMIT("%s: policy_lookup() failed with r = %d",
1713 cache_device_name(cache), r);
1714 bio_io_error(bio);
1715 return DM_MAPIO_SUBMITTED;
1716 }
1717
1718 if (background_queued)
1719 wake_migration_worker(cache);
1720 }
1721
1722 if (r == -ENOENT) {
1723 struct per_bio_data *pb = get_per_bio_data(bio);

		/*
		 * Miss.
		 */
1728 inc_miss_counter(cache, bio);
1729 if (pb->req_nr == 0) {
1730 accounted_begin(cache, bio);
1731 remap_to_origin_clear_discard(cache, bio, block);
1732 } else {
			/*
			 * This is a duplicate writethrough io that is no
			 * longer needed because the block has been demoted.
			 */
1737 bio_endio(bio);
1738 return DM_MAPIO_SUBMITTED;
1739 }
1740 } else {
		/*
		 * Hit.
		 */
1744 inc_hit_counter(cache, bio);

		/*
		 * Passthrough always maps to the origin, invalidating any
		 * cache blocks that are written to.
		 */
1750 if (passthrough_mode(cache)) {
1751 if (bio_data_dir(bio) == WRITE) {
1752 bio_drop_shared_lock(cache, bio);
1753 atomic_inc(&cache->stats.demotion);
1754 invalidate_start(cache, cblock, block, bio);
1755 } else
1756 remap_to_origin_clear_discard(cache, bio, block);
1757 } else {
1758 if (bio_data_dir(bio) == WRITE && writethrough_mode(cache) &&
1759 !is_dirty(cache, cblock)) {
1760 remap_to_origin_and_cache(cache, bio, block, cblock);
1761 accounted_begin(cache, bio);
1762 } else
1763 remap_to_cache_dirty(cache, bio, block, cblock);
1764 }
1765 }

	/*
	 * dm core turns FUA requests into a separate payload and FLUSH req.
	 */
1770 if (bio->bi_opf & REQ_FUA) {
		/*
		 * issue_after_commit will call accounted_begin a second time.  So
		 * we call accounted_complete() to avoid double accounting.
		 */
1775 accounted_complete(cache, bio);
1776 issue_after_commit(&cache->committer, bio);
1777 *commit_needed = true;
1778 return DM_MAPIO_SUBMITTED;
1779 }
1780
1781 return DM_MAPIO_REMAPPED;
1782}
1783
1784static bool process_bio(struct cache *cache, struct bio *bio)
1785{
1786 bool commit_needed;
1787
1788 if (map_bio(cache, bio, get_bio_block(cache, bio), &commit_needed) == DM_MAPIO_REMAPPED)
1789 submit_bio_noacct(bio);
1790
1791 return commit_needed;
1792}

/*
 * A non-zero return indicates read_only or fail_io mode.
 */
1797static int commit(struct cache *cache, bool clean_shutdown)
1798{
1799 int r;
1800
1801 if (get_cache_mode(cache) >= CM_READ_ONLY)
1802 return -EINVAL;
1803
1804 atomic_inc(&cache->stats.commit_count);
1805 r = dm_cache_commit(cache->cmd, clean_shutdown);
1806 if (r)
1807 metadata_operation_failed(cache, "dm_cache_commit", r);
1808
1809 return r;
1810}

/*
 * Used by the batcher.
 */
1815static blk_status_t commit_op(void *context)
1816{
1817 struct cache *cache = context;
1818
1819 if (dm_cache_changed_this_transaction(cache->cmd))
1820 return errno_to_blk_status(commit(cache, false));
1821
1822 return 0;
1823}

/*----------------------------------------------------------------*/

1827static bool process_flush_bio(struct cache *cache, struct bio *bio)
1828{
1829 struct per_bio_data *pb = get_per_bio_data(bio);
1830
1831 if (!pb->req_nr)
1832 remap_to_origin(cache, bio);
1833 else
1834 remap_to_cache(cache, bio, 0);
1835
1836 issue_after_commit(&cache->committer, bio);
1837 return true;
1838}
1839
1840static bool process_discard_bio(struct cache *cache, struct bio *bio)
1841{
1842 dm_dblock_t b, e;

	/*
	 * Mark every discard block covered by the bio, then optionally pass
	 * the discard down to the origin device.
	 */
1847 calc_discard_block_range(cache, bio, &b, &e);
1848 while (b != e) {
1849 set_discard(cache, b);
1850 b = to_dblock(from_dblock(b) + 1);
1851 }
1852
1853 if (cache->features.discard_passdown) {
1854 remap_to_origin(cache, bio);
1855 submit_bio_noacct(bio);
1856 } else
1857 bio_endio(bio);
1858
1859 return false;
1860}
1861
1862static void process_deferred_bios(struct work_struct *ws)
1863{
1864 struct cache *cache = container_of(ws, struct cache, deferred_bio_worker);
1865
1866 bool commit_needed = false;
1867 struct bio_list bios;
1868 struct bio *bio;
1869
1870 bio_list_init(&bios);
1871
1872 spin_lock_irq(&cache->lock);
1873 bio_list_merge(&bios, &cache->deferred_bios);
1874 bio_list_init(&cache->deferred_bios);
1875 spin_unlock_irq(&cache->lock);
1876
1877 while ((bio = bio_list_pop(&bios))) {
1878 if (bio->bi_opf & REQ_PREFLUSH)
1879 commit_needed = process_flush_bio(cache, bio) || commit_needed;
1880
1881 else if (bio_op(bio) == REQ_OP_DISCARD)
1882 commit_needed = process_discard_bio(cache, bio) || commit_needed;
1883
1884 else
1885 commit_needed = process_bio(cache, bio) || commit_needed;
1886 }
1887
1888 if (commit_needed)
1889 schedule_commit(&cache->committer);
1890}

/*----------------------------------------------------------------
 * Main worker loop
 *--------------------------------------------------------------*/

1896static void requeue_deferred_bios(struct cache *cache)
1897{
1898 struct bio *bio;
1899 struct bio_list bios;
1900
1901 bio_list_init(&bios);
1902 bio_list_merge(&bios, &cache->deferred_bios);
1903 bio_list_init(&cache->deferred_bios);
1904
1905 while ((bio = bio_list_pop(&bios))) {
1906 bio->bi_status = BLK_STS_DM_REQUEUE;
1907 bio_endio(bio);
1908 }
1909}

/*
 * We want to commit periodically so that not too much
 * unwritten metadata builds up.
 */
1915static void do_waker(struct work_struct *ws)
1916{
1917 struct cache *cache = container_of(to_delayed_work(ws), struct cache, waker);
1918
1919 policy_tick(cache->policy, true);
1920 wake_migration_worker(cache);
1921 schedule_commit(&cache->committer);
1922 queue_delayed_work(cache->wq, &cache->waker, COMMIT_PERIOD);
1923}
1924
1925static void check_migrations(struct work_struct *ws)
1926{
1927 int r;
1928 struct policy_work *op;
1929 struct cache *cache = container_of(ws, struct cache, migration_worker);
1930 enum busy b;
1931
1932 for (;;) {
1933 b = spare_migration_bandwidth(cache);
1934
1935 r = policy_get_background_work(cache->policy, b == IDLE, &op);
1936 if (r == -ENODATA)
1937 break;
1938
1939 if (r) {
1940 DMERR_LIMIT("%s: policy_background_work failed",
1941 cache_device_name(cache));
1942 break;
1943 }
1944
1945 r = mg_start(cache, op, NULL);
1946 if (r)
1947 break;
1948 }
1949}

/*----------------------------------------------------------------
 * Target methods
 *--------------------------------------------------------------*/

/*
 * This function gets called on the error paths of the constructor, so we
 * have to cope with a partially initialised struct.
 */
1959static void destroy(struct cache *cache)
1960{
1961 unsigned i;
1962
1963 mempool_exit(&cache->migration_pool);
1964
1965 if (cache->prison)
1966 dm_bio_prison_destroy_v2(cache->prison);
1967
1968 if (cache->wq)
1969 destroy_workqueue(cache->wq);
1970
1971 if (cache->dirty_bitset)
1972 free_bitset(cache->dirty_bitset);
1973
1974 if (cache->discard_bitset)
1975 free_bitset(cache->discard_bitset);
1976
1977 if (cache->copier)
1978 dm_kcopyd_client_destroy(cache->copier);
1979
1980 if (cache->cmd)
1981 dm_cache_metadata_close(cache->cmd);
1982
1983 if (cache->metadata_dev)
1984 dm_put_device(cache->ti, cache->metadata_dev);
1985
1986 if (cache->origin_dev)
1987 dm_put_device(cache->ti, cache->origin_dev);
1988
1989 if (cache->cache_dev)
1990 dm_put_device(cache->ti, cache->cache_dev);
1991
1992 if (cache->policy)
1993 dm_cache_policy_destroy(cache->policy);
1994
1995 for (i = 0; i < cache->nr_ctr_args ; i++)
1996 kfree(cache->ctr_args[i]);
1997 kfree(cache->ctr_args);
1998
1999 bioset_exit(&cache->bs);
2000
2001 kfree(cache);
2002}
2003
2004static void cache_dtr(struct dm_target *ti)
2005{
2006 struct cache *cache = ti->private;
2007
2008 destroy(cache);
2009}
2010
2011static sector_t get_dev_size(struct dm_dev *dev)
2012{
2013 return i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT;
2014}

/*----------------------------------------------------------------*/

/*
 * Construct a cache device mapping.
 *
 * cache <metadata dev> <cache dev> <origin dev> <block size>
 *       <#feature args> [<feature arg>]*
 *       <policy> <#policy args> [<policy arg>]*
 *
 * metadata dev    : fast device holding the persistent metadata
 * cache dev       : fast device holding cached data blocks
 * origin dev      : slow device holding original data blocks
 * block size      : cache unit size in sectors
 *
 * #feature args   : number of feature arguments passed
 * feature args    : writethrough.  (The default is writeback.)
 *
 * policy          : the replacement policy to use
 * #policy args    : an even number of policy arguments corresponding
 *                   to key/value pairs passed to the policy
 * policy args     : key/value pairs passed to the policy
 *                   E.g. 'sequential_threshold 1024'
 *                   See cache-policies.txt for details.
 *
 * Optional feature arguments are:
 *   writethrough  : write through caching that prohibits cache block
 *                   content from being different from origin block content.
 *                   Without this argument, the default behaviour is to write
 *                   back cache block contents later for performance reasons,
 *                   so they may differ from the corresponding origin blocks.
 */
2047struct cache_args {
2048 struct dm_target *ti;
2049
2050 struct dm_dev *metadata_dev;
2051
2052 struct dm_dev *cache_dev;
2053 sector_t cache_sectors;
2054
2055 struct dm_dev *origin_dev;
2056 sector_t origin_sectors;
2057
2058 uint32_t block_size;
2059
2060 const char *policy_name;
2061 int policy_argc;
2062 const char **policy_argv;
2063
2064 struct cache_features features;
2065};
2066
2067static void destroy_cache_args(struct cache_args *ca)
2068{
2069 if (ca->metadata_dev)
2070 dm_put_device(ca->ti, ca->metadata_dev);
2071
2072 if (ca->cache_dev)
2073 dm_put_device(ca->ti, ca->cache_dev);
2074
2075 if (ca->origin_dev)
2076 dm_put_device(ca->ti, ca->origin_dev);
2077
2078 kfree(ca);
2079}
2080
2081static bool at_least_one_arg(struct dm_arg_set *as, char **error)
2082{
2083 if (!as->argc) {
2084 *error = "Insufficient args";
2085 return false;
2086 }
2087
2088 return true;
2089}
2090
2091static int parse_metadata_dev(struct cache_args *ca, struct dm_arg_set *as,
2092 char **error)
2093{
2094 int r;
2095 sector_t metadata_dev_size;
2096 char b[BDEVNAME_SIZE];
2097
2098 if (!at_least_one_arg(as, error))
2099 return -EINVAL;
2100
2101 r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
2102 &ca->metadata_dev);
2103 if (r) {
2104 *error = "Error opening metadata device";
2105 return r;
2106 }
2107
2108 metadata_dev_size = get_dev_size(ca->metadata_dev);
2109 if (metadata_dev_size > DM_CACHE_METADATA_MAX_SECTORS_WARNING)
2110 DMWARN("Metadata device %s is larger than %u sectors: excess space will not be used.",
2111 bdevname(ca->metadata_dev->bdev, b), THIN_METADATA_MAX_SECTORS);
2112
2113 return 0;
2114}
2115
2116static int parse_cache_dev(struct cache_args *ca, struct dm_arg_set *as,
2117 char **error)
2118{
2119 int r;
2120
2121 if (!at_least_one_arg(as, error))
2122 return -EINVAL;
2123
2124 r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
2125 &ca->cache_dev);
2126 if (r) {
2127 *error = "Error opening cache device";
2128 return r;
2129 }
2130 ca->cache_sectors = get_dev_size(ca->cache_dev);
2131
2132 return 0;
2133}
2134
2135static int parse_origin_dev(struct cache_args *ca, struct dm_arg_set *as,
2136 char **error)
2137{
2138 int r;
2139
2140 if (!at_least_one_arg(as, error))
2141 return -EINVAL;
2142
2143 r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
2144 &ca->origin_dev);
2145 if (r) {
2146 *error = "Error opening origin device";
2147 return r;
2148 }
2149
2150 ca->origin_sectors = get_dev_size(ca->origin_dev);
2151 if (ca->ti->len > ca->origin_sectors) {
2152 *error = "Device size larger than cached device";
2153 return -EINVAL;
2154 }
2155
2156 return 0;
2157}
2158
2159static int parse_block_size(struct cache_args *ca, struct dm_arg_set *as,
2160 char **error)
2161{
2162 unsigned long block_size;
2163
2164 if (!at_least_one_arg(as, error))
2165 return -EINVAL;
2166
2167 if (kstrtoul(dm_shift_arg(as), 10, &block_size) || !block_size ||
2168 block_size < DATA_DEV_BLOCK_SIZE_MIN_SECTORS ||
2169 block_size > DATA_DEV_BLOCK_SIZE_MAX_SECTORS ||
2170 block_size & (DATA_DEV_BLOCK_SIZE_MIN_SECTORS - 1)) {
2171 *error = "Invalid data block size";
2172 return -EINVAL;
2173 }
2174
2175 if (block_size > ca->cache_sectors) {
2176 *error = "Data block size is larger than the cache device";
2177 return -EINVAL;
2178 }
2179
2180 ca->block_size = block_size;
2181
2182 return 0;
2183}
2184
2185static void init_features(struct cache_features *cf)
2186{
2187 cf->mode = CM_WRITE;
2188 cf->io_mode = CM_IO_WRITEBACK;
2189 cf->metadata_version = 1;
2190 cf->discard_passdown = true;
2191}
2192
2193static int parse_features(struct cache_args *ca, struct dm_arg_set *as,
2194 char **error)
2195{
2196 static const struct dm_arg _args[] = {
2197 {0, 3, "Invalid number of cache feature arguments"},
2198 };
2199
2200 int r, mode_ctr = 0;
2201 unsigned argc;
2202 const char *arg;
2203 struct cache_features *cf = &ca->features;
2204
2205 init_features(cf);
2206
2207 r = dm_read_arg_group(_args, as, &argc, error);
2208 if (r)
2209 return -EINVAL;
2210
2211 while (argc--) {
2212 arg = dm_shift_arg(as);
2213
2214 if (!strcasecmp(arg, "writeback")) {
2215 cf->io_mode = CM_IO_WRITEBACK;
2216 mode_ctr++;
2217 }
2218
2219 else if (!strcasecmp(arg, "writethrough")) {
2220 cf->io_mode = CM_IO_WRITETHROUGH;
2221 mode_ctr++;
2222 }
2223
2224 else if (!strcasecmp(arg, "passthrough")) {
2225 cf->io_mode = CM_IO_PASSTHROUGH;
2226 mode_ctr++;
2227 }
2228
2229 else if (!strcasecmp(arg, "metadata2"))
2230 cf->metadata_version = 2;
2231
2232 else if (!strcasecmp(arg, "no_discard_passdown"))
2233 cf->discard_passdown = false;
2234
2235 else {
2236 *error = "Unrecognised cache feature requested";
2237 return -EINVAL;
2238 }
2239 }
2240
2241 if (mode_ctr > 1) {
2242 *error = "Duplicate cache io_mode features requested";
2243 return -EINVAL;
2244 }
2245
2246 return 0;
2247}
2248
2249static int parse_policy(struct cache_args *ca, struct dm_arg_set *as,
2250 char **error)
2251{
2252 static const struct dm_arg _args[] = {
2253 {0, 1024, "Invalid number of policy arguments"},
2254 };
2255
2256 int r;
2257
2258 if (!at_least_one_arg(as, error))
2259 return -EINVAL;
2260
2261 ca->policy_name = dm_shift_arg(as);
2262
2263 r = dm_read_arg_group(_args, as, &ca->policy_argc, error);
2264 if (r)
2265 return -EINVAL;
2266
2267 ca->policy_argv = (const char **)as->argv;
2268 dm_consume_args(as, ca->policy_argc);
2269
2270 return 0;
2271}
2272
2273static int parse_cache_args(struct cache_args *ca, int argc, char **argv,
2274 char **error)
2275{
2276 int r;
2277 struct dm_arg_set as;
2278
2279 as.argc = argc;
2280 as.argv = argv;
2281
2282 r = parse_metadata_dev(ca, &as, error);
2283 if (r)
2284 return r;
2285
2286 r = parse_cache_dev(ca, &as, error);
2287 if (r)
2288 return r;
2289
2290 r = parse_origin_dev(ca, &as, error);
2291 if (r)
2292 return r;
2293
2294 r = parse_block_size(ca, &as, error);
2295 if (r)
2296 return r;
2297
2298 r = parse_features(ca, &as, error);
2299 if (r)
2300 return r;
2301
2302 r = parse_policy(ca, &as, error);
2303 if (r)
2304 return r;
2305
2306 return 0;
2307}

/*----------------------------------------------------------------*/

2311static struct kmem_cache *migration_cache;
2312
2313#define NOT_CORE_OPTION 1
2314
2315static int process_config_option(struct cache *cache, const char *key, const char *value)
2316{
2317 unsigned long tmp;
2318
2319 if (!strcasecmp(key, "migration_threshold")) {
2320 if (kstrtoul(value, 10, &tmp))
2321 return -EINVAL;
2322
2323 cache->migration_threshold = tmp;
2324 return 0;
2325 }
2326
2327 return NOT_CORE_OPTION;
2328}
2329
2330static int set_config_value(struct cache *cache, const char *key, const char *value)
2331{
2332 int r = process_config_option(cache, key, value);
2333
2334 if (r == NOT_CORE_OPTION)
2335 r = policy_set_config_value(cache->policy, key, value);
2336
2337 if (r)
2338 DMWARN("bad config value for %s: %s", key, value);
2339
2340 return r;
2341}
2342
2343static int set_config_values(struct cache *cache, int argc, const char **argv)
2344{
2345 int r = 0;
2346
2347 if (argc & 1) {
2348 DMWARN("Odd number of policy arguments given but they should be <key> <value> pairs.");
2349 return -EINVAL;
2350 }
2351
2352 while (argc) {
2353 r = set_config_value(cache, argv[0], argv[1]);
2354 if (r)
2355 break;
2356
2357 argc -= 2;
2358 argv += 2;
2359 }
2360
2361 return r;
2362}
2363
2364static int create_cache_policy(struct cache *cache, struct cache_args *ca,
2365 char **error)
2366{
2367 struct dm_cache_policy *p = dm_cache_policy_create(ca->policy_name,
2368 cache->cache_size,
2369 cache->origin_sectors,
2370 cache->sectors_per_block);
2371 if (IS_ERR(p)) {
2372 *error = "Error creating cache's policy";
2373 return PTR_ERR(p);
2374 }
2375 cache->policy = p;
2376 BUG_ON(!cache->policy);
2377
2378 return 0;
2379}

/*
 * We want the discard block size to be at least the size of the cache
 * block size and have no more than 2^14 discard blocks across the origin.
 */
2385#define MAX_DISCARD_BLOCKS (1 << 14)
2386
2387static bool too_many_discard_blocks(sector_t discard_block_size,
2388 sector_t origin_size)
2389{
2390 (void) sector_div(origin_size, discard_block_size);
2391
2392 return origin_size > MAX_DISCARD_BLOCKS;
2393}
2394
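/*
 * Starts with the cache block size and doubles it until the origin is
 * covered by no more than MAX_DISCARD_BLOCKS discard blocks.
 */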
2395static sector_t calculate_discard_block_size(sector_t cache_block_size,
2396 sector_t origin_size)
2397{
2398 sector_t discard_block_size = cache_block_size;
2399
2400 if (origin_size)
2401 while (too_many_discard_blocks(discard_block_size, origin_size))
2402 discard_block_size *= 2;
2403
2404 return discard_block_size;
2405}
2406
2407static void set_cache_size(struct cache *cache, dm_cblock_t size)
2408{
2409 dm_block_t nr_blocks = from_cblock(size);
2410
2411 if (nr_blocks > (1 << 20) && cache->cache_size != size)
2412 DMWARN_LIMIT("You have created a cache device with a lot of individual cache blocks (%llu)\n"
2413 "All these mappings can consume a lot of kernel memory, and take some time to read/write.\n"
2414 "Please consider increasing the cache block size to reduce the overall cache block count.",
2415 (unsigned long long) nr_blocks);
2416
2417 cache->cache_size = size;
2418}
2419
2420#define DEFAULT_MIGRATION_THRESHOLD 2048
2421
2422static int cache_create(struct cache_args *ca, struct cache **result)
2423{
2424 int r = 0;
2425 char **error = &ca->ti->error;
2426 struct cache *cache;
2427 struct dm_target *ti = ca->ti;
2428 dm_block_t origin_blocks;
2429 struct dm_cache_metadata *cmd;
2430 bool may_format = ca->features.mode == CM_WRITE;
2431
2432 cache = kzalloc(sizeof(*cache), GFP_KERNEL);
2433 if (!cache)
2434 return -ENOMEM;
2435
2436 cache->ti = ca->ti;
2437 ti->private = cache;
2438 ti->num_flush_bios = 2;
2439 ti->flush_supported = true;
2440
2441 ti->num_discard_bios = 1;
2442 ti->discards_supported = true;
2443
2444 ti->per_io_data_size = sizeof(struct per_bio_data);
2445
2446 cache->features = ca->features;
2447 if (writethrough_mode(cache)) {
		/* Create bioset for writethrough bios issued to origin */
2449 r = bioset_init(&cache->bs, BIO_POOL_SIZE, 0, 0);
2450 if (r)
2451 goto bad;
2452 }
2453
2454 cache->metadata_dev = ca->metadata_dev;
2455 cache->origin_dev = ca->origin_dev;
2456 cache->cache_dev = ca->cache_dev;
2457
2458 ca->metadata_dev = ca->origin_dev = ca->cache_dev = NULL;
2459
2460 origin_blocks = cache->origin_sectors = ca->origin_sectors;
2461 origin_blocks = block_div(origin_blocks, ca->block_size);
2462 cache->origin_blocks = to_oblock(origin_blocks);
2463
2464 cache->sectors_per_block = ca->block_size;
2465 if (dm_set_target_max_io_len(ti, cache->sectors_per_block)) {
2466 r = -EINVAL;
2467 goto bad;
2468 }
2469
2470 if (ca->block_size & (ca->block_size - 1)) {
2471 dm_block_t cache_size = ca->cache_sectors;
2472
2473 cache->sectors_per_block_shift = -1;
2474 cache_size = block_div(cache_size, ca->block_size);
2475 set_cache_size(cache, to_cblock(cache_size));
2476 } else {
2477 cache->sectors_per_block_shift = __ffs(ca->block_size);
2478 set_cache_size(cache, to_cblock(ca->cache_sectors >> cache->sectors_per_block_shift));
2479 }
2480
2481 r = create_cache_policy(cache, ca, error);
2482 if (r)
2483 goto bad;
2484
2485 cache->policy_nr_args = ca->policy_argc;
2486 cache->migration_threshold = DEFAULT_MIGRATION_THRESHOLD;
2487
2488 r = set_config_values(cache, ca->policy_argc, ca->policy_argv);
2489 if (r) {
2490 *error = "Error setting cache policy's config values";
2491 goto bad;
2492 }
2493
2494 cmd = dm_cache_metadata_open(cache->metadata_dev->bdev,
2495 ca->block_size, may_format,
2496 dm_cache_policy_get_hint_size(cache->policy),
2497 ca->features.metadata_version);
2498 if (IS_ERR(cmd)) {
2499 *error = "Error creating metadata object";
2500 r = PTR_ERR(cmd);
2501 goto bad;
2502 }
2503 cache->cmd = cmd;
2504 set_cache_mode(cache, CM_WRITE);
2505 if (get_cache_mode(cache) != CM_WRITE) {
2506 *error = "Unable to get write access to metadata, please check/repair metadata.";
2507 r = -EINVAL;
2508 goto bad;
2509 }
2510
2511 if (passthrough_mode(cache)) {
2512 bool all_clean;
2513
2514 r = dm_cache_metadata_all_clean(cache->cmd, &all_clean);
2515 if (r) {
2516 *error = "dm_cache_metadata_all_clean() failed";
2517 goto bad;
2518 }
2519
2520 if (!all_clean) {
2521 *error = "Cannot enter passthrough mode unless all blocks are clean";
2522 r = -EINVAL;
2523 goto bad;
2524 }
2525
2526 policy_allow_migrations(cache->policy, false);
2527 }
2528
2529 spin_lock_init(&cache->lock);
2530 bio_list_init(&cache->deferred_bios);
2531 atomic_set(&cache->nr_allocated_migrations, 0);
2532 atomic_set(&cache->nr_io_migrations, 0);
2533 init_waitqueue_head(&cache->migration_wait);
2534
2535 r = -ENOMEM;
2536 atomic_set(&cache->nr_dirty, 0);
2537 cache->dirty_bitset = alloc_bitset(from_cblock(cache->cache_size));
2538 if (!cache->dirty_bitset) {
2539 *error = "could not allocate dirty bitset";
2540 goto bad;
2541 }
2542 clear_bitset(cache->dirty_bitset, from_cblock(cache->cache_size));
2543
2544 cache->discard_block_size =
2545 calculate_discard_block_size(cache->sectors_per_block,
2546 cache->origin_sectors);
2547 cache->discard_nr_blocks = to_dblock(dm_sector_div_up(cache->origin_sectors,
2548 cache->discard_block_size));
2549 cache->discard_bitset = alloc_bitset(from_dblock(cache->discard_nr_blocks));
2550 if (!cache->discard_bitset) {
2551 *error = "could not allocate discard bitset";
2552 goto bad;
2553 }
2554 clear_bitset(cache->discard_bitset, from_dblock(cache->discard_nr_blocks));
2555
2556 cache->copier = dm_kcopyd_client_create(&dm_kcopyd_throttle);
2557 if (IS_ERR(cache->copier)) {
2558 *error = "could not create kcopyd client";
2559 r = PTR_ERR(cache->copier);
2560 goto bad;
2561 }
2562
2563 cache->wq = alloc_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM, 0);
2564 if (!cache->wq) {
2565 *error = "could not create workqueue for metadata object";
2566 goto bad;
2567 }
2568 INIT_WORK(&cache->deferred_bio_worker, process_deferred_bios);
2569 INIT_WORK(&cache->migration_worker, check_migrations);
2570 INIT_DELAYED_WORK(&cache->waker, do_waker);
2571
2572 cache->prison = dm_bio_prison_create_v2(cache->wq);
2573 if (!cache->prison) {
2574 *error = "could not create bio prison";
2575 goto bad;
2576 }
2577
2578 r = mempool_init_slab_pool(&cache->migration_pool, MIGRATION_POOL_SIZE,
2579 migration_cache);
2580 if (r) {
2581 *error = "Error creating cache's migration mempool";
2582 goto bad;
2583 }
2584
2585 cache->need_tick_bio = true;
2586 cache->sized = false;
2587 cache->invalidate = false;
2588 cache->commit_requested = false;
2589 cache->loaded_mappings = false;
2590 cache->loaded_discards = false;
2591
2592 load_stats(cache);
2593
2594 atomic_set(&cache->stats.demotion, 0);
2595 atomic_set(&cache->stats.promotion, 0);
2596 atomic_set(&cache->stats.copies_avoided, 0);
2597 atomic_set(&cache->stats.cache_cell_clash, 0);
2598 atomic_set(&cache->stats.commit_count, 0);
2599 atomic_set(&cache->stats.discard_count, 0);
2600
2601 spin_lock_init(&cache->invalidation_lock);
2602 INIT_LIST_HEAD(&cache->invalidation_requests);
2603
2604 batcher_init(&cache->committer, commit_op, cache,
2605 issue_op, cache, cache->wq);
2606 iot_init(&cache->tracker);
2607
2608 init_rwsem(&cache->background_work_lock);
2609 prevent_background_work(cache);
2610
2611 *result = cache;
2612 return 0;
2613bad:
2614 destroy(cache);
2615 return r;
2616}
2617
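/*
 * Keep a copy of the constructor arguments so that cache_status() can
 * report the table line (STATUSTYPE_TABLE).
 */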
2618static int copy_ctr_args(struct cache *cache, int argc, const char **argv)
2619{
2620 unsigned i;
2621 const char **copy;
2622
2623 copy = kcalloc(argc, sizeof(*copy), GFP_KERNEL);
2624 if (!copy)
2625 return -ENOMEM;
2626 for (i = 0; i < argc; i++) {
2627 copy[i] = kstrdup(argv[i], GFP_KERNEL);
2628 if (!copy[i]) {
2629 while (i--)
2630 kfree(copy[i]);
2631 kfree(copy);
2632 return -ENOMEM;
2633 }
2634 }
2635
2636 cache->nr_ctr_args = argc;
2637 cache->ctr_args = copy;
2638
2639 return 0;
2640}
2641
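/*
 * Target constructor: parse the table arguments, build the cache object
 * and stash a copy of the ctr args for status reporting.
 */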
2642static int cache_ctr(struct dm_target *ti, unsigned argc, char **argv)
2643{
2644 int r = -EINVAL;
2645 struct cache_args *ca;
2646 struct cache *cache = NULL;
2647
2648 ca = kzalloc(sizeof(*ca), GFP_KERNEL);
2649 if (!ca) {
2650 ti->error = "Error allocating memory for cache";
2651 return -ENOMEM;
2652 }
2653 ca->ti = ti;
2654
2655 r = parse_cache_args(ca, argc, argv, &ti->error);
2656 if (r)
2657 goto out;
2658
2659 r = cache_create(ca, &cache);
2660 if (r)
2661 goto out;
2662
2663 r = copy_ctr_args(cache, argc - 3, (const char **)argv + 3);
2664 if (r) {
2665 destroy(cache);
2666 goto out;
2667 }
2668
2669 ti->private = cache;
2670out:
2671 destroy_cache_args(ca);
2672 return r;
2673}
2674

/*
 * The target's map function.  Bios beyond the end of the origin are
 * remapped straight to the origin, discards and flushes are deferred to
 * the worker, and everything else is mapped via map_bio().
 */
2677static int cache_map(struct dm_target *ti, struct bio *bio)
2678{
2679 struct cache *cache = ti->private;
2680
2681 int r;
2682 bool commit_needed;
2683 dm_oblock_t block = get_bio_block(cache, bio);
2684
2685 init_per_bio_data(bio);
2686 if (unlikely(from_oblock(block) >= from_oblock(cache->origin_blocks))) {
		/*
		 * This can only occur if the io goes to a partial block at
		 * the end of the origin device.  We don't cache these.
		 * Just remap to the origin and carry on.
		 */
2692 remap_to_origin(cache, bio);
2693 accounted_begin(cache, bio);
2694 return DM_MAPIO_REMAPPED;
2695 }
2696
2697 if (discard_or_flush(bio)) {
2698 defer_bio(cache, bio);
2699 return DM_MAPIO_SUBMITTED;
2700 }
2701
2702 r = map_bio(cache, bio, block, &commit_needed);
2703 if (commit_needed)
2704 schedule_commit(&cache->committer);
2705
2706 return r;
2707}
2708
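/*
 * Called once per bio on completion: update the policy tick if required,
 * release any shared cell lock and account the end of the io.
 */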
2709static int cache_end_io(struct dm_target *ti, struct bio *bio, blk_status_t *error)
2710{
2711 struct cache *cache = ti->private;
2712 unsigned long flags;
2713 struct per_bio_data *pb = get_per_bio_data(bio);
2714
2715 if (pb->tick) {
2716 policy_tick(cache->policy, false);
2717
2718 spin_lock_irqsave(&cache->lock, flags);
2719 cache->need_tick_bio = true;
2720 spin_unlock_irqrestore(&cache->lock, flags);
2721 }
2722
2723 bio_drop_shared_lock(cache, bio);
2724 accounted_complete(cache, bio);
2725
2726 return DM_ENDIO_DONE;
2727}
2728
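/*
 * Persist the in-core dirty bitset to the metadata device.  Refused once
 * the cache has degraded to read-only or fail mode.
 */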
2729static int write_dirty_bitset(struct cache *cache)
2730{
2731 int r;
2732
2733 if (get_cache_mode(cache) >= CM_READ_ONLY)
2734 return -EINVAL;
2735
2736 r = dm_cache_set_dirty_bits(cache->cmd, from_cblock(cache->cache_size), cache->dirty_bitset);
2737 if (r)
2738 metadata_operation_failed(cache, "dm_cache_set_dirty_bits", r);
2739
2740 return r;
2741}
2742
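/*
 * Resize the on-disk discard bitset if necessary and then write out the
 * discard state, one block at a time.
 */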
2743static int write_discard_bitset(struct cache *cache)
2744{
	int r;
	unsigned i;
2746
2747 if (get_cache_mode(cache) >= CM_READ_ONLY)
2748 return -EINVAL;
2749
2750 r = dm_cache_discard_bitset_resize(cache->cmd, cache->discard_block_size,
2751 cache->discard_nr_blocks);
2752 if (r) {
2753 DMERR("%s: could not resize on-disk discard bitset", cache_device_name(cache));
2754 metadata_operation_failed(cache, "dm_cache_discard_bitset_resize", r);
2755 return r;
2756 }
2757
2758 for (i = 0; i < from_dblock(cache->discard_nr_blocks); i++) {
2759 r = dm_cache_set_discard(cache->cmd, to_dblock(i),
2760 is_discarded(cache, to_dblock(i)));
2761 if (r) {
2762 metadata_operation_failed(cache, "dm_cache_set_discard", r);
2763 return r;
2764 }
2765 }
2766
2767 return 0;
2768}
2769
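/*
 * Ask the policy to write its per-block hints into the metadata.
 */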
2770static int write_hints(struct cache *cache)
2771{
2772 int r;
2773
2774 if (get_cache_mode(cache) >= CM_READ_ONLY)
2775 return -EINVAL;
2776
2777 r = dm_cache_write_hints(cache->cmd, cache->policy);
2778 if (r) {
2779 metadata_operation_failed(cache, "dm_cache_write_hints", r);
2780 return r;
2781 }
2782
2783 return 0;
2784}
2785
/*
 * Write the dirty bitset, discard bitset, stats and policy hints out to the
 * metadata device, then commit.  Returns true only if everything succeeded.
 */
2789static bool sync_metadata(struct cache *cache)
2790{
2791 int r1, r2, r3, r4;
2792
2793 r1 = write_dirty_bitset(cache);
2794 if (r1)
2795 DMERR("%s: could not write dirty bitset", cache_device_name(cache));
2796
2797 r2 = write_discard_bitset(cache);
2798 if (r2)
2799 DMERR("%s: could not write discard bitset", cache_device_name(cache));
2800
2801 save_stats(cache);
2802
2803 r3 = write_hints(cache);
2804 if (r3)
2805 DMERR("%s: could not write hints", cache_device_name(cache));
2806
	/*
	 * If writing the above metadata failed, we still commit, but don't
	 * set the clean shutdown flag.  This will effectively force every
	 * dirty bit to be set on reload.
	 */
2812 r4 = commit(cache, !r1 && !r2 && !r3);
2813 if (r4)
2814 DMERR("%s: could not write cache metadata", cache_device_name(cache));
2815
2816 return !r1 && !r2 && !r3 && !r4;
2817}
2818
2819static void cache_postsuspend(struct dm_target *ti)
2820{
2821 struct cache *cache = ti->private;
2822
2823 prevent_background_work(cache);
2824 BUG_ON(atomic_read(&cache->nr_io_migrations));
2825
2826 cancel_delayed_work_sync(&cache->waker);
2827 drain_workqueue(cache->wq);
2828 WARN_ON(cache->tracker.in_flight);
2829
	/*
	 * Requeue any bios that were deferred but never issued; this is a
	 * no-op if the deferred list is already empty.
	 */
2834 requeue_deferred_bios(cache);
2835
2836 if (get_cache_mode(cache) == CM_WRITE)
2837 (void) sync_metadata(cache);
2838}
2839
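/*
 * Callback for dm_cache_load_mappings(): restore the dirty bit for a
 * cache block and hand the mapping (plus any hint) to the policy.
 */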
2840static int load_mapping(void *context, dm_oblock_t oblock, dm_cblock_t cblock,
2841 bool dirty, uint32_t hint, bool hint_valid)
2842{
2843 int r;
2844 struct cache *cache = context;
2845
	if (dirty) {
		set_bit(from_cblock(cblock), cache->dirty_bitset);
		atomic_inc(&cache->nr_dirty);
	} else {
		clear_bit(from_cblock(cblock), cache->dirty_bitset);
	}
2851
2852 r = policy_load_mapping(cache->policy, oblock, cblock, dirty, hint, hint_valid);
2853 if (r)
2854 return r;
2855
2856 return 0;
2857}
2858
/*
 * The discard block size in the on disk metadata is not
 * necessarily the same as we're currently using.  So we have to
 * be careful to only set the discarded attribute if we know it
 * covers a complete block of the new size.
 */
2865struct discard_load_info {
2866 struct cache *cache;

	/*
	 * These blocks are sized using the on disk dblock size, rather
	 * than the current one.
	 */
2872 dm_block_t block_size;
2873 dm_block_t discard_begin, discard_end;
2874};
2875
2876static void discard_load_info_init(struct cache *cache,
2877 struct discard_load_info *li)
2878{
2879 li->cache = cache;
2880 li->discard_begin = li->discard_end = 0;
2881}
2882
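/*
 * Flush the accumulated [discard_begin, discard_end) range into the
 * in-core discard bitset, converting from the on-disk block size to the
 * current one.
 */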
2883static void set_discard_range(struct discard_load_info *li)
2884{
2885 sector_t b, e;
2886
2887 if (li->discard_begin == li->discard_end)
2888 return;
2889
	/*
	 * Convert the range to sectors using the on-disk block size.
	 */
2893 b = li->discard_begin * li->block_size;
2894 e = li->discard_end * li->block_size;
2895
	/*
	 * Then convert back to the current discard block size, rounding
	 * inwards so only whole blocks are marked as discarded.
	 */
2899 b = dm_sector_div_up(b, li->cache->discard_block_size);
2900 sector_div(e, li->cache->discard_block_size);
2901
	/*
	 * The origin may have shrunk, so we need to check we're still in
	 * bounds.
	 */
2906 if (e > from_dblock(li->cache->discard_nr_blocks))
2907 e = from_dblock(li->cache->discard_nr_blocks);
2908
2909 for (; b < e; b++)
2910 set_discard(li->cache, to_dblock(b));
2911}
2912
2913static int load_discard(void *context, sector_t discard_block_size,
2914 dm_dblock_t dblock, bool discard)
2915{
2916 struct discard_load_info *li = context;
2917
2918 li->block_size = discard_block_size;
2919
2920 if (discard) {
2921 if (from_dblock(dblock) == li->discard_end)
			/*
			 * We're already in a discard range, just extend it.
			 */
2925 li->discard_end = li->discard_end + 1ULL;
2926
2927 else {
			/*
			 * Emit the old range and start a new one.
			 */
2931 set_discard_range(li);
2932 li->discard_begin = from_dblock(dblock);
2933 li->discard_end = li->discard_begin + 1ULL;
2934 }
2935 } else {
2936 set_discard_range(li);
2937 li->discard_begin = li->discard_end = 0;
2938 }
2939
2940 return 0;
2941}
2942
2943static dm_cblock_t get_cache_dev_size(struct cache *cache)
2944{
2945 sector_t size = get_dev_size(cache->cache_dev);
2946 (void) sector_div(size, cache->sectors_per_block);
2947 return to_cblock(size);
2948}
2949
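/*
 * Decide whether the cache device may be resized to new_size.  Growing
 * requires a table reload; shrinking is only allowed if none of the
 * blocks being dropped are dirty.
 */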
2950static bool can_resize(struct cache *cache, dm_cblock_t new_size)
2951{
2952 if (from_cblock(new_size) > from_cblock(cache->cache_size)) {
2953 if (cache->sized) {
2954 DMERR("%s: unable to extend cache due to missing cache table reload",
2955 cache_device_name(cache));
2956 return false;
2957 }
2958 }
2959
	/*
	 * We can't drop a dirty block when shrinking the cache.  Check every
	 * block the shrink would discard, i.e. new_size .. cache_size - 1.
	 */
	while (from_cblock(new_size) < from_cblock(cache->cache_size)) {
		if (is_dirty(cache, new_size)) {
			DMERR("%s: unable to shrink cache; cache block %llu is dirty",
			      cache_device_name(cache),
			      (unsigned long long) from_cblock(new_size));
			return false;
		}
		new_size = to_cblock(from_cblock(new_size) + 1);
	}
2972
2973 return true;
2974}
2975
2976static int resize_cache_dev(struct cache *cache, dm_cblock_t new_size)
2977{
2978 int r;
2979
2980 r = dm_cache_resize(cache->cmd, new_size);
2981 if (r) {
2982 DMERR("%s: could not resize cache metadata", cache_device_name(cache));
2983 metadata_operation_failed(cache, "dm_cache_resize", r);
2984 return r;
2985 }
2986
2987 set_cache_size(cache, new_size);
2988
2989 return 0;
2990}
2991
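/*
 * Runs before the device is resumed: handle any resize of the cache
 * device and load the mappings and discards from the metadata.
 */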
2992static int cache_preresume(struct dm_target *ti)
2993{
2994 int r = 0;
2995 struct cache *cache = ti->private;
2996 dm_cblock_t csize = get_cache_dev_size(cache);
2997
	/*
	 * Check to see if the cache has resized.
	 */
3001 if (!cache->sized) {
3002 r = resize_cache_dev(cache, csize);
3003 if (r)
3004 return r;
3005
3006 cache->sized = true;
3007
3008 } else if (csize != cache->cache_size) {
3009 if (!can_resize(cache, csize))
3010 return -EINVAL;
3011
3012 r = resize_cache_dev(cache, csize);
3013 if (r)
3014 return r;
3015 }
3016
3017 if (!cache->loaded_mappings) {
3018 r = dm_cache_load_mappings(cache->cmd, cache->policy,
3019 load_mapping, cache);
3020 if (r) {
3021 DMERR("%s: could not load cache mappings", cache_device_name(cache));
3022 metadata_operation_failed(cache, "dm_cache_load_mappings", r);
3023 return r;
3024 }
3025
3026 cache->loaded_mappings = true;
3027 }
3028
3029 if (!cache->loaded_discards) {
3030 struct discard_load_info li;
3031
		/*
		 * The discard bitset could have been resized, or the
		 * discard block size changed.  To be safe we start by
		 * setting every dblock to not discarded.
		 */
3037 clear_bitset(cache->discard_bitset, from_dblock(cache->discard_nr_blocks));
3038
3039 discard_load_info_init(cache, &li);
3040 r = dm_cache_load_discards(cache->cmd, load_discard, &li);
3041 if (r) {
3042 DMERR("%s: could not load origin discards", cache_device_name(cache));
3043 metadata_operation_failed(cache, "dm_cache_load_discards", r);
3044 return r;
3045 }
3046 set_discard_range(&li);
3047
3048 cache->loaded_discards = true;
3049 }
3050
3051 return r;
3052}
3053
3054static void cache_resume(struct dm_target *ti)
3055{
3056 struct cache *cache = ti->private;
3057
3058 cache->need_tick_bio = true;
3059 allow_background_work(cache);
3060 do_waker(&cache->waker.work);
3061}
3062
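/*
 * Emit the feature-argument count followed by the feature strings for the
 * status line.
 */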
3063static void emit_flags(struct cache *cache, char *result,
3064 unsigned maxlen, ssize_t *sz_ptr)
3065{
3066 ssize_t sz = *sz_ptr;
3067 struct cache_features *cf = &cache->features;
3068 unsigned count = (cf->metadata_version == 2) + !cf->discard_passdown + 1;
3069
3070 DMEMIT("%u ", count);
3071
3072 if (cf->metadata_version == 2)
3073 DMEMIT("metadata2 ");
3074
3075 if (writethrough_mode(cache))
3076 DMEMIT("writethrough ");
3077
3078 else if (passthrough_mode(cache))
3079 DMEMIT("passthrough ");
3080
3081 else if (writeback_mode(cache))
3082 DMEMIT("writeback ");
3083
3084 else {
3085 DMEMIT("unknown ");
3086 DMERR("%s: internal error: unknown io mode: %d",
3087 cache_device_name(cache), (int) cf->io_mode);
3088 }
3089
3090 if (!cf->discard_passdown)
3091 DMEMIT("no_discard_passdown ");
3092
3093 *sz_ptr = sz;
3094}
3095
/*
 * Status format:
 *
 * <metadata block size> <#used metadata blocks>/<#total metadata blocks>
 * <cache block size> <#used cache blocks>/<#total cache blocks>
 * <#read hits> <#read misses> <#write hits> <#write misses>
 * <#demotions> <#promotions> <#dirty>
 * <#features> <features>*
 * <#core args> <core args>
 * <policy name> <#policy args> <policy args>* <cache metadata mode> <needs_check>
 */
3107static void cache_status(struct dm_target *ti, status_type_t type,
3108 unsigned status_flags, char *result, unsigned maxlen)
3109{
3110 int r = 0;
3111 unsigned i;
3112 ssize_t sz = 0;
3113 dm_block_t nr_free_blocks_metadata = 0;
3114 dm_block_t nr_blocks_metadata = 0;
3115 char buf[BDEVNAME_SIZE];
3116 struct cache *cache = ti->private;
3117 dm_cblock_t residency;
3118 bool needs_check;
3119
3120 switch (type) {
3121 case STATUSTYPE_INFO:
3122 if (get_cache_mode(cache) == CM_FAIL) {
3123 DMEMIT("Fail");
3124 break;
3125 }

		/* Commit to ensure statistics aren't out-of-date */
3128 if (!(status_flags & DM_STATUS_NOFLUSH_FLAG) && !dm_suspended(ti))
3129 (void) commit(cache, false);
3130
3131 r = dm_cache_get_free_metadata_block_count(cache->cmd, &nr_free_blocks_metadata);
3132 if (r) {
3133 DMERR("%s: dm_cache_get_free_metadata_block_count returned %d",
3134 cache_device_name(cache), r);
3135 goto err;
3136 }
3137
3138 r = dm_cache_get_metadata_dev_size(cache->cmd, &nr_blocks_metadata);
3139 if (r) {
3140 DMERR("%s: dm_cache_get_metadata_dev_size returned %d",
3141 cache_device_name(cache), r);
3142 goto err;
3143 }
3144
3145 residency = policy_residency(cache->policy);
3146
3147 DMEMIT("%u %llu/%llu %llu %llu/%llu %u %u %u %u %u %u %lu ",
3148 (unsigned)DM_CACHE_METADATA_BLOCK_SIZE,
3149 (unsigned long long)(nr_blocks_metadata - nr_free_blocks_metadata),
3150 (unsigned long long)nr_blocks_metadata,
3151 (unsigned long long)cache->sectors_per_block,
3152 (unsigned long long) from_cblock(residency),
3153 (unsigned long long) from_cblock(cache->cache_size),
3154 (unsigned) atomic_read(&cache->stats.read_hit),
3155 (unsigned) atomic_read(&cache->stats.read_miss),
3156 (unsigned) atomic_read(&cache->stats.write_hit),
3157 (unsigned) atomic_read(&cache->stats.write_miss),
3158 (unsigned) atomic_read(&cache->stats.demotion),
3159 (unsigned) atomic_read(&cache->stats.promotion),
3160 (unsigned long) atomic_read(&cache->nr_dirty));
3161
3162 emit_flags(cache, result, maxlen, &sz);
3163
3164 DMEMIT("2 migration_threshold %llu ", (unsigned long long) cache->migration_threshold);
3165
3166 DMEMIT("%s ", dm_cache_policy_get_name(cache->policy));
3167 if (sz < maxlen) {
3168 r = policy_emit_config_values(cache->policy, result, maxlen, &sz);
3169 if (r)
3170 DMERR("%s: policy_emit_config_values returned %d",
3171 cache_device_name(cache), r);
3172 }
3173
3174 if (get_cache_mode(cache) == CM_READ_ONLY)
3175 DMEMIT("ro ");
3176 else
3177 DMEMIT("rw ");
3178
3179 r = dm_cache_metadata_needs_check(cache->cmd, &needs_check);
3180
3181 if (r || needs_check)
3182 DMEMIT("needs_check ");
3183 else
3184 DMEMIT("- ");
3185
3186 break;
3187
3188 case STATUSTYPE_TABLE:
3189 format_dev_t(buf, cache->metadata_dev->bdev->bd_dev);
3190 DMEMIT("%s ", buf);
3191 format_dev_t(buf, cache->cache_dev->bdev->bd_dev);
3192 DMEMIT("%s ", buf);
3193 format_dev_t(buf, cache->origin_dev->bdev->bd_dev);
3194 DMEMIT("%s", buf);
3195
		if (cache->nr_ctr_args) {
			for (i = 0; i < cache->nr_ctr_args - 1; i++)
				DMEMIT(" %s", cache->ctr_args[i]);
			DMEMIT(" %s", cache->ctr_args[cache->nr_ctr_args - 1]);
		}
3200 }
3201
3202 return;
3203
3204err:
3205 DMEMIT("Error");
3206}
3207
/*
 * A half-open range of cache blocks: begin is included in the range,
 * end is not.
 */
3212struct cblock_range {
3213 dm_cblock_t begin;
3214 dm_cblock_t end;
3215};
3216
/*
 * A cache block range can take two forms:
 *
 * i) A single cblock, eg. '3456'
 * ii) A begin and end cblock with a dash between, eg. 123-234
 */
3223static int parse_cblock_range(struct cache *cache, const char *str,
3224 struct cblock_range *result)
3225{
3226 char dummy;
3227 uint64_t b, e;
3228 int r;
3229
	/*
	 * Try and parse form (ii) first.
	 */
3233 r = sscanf(str, "%llu-%llu%c", &b, &e, &dummy);
3234 if (r < 0)
3235 return r;
3236
3237 if (r == 2) {
3238 result->begin = to_cblock(b);
3239 result->end = to_cblock(e);
3240 return 0;
3241 }
3242
	/*
	 * That didn't work, try form (i).
	 */
3246 r = sscanf(str, "%llu%c", &b, &dummy);
3247 if (r < 0)
3248 return r;
3249
3250 if (r == 1) {
3251 result->begin = to_cblock(b);
3252 result->end = to_cblock(from_cblock(result->begin) + 1u);
3253 return 0;
3254 }
3255
3256 DMERR("%s: invalid cblock range '%s'", cache_device_name(cache), str);
3257 return -EINVAL;
3258}
3259
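/*
 * Check that the parsed range is non-empty and lies within the current
 * cache size.
 */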
3260static int validate_cblock_range(struct cache *cache, struct cblock_range *range)
3261{
3262 uint64_t b = from_cblock(range->begin);
3263 uint64_t e = from_cblock(range->end);
3264 uint64_t n = from_cblock(cache->cache_size);
3265
3266 if (b >= n) {
3267 DMERR("%s: begin cblock out of range: %llu >= %llu",
3268 cache_device_name(cache), b, n);
3269 return -EINVAL;
3270 }
3271
3272 if (e > n) {
3273 DMERR("%s: end cblock out of range: %llu > %llu",
3274 cache_device_name(cache), e, n);
3275 return -EINVAL;
3276 }
3277
3278 if (b >= e) {
3279 DMERR("%s: invalid cblock range: %llu >= %llu",
3280 cache_device_name(cache), b, e);
3281 return -EINVAL;
3282 }
3283
3284 return 0;
3285}
3286
3287static inline dm_cblock_t cblock_succ(dm_cblock_t b)
3288{
3289 return to_cblock(from_cblock(b) + 1);
3290}
3291
3292static int request_invalidation(struct cache *cache, struct cblock_range *range)
3293{
3294 int r = 0;
3295
	/*
	 * We don't need to do any locking here because we know we're in
	 * passthrough mode.  There is a potential race between an
	 * invalidation triggered by an io and an invalidation message, but
	 * it's harmless, so we needn't worry if a policy call fails.
	 */
3302 while (range->begin != range->end) {
3303 r = invalidate_cblock(cache, range->begin);
3304 if (r)
3305 return r;
3306
3307 range->begin = cblock_succ(range->begin);
3308 }
3309
3310 cache->commit_requested = true;
3311 return r;
3312}
3313
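/*
 * Handle the "invalidate_cblocks" message: only permitted in passthrough
 * mode; each argument is parsed, validated and invalidated in turn.
 */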
3314static int process_invalidate_cblocks_message(struct cache *cache, unsigned count,
3315 const char **cblock_ranges)
3316{
3317 int r = 0;
3318 unsigned i;
3319 struct cblock_range range;
3320
3321 if (!passthrough_mode(cache)) {
3322 DMERR("%s: cache has to be in passthrough mode for invalidation",
3323 cache_device_name(cache));
3324 return -EPERM;
3325 }
3326
3327 for (i = 0; i < count; i++) {
3328 r = parse_cblock_range(cache, cblock_ranges[i], &range);
3329 if (r)
3330 break;
3331
3332 r = validate_cblock_range(cache, &range);
3333 if (r)
3334 break;
3335
		/*
		 * Invalidate each cblock in the range, one at a time.
		 */
3339 r = request_invalidation(cache, &range);
3340 if (r)
3341 break;
3342 }
3343
3344 return r;
3345}
3346
/*
 * Supports
 *	"<key> <value>"
 * and
 *	"invalidate_cblocks [(<begin>)|(<begin>-<end>)]*"
 *
 * The key migration_threshold is supported by the cache target core.
 */
3355static int cache_message(struct dm_target *ti, unsigned argc, char **argv,
3356 char *result, unsigned maxlen)
3357{
3358 struct cache *cache = ti->private;
3359
3360 if (!argc)
3361 return -EINVAL;
3362
3363 if (get_cache_mode(cache) >= CM_READ_ONLY) {
3364 DMERR("%s: unable to service cache target messages in READ_ONLY or FAIL mode",
3365 cache_device_name(cache));
3366 return -EOPNOTSUPP;
3367 }
3368
3369 if (!strcasecmp(argv[0], "invalidate_cblocks"))
3370 return process_invalidate_cblocks_message(cache, argc - 1, (const char **) argv + 1);
3371
3372 if (argc != 2)
3373 return -EINVAL;
3374
3375 return set_config_value(cache, argv[0], argv[1]);
3376}
3377
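/*
 * Report both the fast (cache) device and the origin device to the
 * device-mapper core so their limits can be stacked.
 */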
3378static int cache_iterate_devices(struct dm_target *ti,
3379 iterate_devices_callout_fn fn, void *data)
3380{
3381 int r = 0;
3382 struct cache *cache = ti->private;
3383
3384 r = fn(ti, cache->cache_dev, 0, get_dev_size(cache->cache_dev), data);
3385 if (!r)
3386 r = fn(ti, cache->origin_dev, 0, ti->len, data);
3387
3388 return r;
3389}
3390
3391static bool origin_dev_supports_discard(struct block_device *origin_bdev)
3392{
3393 struct request_queue *q = bdev_get_queue(origin_bdev);
3394
3395 return q && blk_queue_discard(q);
3396}
3397
/*
 * If discard_passdown was enabled verify that the origin device
 * supports discards.  Disable discard_passdown if not.
 */
3402static void disable_passdown_if_not_supported(struct cache *cache)
3403{
3404 struct block_device *origin_bdev = cache->origin_dev->bdev;
3405 struct queue_limits *origin_limits = &bdev_get_queue(origin_bdev)->limits;
3406 const char *reason = NULL;
3407 char buf[BDEVNAME_SIZE];
3408
3409 if (!cache->features.discard_passdown)
3410 return;
3411
3412 if (!origin_dev_supports_discard(origin_bdev))
3413 reason = "discard unsupported";
3414
3415 else if (origin_limits->max_discard_sectors < cache->sectors_per_block)
3416 reason = "max discard sectors smaller than a block";
3417
3418 if (reason) {
3419 DMWARN("Origin device (%s) %s: Disabling discard passdown.",
3420 bdevname(origin_bdev, buf), reason);
3421 cache->features.discard_passdown = false;
3422 }
3423}
3424
3425static void set_discard_limits(struct cache *cache, struct queue_limits *limits)
3426{
3427 struct block_device *origin_bdev = cache->origin_dev->bdev;
3428 struct queue_limits *origin_limits = &bdev_get_queue(origin_bdev)->limits;
3429
3430 if (!cache->features.discard_passdown) {
		/* No passdown is done so set our own virtual limits. */
3432 limits->max_discard_sectors = min_t(sector_t, cache->discard_block_size * 1024,
3433 cache->origin_sectors);
3434 limits->discard_granularity = cache->discard_block_size << SECTOR_SHIFT;
3435 return;
3436 }
3437
	/*
	 * cache_iterate_devices() is stacking both origin and fast device limits
	 * but discards aren't passed to the fast device, so inherit origin's limits.
	 */
3442 limits->max_discard_sectors = origin_limits->max_discard_sectors;
3443 limits->max_hw_discard_sectors = origin_limits->max_hw_discard_sectors;
3444 limits->discard_granularity = origin_limits->discard_granularity;
3445 limits->discard_alignment = origin_limits->discard_alignment;
3446 limits->discard_misaligned = origin_limits->discard_misaligned;
3447}
3448
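/*
 * Adjust the queue limits: align io_min/io_opt to the cache block size
 * when the stacked values aren't already a multiple of it, and set up
 * the discard limits.
 */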
3449static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
3450{
3451 struct cache *cache = ti->private;
3452 uint64_t io_opt_sectors = limits->io_opt >> SECTOR_SHIFT;
3453
	/*
	 * If the system-determined stacked limits are compatible with the
	 * cache's blocksize (io_opt is a factor) do not override them.
	 */
3458 if (io_opt_sectors < cache->sectors_per_block ||
3459 do_div(io_opt_sectors, cache->sectors_per_block)) {
3460 blk_limits_io_min(limits, cache->sectors_per_block << SECTOR_SHIFT);
3461 blk_limits_io_opt(limits, cache->sectors_per_block << SECTOR_SHIFT);
3462 }
3463
3464 disable_passdown_if_not_supported(cache);
3465 set_discard_limits(cache, limits);
3466}
3467
3468
3469
3470static struct target_type cache_target = {
3471 .name = "cache",
3472 .version = {2, 2, 0},
3473 .module = THIS_MODULE,
3474 .ctr = cache_ctr,
3475 .dtr = cache_dtr,
3476 .map = cache_map,
3477 .end_io = cache_end_io,
3478 .postsuspend = cache_postsuspend,
3479 .preresume = cache_preresume,
3480 .resume = cache_resume,
3481 .status = cache_status,
3482 .message = cache_message,
3483 .iterate_devices = cache_iterate_devices,
3484 .io_hints = cache_io_hints,
3485};
3486
3487static int __init dm_cache_init(void)
3488{
3489 int r;
3490
3491 migration_cache = KMEM_CACHE(dm_cache_migration, 0);
3492 if (!migration_cache)
3493 return -ENOMEM;
3494
3495 r = dm_register_target(&cache_target);
3496 if (r) {
3497 DMERR("cache target registration failed: %d", r);
3498 kmem_cache_destroy(migration_cache);
3499 return r;
3500 }
3501
3502 return 0;
3503}
3504
3505static void __exit dm_cache_exit(void)
3506{
3507 dm_unregister_target(&cache_target);
3508 kmem_cache_destroy(migration_cache);
3509}
3510
3511module_init(dm_cache_init);
3512module_exit(dm_cache_exit);
3513
3514MODULE_DESCRIPTION(DM_NAME " cache target");
3515MODULE_AUTHOR("Joe Thornber <ejt@redhat.com>");
3516MODULE_LICENSE("GPL");
3517