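/*
 * Copyright (C) 2012 Red Hat. All rights reserved.
 *
 * This file is released under the GPL.
 */
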
#include "dm.h"
#include "dm-bio-prison-v2.h"
#include "dm-bio-record.h"
#include "dm-cache-metadata.h"
#include "dm-io-tracker.h"

#include <linux/dm-io.h>
#include <linux/dm-kcopyd.h>
#include <linux/jiffies.h>
#include <linux/init.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/rwsem.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

#define DM_MSG_PREFIX "cache"

DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(cache_copy_throttle,
	"A percentage of time allocated for copying to and/or from cache");
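
/*
 * Glossary:
 *
 * oblock: index of an origin block
 * cblock: index of a cache block
 * promotion: movement of a block from origin to cache
 * demotion: movement of a block from cache to origin
 * migration: movement of a block between the origin and cache device,
 *	      either direction
 */
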
struct continuation {
	struct work_struct ws;
	blk_status_t input;
};

static inline void init_continuation(struct continuation *k,
				     void (*fn)(struct work_struct *))
{
	INIT_WORK(&k->ws, fn);
	k->input = 0;
}

static inline void queue_continuation(struct workqueue_struct *wq,
				      struct continuation *k)
{
	queue_work(wq, &k->ws);
}
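
/*
 * The batcher collects together pieces of work that share a dependency on
 * an expensive operation (here, a metadata commit).  Work is accumulated
 * under a spinlock; a single commit is then performed, and every queued
 * continuation and bio is released with the commit's result.
 */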
struct batcher {
	/*
	 * The operation that everyone is waiting for.
	 */
	blk_status_t (*commit_op)(void *context);
	void *commit_context;

	/*
	 * This is how bios should be issued once the commit op is complete
	 * (accounted_request).
	 */
	void (*issue_op)(struct bio *bio, void *context);
	void *issue_context;

	/*
	 * Queued work gets put on here after commit.
	 */
	struct workqueue_struct *wq;

	spinlock_t lock;
	struct list_head work_items;
	struct bio_list bios;
	struct work_struct commit_work;

	bool commit_scheduled;
};
97
98static void __commit(struct work_struct *_ws)
99{
100 struct batcher *b = container_of(_ws, struct batcher, commit_work);
101 blk_status_t r;
102 struct list_head work_items;
103 struct work_struct *ws, *tmp;
104 struct continuation *k;
105 struct bio *bio;
106 struct bio_list bios;
107
108 INIT_LIST_HEAD(&work_items);
109 bio_list_init(&bios);
110
111
112
113
114
115 spin_lock_irq(&b->lock);
116 list_splice_init(&b->work_items, &work_items);
117 bio_list_merge(&bios, &b->bios);
118 bio_list_init(&b->bios);
119 b->commit_scheduled = false;
120 spin_unlock_irq(&b->lock);
121
122 r = b->commit_op(b->commit_context);
123
124 list_for_each_entry_safe(ws, tmp, &work_items, entry) {
125 k = container_of(ws, struct continuation, ws);
126 k->input = r;
127 INIT_LIST_HEAD(&ws->entry);
128 queue_work(b->wq, ws);
129 }
130
131 while ((bio = bio_list_pop(&bios))) {
132 if (r) {
133 bio->bi_status = r;
134 bio_endio(bio);
135 } else
136 b->issue_op(bio, b->issue_context);
137 }
138}
139
140static void batcher_init(struct batcher *b,
141 blk_status_t (*commit_op)(void *),
142 void *commit_context,
143 void (*issue_op)(struct bio *bio, void *),
144 void *issue_context,
145 struct workqueue_struct *wq)
146{
147 b->commit_op = commit_op;
148 b->commit_context = commit_context;
149 b->issue_op = issue_op;
150 b->issue_context = issue_context;
151 b->wq = wq;
152
153 spin_lock_init(&b->lock);
154 INIT_LIST_HEAD(&b->work_items);
155 bio_list_init(&b->bios);
156 INIT_WORK(&b->commit_work, __commit);
157 b->commit_scheduled = false;
158}
159
160static void async_commit(struct batcher *b)
161{
162 queue_work(b->wq, &b->commit_work);
163}
164
165static void continue_after_commit(struct batcher *b, struct continuation *k)
166{
167 bool commit_scheduled;
168
169 spin_lock_irq(&b->lock);
170 commit_scheduled = b->commit_scheduled;
171 list_add_tail(&k->ws.entry, &b->work_items);
172 spin_unlock_irq(&b->lock);
173
174 if (commit_scheduled)
175 async_commit(b);
176}
177
178
179
180
181static void issue_after_commit(struct batcher *b, struct bio *bio)
182{
183 bool commit_scheduled;
184
185 spin_lock_irq(&b->lock);
186 commit_scheduled = b->commit_scheduled;
187 bio_list_add(&b->bios, bio);
188 spin_unlock_irq(&b->lock);
189
190 if (commit_scheduled)
191 async_commit(b);
192}
193
194
195
196
197static void schedule_commit(struct batcher *b)
198{
199 bool immediate;
200
201 spin_lock_irq(&b->lock);
202 immediate = !list_empty(&b->work_items) || !bio_list_empty(&b->bios);
203 b->commit_scheduled = true;
204 spin_unlock_irq(&b->lock);
205
206 if (immediate)
207 async_commit(b);
208}
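
/*
 * Usage sketch (illustrative, not called from here): a caller that wants a
 * bio issued only once the current metadata transaction is on disk would
 * do:
 *
 *	issue_after_commit(&cache->committer, bio);
 *	schedule_commit(&cache->committer);
 *
 * See the REQ_FUA handling in map_bio() and process_deferred_bios() below
 * for real examples.
 */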
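
/*
 * There are a couple of places where we let a bio run, but want to do some
 * work before calling its endio function.  We do this by temporarily
 * changing the endio fn.
 */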
struct dm_hook_info {
	bio_end_io_t *bi_end_io;
};

static void dm_hook_bio(struct dm_hook_info *h, struct bio *bio,
			bio_end_io_t *bi_end_io, void *bi_private)
{
	h->bi_end_io = bio->bi_end_io;

	bio->bi_end_io = bi_end_io;
	bio->bi_private = bi_private;
}

static void dm_unhook_bio(struct dm_hook_info *h, struct bio *bio)
{
	bio->bi_end_io = h->bi_end_io;
}

#define MIGRATION_POOL_SIZE 128
#define COMMIT_PERIOD HZ
#define MIGRATION_COUNT_WINDOW 10
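
/*
 * The block size of the device holding cache data must be
 * between 32KB and 1GB.
 */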
#define DATA_DEV_BLOCK_SIZE_MIN_SECTORS (32 * 1024 >> SECTOR_SHIFT)
#define DATA_DEV_BLOCK_SIZE_MAX_SECTORS (1024 * 1024 * 1024 >> SECTOR_SHIFT)

enum cache_metadata_mode {
	CM_WRITE,		/* metadata may be changed */
	CM_READ_ONLY,		/* metadata may not be changed */
	CM_FAIL
};

enum cache_io_mode {
	/*
	 * Data is written to cached blocks only.  These blocks are marked
	 * dirty.  If you lose the cache device you will lose data.
	 * Potential performance increase for both reads and writes.
	 */
	CM_IO_WRITEBACK,

	/*
	 * Data is written to both cache and origin.  Blocks are never
	 * dirty.  Potential performance benefit for reads only.
	 */
	CM_IO_WRITETHROUGH,

	/*
	 * A degraded mode useful for various cache coherency situations
	 * (eg, rolling back snapshots).  Reads and writes always go to the
	 * origin.  If a write goes to a cached oblock, then the cache
	 * block is invalidated.
	 */
	CM_IO_PASSTHROUGH
};
274
275struct cache_features {
276 enum cache_metadata_mode mode;
277 enum cache_io_mode io_mode;
278 unsigned metadata_version;
279 bool discard_passdown:1;
280};
281
282struct cache_stats {
283 atomic_t read_hit;
284 atomic_t read_miss;
285 atomic_t write_hit;
286 atomic_t write_miss;
287 atomic_t demotion;
288 atomic_t promotion;
289 atomic_t writeback;
290 atomic_t copies_avoided;
291 atomic_t cache_cell_clash;
292 atomic_t commit_count;
293 atomic_t discard_count;
294};
295

struct cache {
	struct dm_target *ti;
	spinlock_t lock;

	/*
	 * Fields for converting from sectors to blocks.
	 */
	int sectors_per_block_shift;
	sector_t sectors_per_block;

	struct dm_cache_metadata *cmd;

	/*
	 * Metadata is written to this device.
	 */
	struct dm_dev *metadata_dev;

	/*
	 * The slower of the two data devices.  Typically a spindle.
	 */
	struct dm_dev *origin_dev;

	/*
	 * The faster of the two data devices.  Typically an SSD.
	 */
	struct dm_dev *cache_dev;

	/*
	 * Size of the origin device in _complete_ blocks and native sectors.
	 */
	dm_oblock_t origin_blocks;
	sector_t origin_sectors;

	/*
	 * Size of the cache device in blocks.
	 */
	dm_cblock_t cache_size;

	/*
	 * Invalidation fields.
	 */
	spinlock_t invalidation_lock;
	struct list_head invalidation_requests;

	sector_t migration_threshold;
	wait_queue_head_t migration_wait;
	atomic_t nr_allocated_migrations;

	/*
	 * The number of in flight migrations that are performing
	 * background io, eg, promotion, writeback.
	 */
	atomic_t nr_io_migrations;

	struct bio_list deferred_bios;

	struct rw_semaphore quiesce_lock;

	/*
	 * discard_nr_blocks entries, discarded if set.
	 */
	dm_dblock_t discard_nr_blocks;
	unsigned long *discard_bitset;
	uint32_t discard_block_size; /* a power of 2 times sectors per block */

	/*
	 * Rather than reconstructing the table line for the status we just
	 * save it and regurgitate.
	 */
	unsigned nr_ctr_args;
	const char **ctr_args;

	struct dm_kcopyd_client *copier;
	struct work_struct deferred_bio_worker;
	struct work_struct migration_worker;
	struct workqueue_struct *wq;
	struct delayed_work waker;
	struct dm_bio_prison_v2 *prison;

	/*
	 * cache_size entries, dirty if set.
	 */
	unsigned long *dirty_bitset;
	atomic_t nr_dirty;

	unsigned policy_nr_args;
	struct dm_cache_policy *policy;

	/*
	 * Cache features such as write-through.
	 */
	struct cache_features features;

	struct cache_stats stats;

	bool need_tick_bio:1;
	bool sized:1;
	bool invalidate:1;
	bool commit_requested:1;
	bool loaded_mappings:1;
	bool loaded_discards:1;

	struct rw_semaphore background_work_lock;

	struct batcher committer;
	struct work_struct commit_ws;

	struct dm_io_tracker tracker;

	mempool_t migration_pool;

	struct bio_set bs;
};
409
410struct per_bio_data {
411 bool tick:1;
412 unsigned req_nr:2;
413 struct dm_bio_prison_cell_v2 *cell;
414 struct dm_hook_info hook_info;
415 sector_t len;
416};
417
418struct dm_cache_migration {
419 struct continuation k;
420 struct cache *cache;
421
422 struct policy_work *op;
423 struct bio *overwrite_bio;
424 struct dm_bio_prison_cell_v2 *cell;
425
426 dm_cblock_t invalidate_cblock;
427 dm_oblock_t invalidate_oblock;
428};

static bool writethrough_mode(struct cache *cache)
{
	return cache->features.io_mode == CM_IO_WRITETHROUGH;
}

static bool writeback_mode(struct cache *cache)
{
	return cache->features.io_mode == CM_IO_WRITEBACK;
}

static inline bool passthrough_mode(struct cache *cache)
{
	return unlikely(cache->features.io_mode == CM_IO_PASSTHROUGH);
}

static void wake_deferred_bio_worker(struct cache *cache)
{
	queue_work(cache->wq, &cache->deferred_bio_worker);
}

static void wake_migration_worker(struct cache *cache)
{
	if (passthrough_mode(cache))
		return;

	queue_work(cache->wq, &cache->migration_worker);
}

static struct dm_bio_prison_cell_v2 *alloc_prison_cell(struct cache *cache)
{
	return dm_bio_prison_alloc_cell_v2(cache->prison, GFP_NOIO);
}

static void free_prison_cell(struct cache *cache, struct dm_bio_prison_cell_v2 *cell)
{
	dm_bio_prison_free_cell_v2(cache->prison, cell);
}

static struct dm_cache_migration *alloc_migration(struct cache *cache)
{
	struct dm_cache_migration *mg;

	mg = mempool_alloc(&cache->migration_pool, GFP_NOIO);

	memset(mg, 0, sizeof(*mg));

	mg->cache = cache;
	atomic_inc(&cache->nr_allocated_migrations);

	return mg;
}

static void free_migration(struct dm_cache_migration *mg)
{
	struct cache *cache = mg->cache;

	if (atomic_dec_and_test(&cache->nr_allocated_migrations))
		wake_up(&cache->migration_wait);

	mempool_free(mg, &cache->migration_pool);
}

static inline dm_oblock_t oblock_succ(dm_oblock_t b)
{
	return to_oblock(from_oblock(b) + 1ull);
}

static void build_key(dm_oblock_t begin, dm_oblock_t end, struct dm_cell_key_v2 *key)
{
	key->virtual = 0;
	key->dev = 0;
	key->block_begin = from_oblock(begin);
	key->block_end = from_oblock(end);
}
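
/*
 * We have two lock levels.  Level 0, which is used to prevent WRITEs, and
 * level 1 which prevents *both* READs and WRITEs.
 */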
#define WRITE_LOCK_LEVEL 0
#define READ_WRITE_LOCK_LEVEL 1

static unsigned lock_level(struct bio *bio)
{
	return bio_data_dir(bio) == WRITE ?
		WRITE_LOCK_LEVEL :
		READ_WRITE_LOCK_LEVEL;
}

static struct per_bio_data *get_per_bio_data(struct bio *bio)
{
	struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));

	BUG_ON(!pb);
	return pb;
}

static struct per_bio_data *init_per_bio_data(struct bio *bio)
{
	struct per_bio_data *pb = get_per_bio_data(bio);

	pb->tick = false;
	pb->req_nr = dm_bio_get_target_bio_nr(bio);
	pb->cell = NULL;
	pb->len = 0;

	return pb;
}

static void defer_bio(struct cache *cache, struct bio *bio)
{
	spin_lock_irq(&cache->lock);
	bio_list_add(&cache->deferred_bios, bio);
	spin_unlock_irq(&cache->lock);

	wake_deferred_bio_worker(cache);
}

static void defer_bios(struct cache *cache, struct bio_list *bios)
{
	spin_lock_irq(&cache->lock);
	bio_list_merge(&cache->deferred_bios, bios);
	bio_list_init(bios);
	spin_unlock_irq(&cache->lock);

	wake_deferred_bio_worker(cache);
}

static bool bio_detain_shared(struct cache *cache, dm_oblock_t oblock, struct bio *bio)
{
	bool r;
	struct per_bio_data *pb;
	struct dm_cell_key_v2 key;
	dm_oblock_t end = to_oblock(from_oblock(oblock) + 1ULL);
	struct dm_bio_prison_cell_v2 *cell_prealloc, *cell;

	cell_prealloc = alloc_prison_cell(cache);

	build_key(oblock, end, &key);
	r = dm_cell_get_v2(cache->prison, &key, lock_level(bio), bio, cell_prealloc, &cell);
	if (!r) {
		/*
		 * Failed to get the lock.
		 */
		free_prison_cell(cache, cell_prealloc);
		return r;
	}

	if (cell != cell_prealloc)
		free_prison_cell(cache, cell_prealloc);

	pb = get_per_bio_data(bio);
	pb->cell = cell;

	return r;
}

static bool is_dirty(struct cache *cache, dm_cblock_t b)
{
	return test_bit(from_cblock(b), cache->dirty_bitset);
}

static void set_dirty(struct cache *cache, dm_cblock_t cblock)
{
	if (!test_and_set_bit(from_cblock(cblock), cache->dirty_bitset)) {
		atomic_inc(&cache->nr_dirty);
		policy_set_dirty(cache->policy, cblock);
	}
}
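
/*
 * These two are called when setting after migrations to force the policy
 * and dirty bitset to be in sync.
 */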
static void force_set_dirty(struct cache *cache, dm_cblock_t cblock)
{
	if (!test_and_set_bit(from_cblock(cblock), cache->dirty_bitset))
		atomic_inc(&cache->nr_dirty);
	policy_set_dirty(cache->policy, cblock);
}

static void force_clear_dirty(struct cache *cache, dm_cblock_t cblock)
{
	if (test_and_clear_bit(from_cblock(cblock), cache->dirty_bitset)) {
		if (atomic_dec_return(&cache->nr_dirty) == 0)
			dm_table_event(cache->ti->table);
	}

	policy_clear_dirty(cache->policy, cblock);
}

static bool block_size_is_power_of_two(struct cache *cache)
{
	return cache->sectors_per_block_shift >= 0;
}

static dm_block_t block_div(dm_block_t b, uint32_t n)
{
	do_div(b, n);

	return b;
}

static dm_block_t oblocks_per_dblock(struct cache *cache)
{
	dm_block_t oblocks = cache->discard_block_size;

	if (block_size_is_power_of_two(cache))
		oblocks >>= cache->sectors_per_block_shift;
	else
		oblocks = block_div(oblocks, cache->sectors_per_block);

	return oblocks;
}

static dm_dblock_t oblock_to_dblock(struct cache *cache, dm_oblock_t oblock)
{
	return to_dblock(block_div(from_oblock(oblock),
				   oblocks_per_dblock(cache)));
}

static void set_discard(struct cache *cache, dm_dblock_t b)
{
	BUG_ON(from_dblock(b) >= from_dblock(cache->discard_nr_blocks));
	atomic_inc(&cache->stats.discard_count);

	spin_lock_irq(&cache->lock);
	set_bit(from_dblock(b), cache->discard_bitset);
	spin_unlock_irq(&cache->lock);
}

static void clear_discard(struct cache *cache, dm_dblock_t b)
{
	spin_lock_irq(&cache->lock);
	clear_bit(from_dblock(b), cache->discard_bitset);
	spin_unlock_irq(&cache->lock);
}

static bool is_discarded(struct cache *cache, dm_dblock_t b)
{
	int r;

	spin_lock_irq(&cache->lock);
	r = test_bit(from_dblock(b), cache->discard_bitset);
	spin_unlock_irq(&cache->lock);

	return r;
}

static bool is_discarded_oblock(struct cache *cache, dm_oblock_t b)
{
	int r;

	spin_lock_irq(&cache->lock);
	r = test_bit(from_dblock(oblock_to_dblock(cache, b)),
		     cache->discard_bitset);
	spin_unlock_irq(&cache->lock);

	return r;
}

static void remap_to_origin(struct cache *cache, struct bio *bio)
{
	bio_set_dev(bio, cache->origin_dev->bdev);
}

static void remap_to_cache(struct cache *cache, struct bio *bio,
			   dm_cblock_t cblock)
{
	sector_t bi_sector = bio->bi_iter.bi_sector;
	sector_t block = from_cblock(cblock);

	bio_set_dev(bio, cache->cache_dev->bdev);
	if (!block_size_is_power_of_two(cache))
		bio->bi_iter.bi_sector =
			(block * cache->sectors_per_block) +
			sector_div(bi_sector, cache->sectors_per_block);
	else
		bio->bi_iter.bi_sector =
			(block << cache->sectors_per_block_shift) |
			(bi_sector & (cache->sectors_per_block - 1));
}

static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio)
{
	struct per_bio_data *pb;

	spin_lock_irq(&cache->lock);
	if (cache->need_tick_bio && !op_is_flush(bio->bi_opf) &&
	    bio_op(bio) != REQ_OP_DISCARD) {
		pb = get_per_bio_data(bio);
		pb->tick = true;
		cache->need_tick_bio = false;
	}
	spin_unlock_irq(&cache->lock);
}

static void __remap_to_origin_clear_discard(struct cache *cache, struct bio *bio,
					    dm_oblock_t oblock, bool bio_has_pbd)
{
	if (bio_has_pbd)
		check_if_tick_bio_needed(cache, bio);
	remap_to_origin(cache, bio);
	if (bio_data_dir(bio) == WRITE)
		clear_discard(cache, oblock_to_dblock(cache, oblock));
}

static void remap_to_origin_clear_discard(struct cache *cache, struct bio *bio,
					  dm_oblock_t oblock)
{
	__remap_to_origin_clear_discard(cache, bio, oblock, true);
}

static void remap_to_cache_dirty(struct cache *cache, struct bio *bio,
				 dm_oblock_t oblock, dm_cblock_t cblock)
{
	check_if_tick_bio_needed(cache, bio);
	remap_to_cache(cache, bio, cblock);
	if (bio_data_dir(bio) == WRITE) {
		set_dirty(cache, cblock);
		clear_discard(cache, oblock_to_dblock(cache, oblock));
	}
}

static dm_oblock_t get_bio_block(struct cache *cache, struct bio *bio)
{
	sector_t block_nr = bio->bi_iter.bi_sector;

	if (!block_size_is_power_of_two(cache))
		(void) sector_div(block_nr, cache->sectors_per_block);
	else
		block_nr >>= cache->sectors_per_block_shift;

	return to_oblock(block_nr);
}

static bool accountable_bio(struct cache *cache, struct bio *bio)
{
	return bio_op(bio) != REQ_OP_DISCARD;
}

static void accounted_begin(struct cache *cache, struct bio *bio)
{
	struct per_bio_data *pb;

	if (accountable_bio(cache, bio)) {
		pb = get_per_bio_data(bio);
		pb->len = bio_sectors(bio);
		dm_iot_io_begin(&cache->tracker, pb->len);
	}
}

static void accounted_complete(struct cache *cache, struct bio *bio)
{
	struct per_bio_data *pb = get_per_bio_data(bio);

	dm_iot_io_end(&cache->tracker, pb->len);
}

static void accounted_request(struct cache *cache, struct bio *bio)
{
	accounted_begin(cache, bio);
	submit_bio_noacct(bio);
}

static void issue_op(struct bio *bio, void *context)
{
	struct cache *cache = context;

	accounted_request(cache, bio);
}
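
/*
 * When running in writethrough mode we need to send writes to the origin
 * first, then the cache.
 */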
static void remap_to_origin_and_cache(struct cache *cache, struct bio *bio,
				      dm_oblock_t oblock, dm_cblock_t cblock)
{
	struct bio *origin_bio = bio_clone_fast(bio, GFP_NOIO, &cache->bs);

	BUG_ON(!origin_bio);

	bio_chain(origin_bio, bio);
	/*
	 * Passing false to __remap_to_origin_clear_discard() skips
	 * all code that might use per_bio_data.
	 */
	__remap_to_origin_clear_discard(cache, origin_bio, oblock, false);
	submit_bio(origin_bio);

	remap_to_cache(cache, bio, cblock);
}

static enum cache_metadata_mode get_cache_mode(struct cache *cache)
{
	return cache->features.mode;
}

static const char *cache_device_name(struct cache *cache)
{
	return dm_table_device_name(cache->ti->table);
}

static void notify_mode_switch(struct cache *cache, enum cache_metadata_mode mode)
{
	const char *descs[] = {
		"write",
		"read-only",
		"fail"
	};

	dm_table_event(cache->ti->table);
	DMINFO("%s: switching cache to %s mode",
	       cache_device_name(cache), descs[(int)mode]);
}

static void set_cache_mode(struct cache *cache, enum cache_metadata_mode new_mode)
{
	bool needs_check;
	enum cache_metadata_mode old_mode = get_cache_mode(cache);

	if (dm_cache_metadata_needs_check(cache->cmd, &needs_check)) {
		DMERR("%s: unable to read needs_check flag, setting failure mode.",
		      cache_device_name(cache));
		new_mode = CM_FAIL;
	}

	if (new_mode == CM_WRITE && needs_check) {
		DMERR("%s: unable to switch cache to write mode until repaired.",
		      cache_device_name(cache));
		if (old_mode != new_mode)
			new_mode = old_mode;
		else
			new_mode = CM_READ_ONLY;
	}

	/*
	 * Never move out of fail mode.
	 */
	if (old_mode == CM_FAIL)
		new_mode = CM_FAIL;

	switch (new_mode) {
	case CM_FAIL:
	case CM_READ_ONLY:
		dm_cache_metadata_set_read_only(cache->cmd);
		break;

	case CM_WRITE:
		dm_cache_metadata_set_read_write(cache->cmd);
		break;
	}

	cache->features.mode = new_mode;

	if (new_mode != old_mode)
		notify_mode_switch(cache, new_mode);
}

static void abort_transaction(struct cache *cache)
{
	const char *dev_name = cache_device_name(cache);

	if (get_cache_mode(cache) >= CM_READ_ONLY)
		return;

	if (dm_cache_metadata_set_needs_check(cache->cmd)) {
		DMERR("%s: failed to set 'needs_check' flag in metadata", dev_name);
		set_cache_mode(cache, CM_FAIL);
	}

	DMERR_LIMIT("%s: aborting current metadata transaction", dev_name);
	if (dm_cache_metadata_abort(cache->cmd)) {
		DMERR("%s: failed to abort metadata transaction", dev_name);
		set_cache_mode(cache, CM_FAIL);
	}
}

static void metadata_operation_failed(struct cache *cache, const char *op, int r)
{
	DMERR_LIMIT("%s: metadata operation '%s' failed: error = %d",
		    cache_device_name(cache), op, r);
	abort_transaction(cache);
	set_cache_mode(cache, CM_READ_ONLY);
}

static void load_stats(struct cache *cache)
{
	struct dm_cache_statistics stats;

	dm_cache_metadata_get_stats(cache->cmd, &stats);
	atomic_set(&cache->stats.read_hit, stats.read_hits);
	atomic_set(&cache->stats.read_miss, stats.read_misses);
	atomic_set(&cache->stats.write_hit, stats.write_hits);
	atomic_set(&cache->stats.write_miss, stats.write_misses);
}

static void save_stats(struct cache *cache)
{
	struct dm_cache_statistics stats;

	if (get_cache_mode(cache) >= CM_READ_ONLY)
		return;

	stats.read_hits = atomic_read(&cache->stats.read_hit);
	stats.read_misses = atomic_read(&cache->stats.read_miss);
	stats.write_hits = atomic_read(&cache->stats.write_hit);
	stats.write_misses = atomic_read(&cache->stats.write_miss);

	dm_cache_metadata_set_stats(cache->cmd, &stats);
}

static void update_stats(struct cache_stats *stats, enum policy_operation op)
{
	switch (op) {
	case POLICY_PROMOTE:
		atomic_inc(&stats->promotion);
		break;

	case POLICY_DEMOTE:
		atomic_inc(&stats->demotion);
		break;

	case POLICY_WRITEBACK:
		atomic_inc(&stats->writeback);
		break;
	}
}
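
/*
 * Migration covers moving a block's data between the origin and cache
 * devices: promotion (origin -> cache), demotion (cache -> origin) and
 * writeback of dirty cache blocks.
 */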
static void inc_io_migrations(struct cache *cache)
{
	atomic_inc(&cache->nr_io_migrations);
}

static void dec_io_migrations(struct cache *cache)
{
	atomic_dec(&cache->nr_io_migrations);
}

static bool discard_or_flush(struct bio *bio)
{
	return bio_op(bio) == REQ_OP_DISCARD || op_is_flush(bio->bi_opf);
}

static void calc_discard_block_range(struct cache *cache, struct bio *bio,
				     dm_dblock_t *b, dm_dblock_t *e)
{
	sector_t sb = bio->bi_iter.bi_sector;
	sector_t se = bio_end_sector(bio);

	*b = to_dblock(dm_sector_div_up(sb, cache->discard_block_size));

	if (se - sb < cache->discard_block_size)
		*e = *b;
	else
		*e = to_dblock(block_div(se, cache->discard_block_size));
}

static void prevent_background_work(struct cache *cache)
{
	lockdep_off();
	down_write(&cache->background_work_lock);
	lockdep_on();
}

static void allow_background_work(struct cache *cache)
{
	lockdep_off();
	up_write(&cache->background_work_lock);
	lockdep_on();
}

static bool background_work_begin(struct cache *cache)
{
	bool r;

	lockdep_off();
	r = down_read_trylock(&cache->background_work_lock);
	lockdep_on();

	return r;
}

static void background_work_end(struct cache *cache)
{
	lockdep_off();
	up_read(&cache->background_work_lock);
	lockdep_on();
}

static bool bio_writes_complete_block(struct cache *cache, struct bio *bio)
{
	return (bio_data_dir(bio) == WRITE) &&
		(bio->bi_iter.bi_size == (cache->sectors_per_block << SECTOR_SHIFT));
}

static bool optimisable_bio(struct cache *cache, struct bio *bio, dm_oblock_t block)
{
	return writeback_mode(cache) &&
		(is_discarded_oblock(cache, block) || bio_writes_complete_block(cache, bio));
}

static void quiesce(struct dm_cache_migration *mg,
		    void (*continuation)(struct work_struct *))
{
	init_continuation(&mg->k, continuation);
	dm_cell_quiesce_v2(mg->cache->prison, mg->cell, &mg->k.ws);
}

static struct dm_cache_migration *ws_to_mg(struct work_struct *ws)
{
	struct continuation *k = container_of(ws, struct continuation, ws);

	return container_of(k, struct dm_cache_migration, k);
}

static void copy_complete(int read_err, unsigned long write_err, void *context)
{
	struct dm_cache_migration *mg = container_of(context, struct dm_cache_migration, k);

	if (read_err || write_err)
		mg->k.input = BLK_STS_IOERR;

	queue_continuation(mg->cache->wq, &mg->k);
}

static void copy(struct dm_cache_migration *mg, bool promote)
{
	struct dm_io_region o_region, c_region;
	struct cache *cache = mg->cache;

	o_region.bdev = cache->origin_dev->bdev;
	o_region.sector = from_oblock(mg->op->oblock) * cache->sectors_per_block;
	o_region.count = cache->sectors_per_block;

	c_region.bdev = cache->cache_dev->bdev;
	c_region.sector = from_cblock(mg->op->cblock) * cache->sectors_per_block;
	c_region.count = cache->sectors_per_block;

	if (promote)
		dm_kcopyd_copy(cache->copier, &o_region, 1, &c_region, 0, copy_complete, &mg->k);
	else
		dm_kcopyd_copy(cache->copier, &c_region, 1, &o_region, 0, copy_complete, &mg->k);
}

static void bio_drop_shared_lock(struct cache *cache, struct bio *bio)
{
	struct per_bio_data *pb = get_per_bio_data(bio);

	if (pb->cell && dm_cell_put_v2(cache->prison, pb->cell))
		free_prison_cell(cache, pb->cell);
	pb->cell = NULL;
}

static void overwrite_endio(struct bio *bio)
{
	struct dm_cache_migration *mg = bio->bi_private;
	struct cache *cache = mg->cache;
	struct per_bio_data *pb = get_per_bio_data(bio);

	dm_unhook_bio(&pb->hook_info, bio);

	if (bio->bi_status)
		mg->k.input = bio->bi_status;

	queue_continuation(cache->wq, &mg->k);
}

static void overwrite(struct dm_cache_migration *mg,
		      void (*continuation)(struct work_struct *))
{
	struct bio *bio = mg->overwrite_bio;
	struct per_bio_data *pb = get_per_bio_data(bio);

	dm_hook_bio(&pb->hook_info, bio, overwrite_endio, mg);

	/*
	 * The overwrite bio is part of the copy operation, as such it does
	 * not set/clear discard or dirty flags.
	 */
	if (mg->op->op == POLICY_PROMOTE)
		remap_to_cache(mg->cache, bio, mg->op->cblock);
	else
		remap_to_origin(mg->cache, bio);

	init_continuation(&mg->k, continuation);
	accounted_request(mg->cache, bio);
}
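
/*
 * Migration steps:
 *
 * 1) exclusive lock preventing WRITEs
 * 2) quiesce
 * 3) copy or issue overwrite bio
 * 4) upgrade to exclusive lock preventing READs and WRITEs
 * 5) quiesce
 * 6) update metadata and commit
 * 7) unlock
 */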
static void mg_complete(struct dm_cache_migration *mg, bool success)
{
	struct bio_list bios;
	struct cache *cache = mg->cache;
	struct policy_work *op = mg->op;
	dm_cblock_t cblock = op->cblock;

	if (success)
		update_stats(&cache->stats, op->op);

	switch (op->op) {
	case POLICY_PROMOTE:
		clear_discard(cache, oblock_to_dblock(cache, op->oblock));
		policy_complete_background_work(cache->policy, op, success);

		if (mg->overwrite_bio) {
			if (success)
				force_set_dirty(cache, cblock);
			else if (mg->k.input)
				mg->overwrite_bio->bi_status = mg->k.input;
			else
				mg->overwrite_bio->bi_status = BLK_STS_IOERR;
			bio_endio(mg->overwrite_bio);
		} else {
			if (success)
				force_clear_dirty(cache, cblock);
			dec_io_migrations(cache);
		}
		break;

	case POLICY_DEMOTE:
		/*
		 * We clear dirty here to update the nr_dirty counter.
		 */
		if (success)
			force_clear_dirty(cache, cblock);
		policy_complete_background_work(cache->policy, op, success);
		dec_io_migrations(cache);
		break;

	case POLICY_WRITEBACK:
		if (success)
			force_clear_dirty(cache, cblock);
		policy_complete_background_work(cache->policy, op, success);
		dec_io_migrations(cache);
		break;
	}

	bio_list_init(&bios);
	if (mg->cell) {
		if (dm_cell_unlock_v2(cache->prison, mg->cell, &bios))
			free_prison_cell(cache, mg->cell);
	}

	free_migration(mg);
	defer_bios(cache, &bios);
	wake_migration_worker(cache);

	background_work_end(cache);
}

static void mg_success(struct work_struct *ws)
{
	struct dm_cache_migration *mg = ws_to_mg(ws);

	mg_complete(mg, mg->k.input == 0);
}

static void mg_update_metadata(struct work_struct *ws)
{
	int r;
	struct dm_cache_migration *mg = ws_to_mg(ws);
	struct cache *cache = mg->cache;
	struct policy_work *op = mg->op;

	switch (op->op) {
	case POLICY_PROMOTE:
		r = dm_cache_insert_mapping(cache->cmd, op->cblock, op->oblock);
		if (r) {
			DMERR_LIMIT("%s: migration failed; couldn't insert mapping",
				    cache_device_name(cache));
			metadata_operation_failed(cache, "dm_cache_insert_mapping", r);

			mg_complete(mg, false);
			return;
		}
		mg_complete(mg, true);
		break;

	case POLICY_DEMOTE:
		r = dm_cache_remove_mapping(cache->cmd, op->cblock);
		if (r) {
			DMERR_LIMIT("%s: migration failed; couldn't update on disk metadata",
				    cache_device_name(cache));
			metadata_operation_failed(cache, "dm_cache_remove_mapping", r);

			mg_complete(mg, false);
			return;
		}

		/*
		 * It would be nice if we only had to commit when a REQ_FLUSH
		 * comes through.  But a demotion cannot be left uncommitted:
		 * once it completes, the origin block may be written to
		 * directly, and if we crashed before a commit, replaying the
		 * old (uncommitted) mapping would serve stale data from the
		 * cache.  So wait for the commit to land before signalling
		 * success.
		 */
		init_continuation(&mg->k, mg_success);
		continue_after_commit(&cache->committer, &mg->k);
		schedule_commit(&cache->committer);
		break;

	case POLICY_WRITEBACK:
		mg_complete(mg, true);
		break;
	}
}

static void mg_update_metadata_after_copy(struct work_struct *ws)
{
	struct dm_cache_migration *mg = ws_to_mg(ws);

	/*
	 * Did the copy succeed?
	 */
	if (mg->k.input)
		mg_complete(mg, false);
	else
		mg_update_metadata(ws);
}

static void mg_upgrade_lock(struct work_struct *ws)
{
	int r;
	struct dm_cache_migration *mg = ws_to_mg(ws);

	/*
	 * Did the copy succeed?
	 */
	if (mg->k.input)
		mg_complete(mg, false);

	else {
		/*
		 * Now we want the lock to prevent both reads and writes.
		 */
		r = dm_cell_lock_promote_v2(mg->cache->prison, mg->cell,
					    READ_WRITE_LOCK_LEVEL);
		if (r < 0)
			mg_complete(mg, false);

		else if (r)
			quiesce(mg, mg_update_metadata);

		else
			mg_update_metadata(ws);
	}
}

static void mg_full_copy(struct work_struct *ws)
{
	struct dm_cache_migration *mg = ws_to_mg(ws);
	struct cache *cache = mg->cache;
	struct policy_work *op = mg->op;
	bool is_policy_promote = (op->op == POLICY_PROMOTE);

	if ((!is_policy_promote && !is_dirty(cache, op->cblock)) ||
	    is_discarded_oblock(cache, op->oblock)) {
		mg_upgrade_lock(ws);
		return;
	}

	init_continuation(&mg->k, mg_upgrade_lock);
	copy(mg, is_policy_promote);
}

static void mg_copy(struct work_struct *ws)
{
	struct dm_cache_migration *mg = ws_to_mg(ws);

	if (mg->overwrite_bio) {
		/*
		 * No exclusive lock was held when we last checked if the bio
		 * was optimisable.  So things may have changed in the
		 * meantime (eg, the block may no longer be discarded).
		 */
		if (!optimisable_bio(mg->cache, mg->overwrite_bio, mg->op->oblock)) {
			/*
			 * Fall back to a real full copy after doing some tidying up.
			 */
			bool rb = bio_detain_shared(mg->cache, mg->op->oblock, mg->overwrite_bio);

			BUG_ON(rb); /* An exclusive lock must _not_ be held for this block */
			mg->overwrite_bio = NULL;
			inc_io_migrations(mg->cache);
			mg_full_copy(ws);
			return;
		}

		/*
		 * It's safe to do this here, even though it's new data,
		 * because all IO has been locked out of the block.
		 *
		 * mg_lock_writes() already took READ_WRITE_LOCK_LEVEL,
		 * so we're _not_ using mg_upgrade_lock() as the continuation.
		 */
		overwrite(mg, mg_update_metadata_after_copy);

	} else
		mg_full_copy(ws);
}

static int mg_lock_writes(struct dm_cache_migration *mg)
{
	int r;
	struct dm_cell_key_v2 key;
	struct cache *cache = mg->cache;
	struct dm_bio_prison_cell_v2 *prealloc;

	prealloc = alloc_prison_cell(cache);

	/*
	 * Prevent writes to the block, but allow reads to continue.
	 * Unless we're using an overwrite bio, in which case we lock
	 * everything.
	 */
	build_key(mg->op->oblock, oblock_succ(mg->op->oblock), &key);
	r = dm_cell_lock_v2(cache->prison, &key,
			    mg->overwrite_bio ? READ_WRITE_LOCK_LEVEL : WRITE_LOCK_LEVEL,
			    prealloc, &mg->cell);
	if (r < 0) {
		free_prison_cell(cache, prealloc);
		mg_complete(mg, false);
		return r;
	}

	if (mg->cell != prealloc)
		free_prison_cell(cache, prealloc);

	if (r == 0)
		mg_copy(&mg->k.ws);
	else
		quiesce(mg, mg_copy);

	return 0;
}

static int mg_start(struct cache *cache, struct policy_work *op, struct bio *bio)
{
	struct dm_cache_migration *mg;

	if (!background_work_begin(cache)) {
		policy_complete_background_work(cache->policy, op, false);
		return -EPERM;
	}

	mg = alloc_migration(cache);

	mg->op = op;
	mg->overwrite_bio = bio;

	if (!bio)
		inc_io_migrations(cache);

	return mg_lock_writes(mg);
}
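
/*
 * Invalidation processing: dropping a block's mapping from the cache,
 * eg, in passthrough mode when a cached block is written to.
 */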
static void invalidate_complete(struct dm_cache_migration *mg, bool success)
{
	struct bio_list bios;
	struct cache *cache = mg->cache;

	bio_list_init(&bios);
	if (dm_cell_unlock_v2(cache->prison, mg->cell, &bios))
		free_prison_cell(cache, mg->cell);

	if (!success && mg->overwrite_bio)
		bio_io_error(mg->overwrite_bio);

	free_migration(mg);
	defer_bios(cache, &bios);

	background_work_end(cache);
}

static void invalidate_completed(struct work_struct *ws)
{
	struct dm_cache_migration *mg = ws_to_mg(ws);

	invalidate_complete(mg, !mg->k.input);
}

static int invalidate_cblock(struct cache *cache, dm_cblock_t cblock)
{
	int r = policy_invalidate_mapping(cache->policy, cblock);

	if (!r) {
		r = dm_cache_remove_mapping(cache->cmd, cblock);
		if (r) {
			DMERR_LIMIT("%s: invalidation failed; couldn't update on disk metadata",
				    cache_device_name(cache));
			metadata_operation_failed(cache, "dm_cache_remove_mapping", r);
		}

	} else if (r == -ENODATA) {
		/*
		 * Harmless, already unmapped.
		 */
		r = 0;

	} else
		DMERR("%s: policy_invalidate_mapping failed", cache_device_name(cache));

	return r;
}

static void invalidate_remove(struct work_struct *ws)
{
	int r;
	struct dm_cache_migration *mg = ws_to_mg(ws);
	struct cache *cache = mg->cache;

	r = invalidate_cblock(cache, mg->invalidate_cblock);
	if (r) {
		invalidate_complete(mg, false);
		return;
	}

	init_continuation(&mg->k, invalidate_completed);
	continue_after_commit(&cache->committer, &mg->k);
	remap_to_origin_clear_discard(cache, mg->overwrite_bio, mg->invalidate_oblock);
	mg->overwrite_bio = NULL;
	schedule_commit(&cache->committer);
}

static int invalidate_lock(struct dm_cache_migration *mg)
{
	int r;
	struct dm_cell_key_v2 key;
	struct cache *cache = mg->cache;
	struct dm_bio_prison_cell_v2 *prealloc;

	prealloc = alloc_prison_cell(cache);

	build_key(mg->invalidate_oblock, oblock_succ(mg->invalidate_oblock), &key);
	r = dm_cell_lock_v2(cache->prison, &key,
			    READ_WRITE_LOCK_LEVEL, prealloc, &mg->cell);
	if (r < 0) {
		free_prison_cell(cache, prealloc);
		invalidate_complete(mg, false);
		return r;
	}

	if (mg->cell != prealloc)
		free_prison_cell(cache, prealloc);

	if (r)
		quiesce(mg, invalidate_remove);

	else {
		/*
		 * We can't call invalidate_remove() directly here because we
		 * might still be in request context.
		 */
		init_continuation(&mg->k, invalidate_remove);
		queue_work(cache->wq, &mg->k.ws);
	}

	return 0;
}

static int invalidate_start(struct cache *cache, dm_cblock_t cblock,
			    dm_oblock_t oblock, struct bio *bio)
{
	struct dm_cache_migration *mg;

	if (!background_work_begin(cache))
		return -EPERM;

	mg = alloc_migration(cache);

	mg->overwrite_bio = bio;
	mg->invalidate_cblock = cblock;
	mg->invalidate_oblock = oblock;

	return invalidate_lock(mg);
}
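
/*
 * Bio processing.  Background migrations are only started when the io
 * tracker says the device has been idle for a while and the volume of
 * in-flight migration io is below the migration threshold.
 */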
enum busy {
	IDLE,
	BUSY
};

static enum busy spare_migration_bandwidth(struct cache *cache)
{
	bool idle = dm_iot_idle_for(&cache->tracker, HZ);
	sector_t current_volume = (atomic_read(&cache->nr_io_migrations) + 1) *
		cache->sectors_per_block;

	if (idle && current_volume <= cache->migration_threshold)
		return IDLE;
	else
		return BUSY;
}

static void inc_hit_counter(struct cache *cache, struct bio *bio)
{
	atomic_inc(bio_data_dir(bio) == READ ?
		   &cache->stats.read_hit : &cache->stats.write_hit);
}

static void inc_miss_counter(struct cache *cache, struct bio *bio)
{
	atomic_inc(bio_data_dir(bio) == READ ?
		   &cache->stats.read_miss : &cache->stats.write_miss);
}
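
/*
 * map_bio() remaps the bio in place and returns DM_MAPIO_REMAPPED when the
 * caller should submit it, or DM_MAPIO_SUBMITTED when the bio has been
 * consumed here (deferred, errored or completed).  *commit_needed is set
 * when the caller should schedule a metadata commit.
 */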
static int map_bio(struct cache *cache, struct bio *bio, dm_oblock_t block,
		   bool *commit_needed)
{
	int r, data_dir;
	bool rb, background_queued;
	dm_cblock_t cblock;

	*commit_needed = false;

	rb = bio_detain_shared(cache, block, bio);
	if (!rb) {
		/*
		 * An exclusive lock is held for this block, so we have to
		 * wait.  We set the commit_needed flag so the current
		 * transaction will be committed asap, allowing this lock
		 * to be dropped sooner.
		 */
		*commit_needed = true;
		return DM_MAPIO_SUBMITTED;
	}

	data_dir = bio_data_dir(bio);

	if (optimisable_bio(cache, bio, block)) {
		struct policy_work *op = NULL;

		r = policy_lookup_with_work(cache->policy, block, &cblock, data_dir, true, &op);
		if (unlikely(r && r != -ENOENT)) {
			DMERR_LIMIT("%s: policy_lookup_with_work() failed with r = %d",
				    cache_device_name(cache), r);
			bio_io_error(bio);
			return DM_MAPIO_SUBMITTED;
		}

		if (r == -ENOENT && op) {
			bio_drop_shared_lock(cache, bio);
			BUG_ON(op->op != POLICY_PROMOTE);
			mg_start(cache, op, bio);
			return DM_MAPIO_SUBMITTED;
		}
	} else {
		r = policy_lookup(cache->policy, block, &cblock, data_dir, false, &background_queued);
		if (unlikely(r && r != -ENOENT)) {
			DMERR_LIMIT("%s: policy_lookup() failed with r = %d",
				    cache_device_name(cache), r);
			bio_io_error(bio);
			return DM_MAPIO_SUBMITTED;
		}

		if (background_queued)
			wake_migration_worker(cache);
	}

	if (r == -ENOENT) {
		struct per_bio_data *pb = get_per_bio_data(bio);

		/*
		 * Miss.
		 */
		inc_miss_counter(cache, bio);
		if (pb->req_nr == 0) {
			accounted_begin(cache, bio);
			remap_to_origin_clear_discard(cache, bio, block);
		} else {
			/*
			 * This is a duplicate writethrough io that is no
			 * longer needed because the block has been demoted.
			 */
			bio_endio(bio);
			return DM_MAPIO_SUBMITTED;
		}
	} else {
		/*
		 * Hit.
		 */
		inc_hit_counter(cache, bio);

		/*
		 * Passthrough always maps to the origin, invalidating any
		 * cache blocks that are written to.
		 */
		if (passthrough_mode(cache)) {
			if (bio_data_dir(bio) == WRITE) {
				bio_drop_shared_lock(cache, bio);
				atomic_inc(&cache->stats.demotion);
				invalidate_start(cache, cblock, block, bio);
			} else
				remap_to_origin_clear_discard(cache, bio, block);
		} else {
			if (bio_data_dir(bio) == WRITE && writethrough_mode(cache) &&
			    !is_dirty(cache, cblock)) {
				remap_to_origin_and_cache(cache, bio, block, cblock);
				accounted_begin(cache, bio);
			} else
				remap_to_cache_dirty(cache, bio, block, cblock);
		}
	}

	/*
	 * A FUA bio must not complete before the metadata that makes its
	 * data visible has been committed, so route it through the batcher.
	 */
	if (bio->bi_opf & REQ_FUA) {
		/*
		 * issue_after_commit() holds the bio until the next commit
		 * completes (or errors it if the commit fails).
		 */
		accounted_complete(cache, bio);
		issue_after_commit(&cache->committer, bio);
		*commit_needed = true;
		return DM_MAPIO_SUBMITTED;
	}

	return DM_MAPIO_REMAPPED;
}

static bool process_bio(struct cache *cache, struct bio *bio)
{
	bool commit_needed;

	if (map_bio(cache, bio, get_bio_block(cache, bio), &commit_needed) == DM_MAPIO_REMAPPED)
		submit_bio_noacct(bio);

	return commit_needed;
}

/*
 * A non-zero return indicates read_only or fail_io mode.
 */
static int commit(struct cache *cache, bool clean_shutdown)
{
	int r;

	if (get_cache_mode(cache) >= CM_READ_ONLY)
		return -EINVAL;

	atomic_inc(&cache->stats.commit_count);
	r = dm_cache_commit(cache->cmd, clean_shutdown);
	if (r)
		metadata_operation_failed(cache, "dm_cache_commit", r);

	return r;
}
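
/*
 * Used by the batcher.
 */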
static blk_status_t commit_op(void *context)
{
	struct cache *cache = context;

	if (dm_cache_changed_this_transaction(cache->cmd))
		return errno_to_blk_status(commit(cache, false));

	return 0;
}

static bool process_flush_bio(struct cache *cache, struct bio *bio)
{
	struct per_bio_data *pb = get_per_bio_data(bio);

	if (!pb->req_nr)
		remap_to_origin(cache, bio);
	else
		remap_to_cache(cache, bio, 0);

	issue_after_commit(&cache->committer, bio);
	return true;
}

static bool process_discard_bio(struct cache *cache, struct bio *bio)
{
	dm_dblock_t b, e;

	/*
	 * Mark the covered blocks in the discard bitset; the discard is
	 * only passed down to the origin device if configured to do so.
	 */
	calc_discard_block_range(cache, bio, &b, &e);
	while (b != e) {
		set_discard(cache, b);
		b = to_dblock(from_dblock(b) + 1);
	}

	if (cache->features.discard_passdown) {
		remap_to_origin(cache, bio);
		submit_bio_noacct(bio);
	} else
		bio_endio(bio);

	return false;
}

static void process_deferred_bios(struct work_struct *ws)
{
	struct cache *cache = container_of(ws, struct cache, deferred_bio_worker);
	bool commit_needed = false;
	struct bio_list bios;
	struct bio *bio;

	bio_list_init(&bios);

	spin_lock_irq(&cache->lock);
	bio_list_merge(&bios, &cache->deferred_bios);
	bio_list_init(&cache->deferred_bios);
	spin_unlock_irq(&cache->lock);

	while ((bio = bio_list_pop(&bios))) {
		if (bio->bi_opf & REQ_PREFLUSH)
			commit_needed = process_flush_bio(cache, bio) || commit_needed;

		else if (bio_op(bio) == REQ_OP_DISCARD)
			commit_needed = process_discard_bio(cache, bio) || commit_needed;

		else
			commit_needed = process_bio(cache, bio) || commit_needed;
	}

	if (commit_needed)
		schedule_commit(&cache->committer);
}

static void requeue_deferred_bios(struct cache *cache)
{
	struct bio *bio;
	struct bio_list bios;

	bio_list_init(&bios);
	bio_list_merge(&bios, &cache->deferred_bios);
	bio_list_init(&cache->deferred_bios);

	while ((bio = bio_list_pop(&bios))) {
		bio->bi_status = BLK_STS_DM_REQUEUE;
		bio_endio(bio);
	}
}
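
/*
 * We want to commit periodically so that not too much
 * unwritten metadata builds up.
 */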
static void do_waker(struct work_struct *ws)
{
	struct cache *cache = container_of(to_delayed_work(ws), struct cache, waker);

	policy_tick(cache->policy, true);
	wake_migration_worker(cache);
	schedule_commit(&cache->committer);
	queue_delayed_work(cache->wq, &cache->waker, COMMIT_PERIOD);
}

static void check_migrations(struct work_struct *ws)
{
	int r;
	struct policy_work *op;
	struct cache *cache = container_of(ws, struct cache, migration_worker);
	enum busy b;

	for (;;) {
		b = spare_migration_bandwidth(cache);

		r = policy_get_background_work(cache->policy, b == IDLE, &op);
		if (r == -ENODATA)
			break;

		if (r) {
			DMERR_LIMIT("%s: policy_background_work failed",
				    cache_device_name(cache));
			break;
		}

		r = mg_start(cache, op, NULL);
		if (r)
			break;
	}
}
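
/*
 * Target methods
 *
 * This function gets called on the error paths of the constructor, so we
 * have to cope with a partially initialised struct.
 */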
static void destroy(struct cache *cache)
{
	unsigned i;

	mempool_exit(&cache->migration_pool);

	if (cache->prison)
		dm_bio_prison_destroy_v2(cache->prison);

	if (cache->wq)
		destroy_workqueue(cache->wq);

	if (cache->dirty_bitset)
		free_bitset(cache->dirty_bitset);

	if (cache->discard_bitset)
		free_bitset(cache->discard_bitset);

	if (cache->copier)
		dm_kcopyd_client_destroy(cache->copier);

	if (cache->cmd)
		dm_cache_metadata_close(cache->cmd);

	if (cache->metadata_dev)
		dm_put_device(cache->ti, cache->metadata_dev);

	if (cache->origin_dev)
		dm_put_device(cache->ti, cache->origin_dev);

	if (cache->cache_dev)
		dm_put_device(cache->ti, cache->cache_dev);

	if (cache->policy)
		dm_cache_policy_destroy(cache->policy);

	for (i = 0; i < cache->nr_ctr_args; i++)
		kfree(cache->ctr_args[i]);
	kfree(cache->ctr_args);

	bioset_exit(&cache->bs);

	kfree(cache);
}

static void cache_dtr(struct dm_target *ti)
{
	struct cache *cache = ti->private;

	destroy(cache);
}

static sector_t get_dev_size(struct dm_dev *dev)
{
	return i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT;
}
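
/*
 * Construct a cache device mapping:
 *
 * cache <metadata dev> <cache dev> <origin dev> <block size>
 *       <#feature args> [<feature arg>]*
 *       <policy> <#policy args> [<policy arg>]*
 *
 * metadata dev	   : fast device holding the persistent metadata
 * cache dev	   : fast device holding cached data blocks
 * origin dev	   : slow device holding original data blocks
 * block size	   : cache unit size in sectors
 *
 * #feature args   : number of feature arguments passed
 * feature args    : writethrough | passthrough | metadata2 |
 *		     no_discard_passdown (the default is writeback)
 *
 * policy	   : the replacement policy to use
 * #policy args    : an even number of policy arguments corresponding
 *		     to key/value pairs passed to the policy
 * policy args	   : key/value pairs passed to the policy,
 *		     e.g. 'sequential_threshold 1024'
 */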
struct cache_args {
	struct dm_target *ti;

	struct dm_dev *metadata_dev;

	struct dm_dev *cache_dev;
	sector_t cache_sectors;

	struct dm_dev *origin_dev;
	sector_t origin_sectors;

	uint32_t block_size;

	const char *policy_name;
	int policy_argc;
	const char **policy_argv;

	struct cache_features features;
};

static void destroy_cache_args(struct cache_args *ca)
{
	if (ca->metadata_dev)
		dm_put_device(ca->ti, ca->metadata_dev);

	if (ca->cache_dev)
		dm_put_device(ca->ti, ca->cache_dev);

	if (ca->origin_dev)
		dm_put_device(ca->ti, ca->origin_dev);

	kfree(ca);
}

static bool at_least_one_arg(struct dm_arg_set *as, char **error)
{
	if (!as->argc) {
		*error = "Insufficient args";
		return false;
	}

	return true;
}

static int parse_metadata_dev(struct cache_args *ca, struct dm_arg_set *as,
			      char **error)
{
	int r;
	sector_t metadata_dev_size;
	char b[BDEVNAME_SIZE];

	if (!at_least_one_arg(as, error))
		return -EINVAL;

	r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
			  &ca->metadata_dev);
	if (r) {
		*error = "Error opening metadata device";
		return r;
	}

	metadata_dev_size = get_dev_size(ca->metadata_dev);
	if (metadata_dev_size > DM_CACHE_METADATA_MAX_SECTORS_WARNING)
		DMWARN("Metadata device %s is larger than %u sectors: excess space will not be used.",
		       bdevname(ca->metadata_dev->bdev, b),
		       DM_CACHE_METADATA_MAX_SECTORS_WARNING);

	return 0;
}

static int parse_cache_dev(struct cache_args *ca, struct dm_arg_set *as,
			   char **error)
{
	int r;

	if (!at_least_one_arg(as, error))
		return -EINVAL;

	r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
			  &ca->cache_dev);
	if (r) {
		*error = "Error opening cache device";
		return r;
	}
	ca->cache_sectors = get_dev_size(ca->cache_dev);

	return 0;
}

static int parse_origin_dev(struct cache_args *ca, struct dm_arg_set *as,
			    char **error)
{
	int r;

	if (!at_least_one_arg(as, error))
		return -EINVAL;

	r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
			  &ca->origin_dev);
	if (r) {
		*error = "Error opening origin device";
		return r;
	}

	ca->origin_sectors = get_dev_size(ca->origin_dev);
	if (ca->ti->len > ca->origin_sectors) {
		*error = "Device size larger than cached device";
		return -EINVAL;
	}

	return 0;
}

static int parse_block_size(struct cache_args *ca, struct dm_arg_set *as,
			    char **error)
{
	unsigned long block_size;

	if (!at_least_one_arg(as, error))
		return -EINVAL;

	if (kstrtoul(dm_shift_arg(as), 10, &block_size) || !block_size ||
	    block_size < DATA_DEV_BLOCK_SIZE_MIN_SECTORS ||
	    block_size > DATA_DEV_BLOCK_SIZE_MAX_SECTORS ||
	    block_size & (DATA_DEV_BLOCK_SIZE_MIN_SECTORS - 1)) {
		*error = "Invalid data block size";
		return -EINVAL;
	}

	if (block_size > ca->cache_sectors) {
		*error = "Data block size is larger than the cache device";
		return -EINVAL;
	}

	ca->block_size = block_size;

	return 0;
}

static void init_features(struct cache_features *cf)
{
	cf->mode = CM_WRITE;
	cf->io_mode = CM_IO_WRITEBACK;
	cf->metadata_version = 1;
	cf->discard_passdown = true;
}

static int parse_features(struct cache_args *ca, struct dm_arg_set *as,
			  char **error)
{
	static const struct dm_arg _args[] = {
		{0, 3, "Invalid number of cache feature arguments"},
	};

	int r, mode_ctr = 0;
	unsigned argc;
	const char *arg;
	struct cache_features *cf = &ca->features;

	init_features(cf);

	r = dm_read_arg_group(_args, as, &argc, error);
	if (r)
		return -EINVAL;

	while (argc--) {
		arg = dm_shift_arg(as);

		if (!strcasecmp(arg, "writeback")) {
			cf->io_mode = CM_IO_WRITEBACK;
			mode_ctr++;
		}

		else if (!strcasecmp(arg, "writethrough")) {
			cf->io_mode = CM_IO_WRITETHROUGH;
			mode_ctr++;
		}

		else if (!strcasecmp(arg, "passthrough")) {
			cf->io_mode = CM_IO_PASSTHROUGH;
			mode_ctr++;
		}

		else if (!strcasecmp(arg, "metadata2"))
			cf->metadata_version = 2;

		else if (!strcasecmp(arg, "no_discard_passdown"))
			cf->discard_passdown = false;

		else {
			*error = "Unrecognised cache feature requested";
			return -EINVAL;
		}
	}

	if (mode_ctr > 1) {
		*error = "Duplicate cache io_mode features requested";
		return -EINVAL;
	}

	return 0;
}

static int parse_policy(struct cache_args *ca, struct dm_arg_set *as,
			char **error)
{
	static const struct dm_arg _args[] = {
		{0, 1024, "Invalid number of policy arguments"},
	};

	int r;

	if (!at_least_one_arg(as, error))
		return -EINVAL;

	ca->policy_name = dm_shift_arg(as);

	r = dm_read_arg_group(_args, as, &ca->policy_argc, error);
	if (r)
		return -EINVAL;

	ca->policy_argv = (const char **)as->argv;
	dm_consume_args(as, ca->policy_argc);

	return 0;
}

static int parse_cache_args(struct cache_args *ca, int argc, char **argv,
			    char **error)
{
	int r;
	struct dm_arg_set as;

	as.argc = argc;
	as.argv = argv;

	r = parse_metadata_dev(ca, &as, error);
	if (r)
		return r;

	r = parse_cache_dev(ca, &as, error);
	if (r)
		return r;

	r = parse_origin_dev(ca, &as, error);
	if (r)
		return r;

	r = parse_block_size(ca, &as, error);
	if (r)
		return r;

	r = parse_features(ca, &as, error);
	if (r)
		return r;

	r = parse_policy(ca, &as, error);
	if (r)
		return r;

	return 0;
}
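
/*
 * Example table line (illustrative values only):
 *
 *   dmsetup create cached --table \
 *     '0 41943040 cache /dev/mapper/fast-meta /dev/mapper/fast \
 *      /dev/mapper/slow 512 1 writeback default 0'
 *
 * This caches a 20GiB slow device using 512-sector (256KiB) blocks,
 * writeback mode and the default policy with no policy arguments.
 */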

static struct kmem_cache *migration_cache;

#define NOT_CORE_OPTION 1

static int process_config_option(struct cache *cache, const char *key, const char *value)
{
	unsigned long tmp;

	if (!strcasecmp(key, "migration_threshold")) {
		if (kstrtoul(value, 10, &tmp))
			return -EINVAL;

		cache->migration_threshold = tmp;
		return 0;
	}

	return NOT_CORE_OPTION;
}

static int set_config_value(struct cache *cache, const char *key, const char *value)
{
	int r = process_config_option(cache, key, value);

	if (r == NOT_CORE_OPTION)
		r = policy_set_config_value(cache->policy, key, value);

	if (r)
		DMWARN("bad config value for %s: %s", key, value);

	return r;
}

static int set_config_values(struct cache *cache, int argc, const char **argv)
{
	int r = 0;

	if (argc & 1) {
		DMWARN("Odd number of policy arguments given but they should be <key> <value> pairs.");
		return -EINVAL;
	}

	while (argc) {
		r = set_config_value(cache, argv[0], argv[1]);
		if (r)
			break;

		argc -= 2;
		argv += 2;
	}

	return r;
}

static int create_cache_policy(struct cache *cache, struct cache_args *ca,
			       char **error)
{
	struct dm_cache_policy *p = dm_cache_policy_create(ca->policy_name,
							   cache->cache_size,
							   cache->origin_sectors,
							   cache->sectors_per_block);
	if (IS_ERR(p)) {
		*error = "Error creating cache's policy";
		return PTR_ERR(p);
	}
	cache->policy = p;
	BUG_ON(!cache->policy);

	return 0;
}
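
/*
 * We want the discard block size to be at least the size of the cache
 * block size and have no more than 2^14 discard blocks across the origin.
 */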
#define MAX_DISCARD_BLOCKS (1 << 14)

static bool too_many_discard_blocks(sector_t discard_block_size,
				    sector_t origin_size)
{
	(void) sector_div(origin_size, discard_block_size);

	return origin_size > MAX_DISCARD_BLOCKS;
}

static sector_t calculate_discard_block_size(sector_t cache_block_size,
					     sector_t origin_size)
{
	sector_t discard_block_size = cache_block_size;

	if (origin_size)
		while (too_many_discard_blocks(discard_block_size, origin_size))
			discard_block_size *= 2;

	return discard_block_size;
}

static void set_cache_size(struct cache *cache, dm_cblock_t size)
{
	dm_block_t nr_blocks = from_cblock(size);

	if (nr_blocks > (1 << 20) && cache->cache_size != size)
		DMWARN_LIMIT("You have created a cache device with a lot of individual cache blocks (%llu)\n"
			     "All these mappings can consume a lot of kernel memory, and take some time to read/write.\n"
			     "Please consider increasing the cache block size to reduce the overall cache block count.",
			     (unsigned long long) nr_blocks);

	cache->cache_size = size;
}

#define DEFAULT_MIGRATION_THRESHOLD 2048

static int cache_create(struct cache_args *ca, struct cache **result)
{
	int r = 0;
	char **error = &ca->ti->error;
	struct cache *cache;
	struct dm_target *ti = ca->ti;
	dm_block_t origin_blocks;
	struct dm_cache_metadata *cmd;
	bool may_format = ca->features.mode == CM_WRITE;

	cache = kzalloc(sizeof(*cache), GFP_KERNEL);
	if (!cache)
		return -ENOMEM;

	cache->ti = ca->ti;
	ti->private = cache;
	ti->num_flush_bios = 2;
	ti->flush_supported = true;

	ti->num_discard_bios = 1;
	ti->discards_supported = true;

	ti->per_io_data_size = sizeof(struct per_bio_data);

	cache->features = ca->features;
	if (writethrough_mode(cache)) {
		/* Create bioset for writethrough bios issued to origin */
		r = bioset_init(&cache->bs, BIO_POOL_SIZE, 0, 0);
		if (r)
			goto bad;
	}

	cache->metadata_dev = ca->metadata_dev;
	cache->origin_dev = ca->origin_dev;
	cache->cache_dev = ca->cache_dev;

	ca->metadata_dev = ca->origin_dev = ca->cache_dev = NULL;

	origin_blocks = cache->origin_sectors = ca->origin_sectors;
	origin_blocks = block_div(origin_blocks, ca->block_size);
	cache->origin_blocks = to_oblock(origin_blocks);

	cache->sectors_per_block = ca->block_size;
	if (dm_set_target_max_io_len(ti, cache->sectors_per_block)) {
		r = -EINVAL;
		goto bad;
	}

	if (ca->block_size & (ca->block_size - 1)) {
		dm_block_t cache_size = ca->cache_sectors;

		cache->sectors_per_block_shift = -1;
		cache_size = block_div(cache_size, ca->block_size);
		set_cache_size(cache, to_cblock(cache_size));
	} else {
		cache->sectors_per_block_shift = __ffs(ca->block_size);
		set_cache_size(cache, to_cblock(ca->cache_sectors >> cache->sectors_per_block_shift));
	}

	r = create_cache_policy(cache, ca, error);
	if (r)
		goto bad;

	cache->policy_nr_args = ca->policy_argc;
	cache->migration_threshold = DEFAULT_MIGRATION_THRESHOLD;

	r = set_config_values(cache, ca->policy_argc, ca->policy_argv);
	if (r) {
		*error = "Error setting cache policy's config values";
		goto bad;
	}

	cmd = dm_cache_metadata_open(cache->metadata_dev->bdev,
				     ca->block_size, may_format,
				     dm_cache_policy_get_hint_size(cache->policy),
				     ca->features.metadata_version);
	if (IS_ERR(cmd)) {
		*error = "Error creating metadata object";
		r = PTR_ERR(cmd);
		goto bad;
	}
	cache->cmd = cmd;
	set_cache_mode(cache, CM_WRITE);
	if (get_cache_mode(cache) != CM_WRITE) {
		*error = "Unable to get write access to metadata, please check/repair metadata.";
		r = -EINVAL;
		goto bad;
	}

	if (passthrough_mode(cache)) {
		bool all_clean;

		r = dm_cache_metadata_all_clean(cache->cmd, &all_clean);
		if (r) {
			*error = "dm_cache_metadata_all_clean() failed";
			goto bad;
		}

		if (!all_clean) {
			*error = "Cannot enter passthrough mode unless all blocks are clean";
			r = -EINVAL;
			goto bad;
		}

		policy_allow_migrations(cache->policy, false);
	}

	spin_lock_init(&cache->lock);
	bio_list_init(&cache->deferred_bios);
	atomic_set(&cache->nr_allocated_migrations, 0);
	atomic_set(&cache->nr_io_migrations, 0);
	init_waitqueue_head(&cache->migration_wait);

	r = -ENOMEM;
	atomic_set(&cache->nr_dirty, 0);
	cache->dirty_bitset = alloc_bitset(from_cblock(cache->cache_size));
	if (!cache->dirty_bitset) {
		*error = "could not allocate dirty bitset";
		goto bad;
	}
	clear_bitset(cache->dirty_bitset, from_cblock(cache->cache_size));

	cache->discard_block_size =
		calculate_discard_block_size(cache->sectors_per_block,
					     cache->origin_sectors);
	cache->discard_nr_blocks = to_dblock(dm_sector_div_up(cache->origin_sectors,
							      cache->discard_block_size));
	cache->discard_bitset = alloc_bitset(from_dblock(cache->discard_nr_blocks));
	if (!cache->discard_bitset) {
		*error = "could not allocate discard bitset";
		goto bad;
	}
	clear_bitset(cache->discard_bitset, from_dblock(cache->discard_nr_blocks));

	cache->copier = dm_kcopyd_client_create(&dm_kcopyd_throttle);
	if (IS_ERR(cache->copier)) {
		*error = "could not create kcopyd client";
		r = PTR_ERR(cache->copier);
		goto bad;
	}

	cache->wq = alloc_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM, 0);
	if (!cache->wq) {
		*error = "could not create workqueue for metadata object";
		goto bad;
	}
	INIT_WORK(&cache->deferred_bio_worker, process_deferred_bios);
	INIT_WORK(&cache->migration_worker, check_migrations);
	INIT_DELAYED_WORK(&cache->waker, do_waker);

	cache->prison = dm_bio_prison_create_v2(cache->wq);
	if (!cache->prison) {
		*error = "could not create bio prison";
		goto bad;
	}

	r = mempool_init_slab_pool(&cache->migration_pool, MIGRATION_POOL_SIZE,
				   migration_cache);
	if (r) {
		*error = "Error creating cache's migration mempool";
		goto bad;
	}

	cache->need_tick_bio = true;
	cache->sized = false;
	cache->invalidate = false;
	cache->commit_requested = false;
	cache->loaded_mappings = false;
	cache->loaded_discards = false;

	load_stats(cache);

	atomic_set(&cache->stats.demotion, 0);
	atomic_set(&cache->stats.promotion, 0);
	atomic_set(&cache->stats.copies_avoided, 0);
	atomic_set(&cache->stats.cache_cell_clash, 0);
	atomic_set(&cache->stats.commit_count, 0);
	atomic_set(&cache->stats.discard_count, 0);

	spin_lock_init(&cache->invalidation_lock);
	INIT_LIST_HEAD(&cache->invalidation_requests);

	batcher_init(&cache->committer, commit_op, cache,
		     issue_op, cache, cache->wq);
	dm_iot_init(&cache->tracker);

	init_rwsem(&cache->background_work_lock);
	prevent_background_work(cache);

	*result = cache;
	return 0;
bad:
	destroy(cache);
	return r;
}

static int copy_ctr_args(struct cache *cache, int argc, const char **argv)
{
	unsigned i;
	const char **copy;

	copy = kcalloc(argc, sizeof(*copy), GFP_KERNEL);
	if (!copy)
		return -ENOMEM;
	for (i = 0; i < argc; i++) {
		copy[i] = kstrdup(argv[i], GFP_KERNEL);
		if (!copy[i]) {
			while (i--)
				kfree(copy[i]);
			kfree(copy);
			return -ENOMEM;
		}
	}

	cache->nr_ctr_args = argc;
	cache->ctr_args = copy;

	return 0;
}

static int cache_ctr(struct dm_target *ti, unsigned argc, char **argv)
{
	int r = -EINVAL;
	struct cache_args *ca;
	struct cache *cache = NULL;

	ca = kzalloc(sizeof(*ca), GFP_KERNEL);
	if (!ca) {
		ti->error = "Error allocating memory for cache";
		return -ENOMEM;
	}
	ca->ti = ti;

	r = parse_cache_args(ca, argc, argv, &ti->error);
	if (r)
		goto out;

	r = cache_create(ca, &cache);
	if (r)
		goto out;

	r = copy_ctr_args(cache, argc - 3, (const char **)argv + 3);
	if (r) {
		destroy(cache);
		goto out;
	}

	ti->private = cache;
out:
	destroy_cache_args(ca);
	return r;
}

static int cache_map(struct dm_target *ti, struct bio *bio)
{
	struct cache *cache = ti->private;
	int r;
	bool commit_needed;
	dm_oblock_t block = get_bio_block(cache, bio);

	init_per_bio_data(bio);
	if (unlikely(from_oblock(block) >= from_oblock(cache->origin_blocks))) {
		/*
		 * This can only occur if the io goes to a partial block at
		 * the end of the origin device.  We don't cache these.
		 * Just remap to the origin and carry on.
		 */
		remap_to_origin(cache, bio);
		accounted_begin(cache, bio);
		return DM_MAPIO_REMAPPED;
	}

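	/*
	 * Flushes and discards are handed to the worker thread via the
	 * deferred list rather than being remapped here.
	 */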
	if (discard_or_flush(bio)) {
		defer_bio(cache, bio);
		return DM_MAPIO_SUBMITTED;
	}

	r = map_bio(cache, bio, block, &commit_needed);
	if (commit_needed)
		schedule_commit(&cache->committer);

	return r;
}

static int cache_end_io(struct dm_target *ti, struct bio *bio, blk_status_t *error)
{
	struct cache *cache = ti->private;
	unsigned long flags;
	struct per_bio_data *pb = get_per_bio_data(bio);

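	/*
	 * If this bio carried the policy tick, arrange for the next bio
	 * to carry one too.
	 */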
	if (pb->tick) {
		policy_tick(cache->policy, false);

		spin_lock_irqsave(&cache->lock, flags);
		cache->need_tick_bio = true;
		spin_unlock_irqrestore(&cache->lock, flags);
	}

	bio_drop_shared_lock(cache, bio);
	accounted_complete(cache, bio);

	return DM_ENDIO_DONE;
}

static int write_dirty_bitset(struct cache *cache)
{
	int r;

	if (get_cache_mode(cache) >= CM_READ_ONLY)
		return -EINVAL;

	r = dm_cache_set_dirty_bits(cache->cmd, from_cblock(cache->cache_size), cache->dirty_bitset);
	if (r)
		metadata_operation_failed(cache, "dm_cache_set_dirty_bits", r);

	return r;
}

static int write_discard_bitset(struct cache *cache)
{
	int r;
	unsigned i;

	if (get_cache_mode(cache) >= CM_READ_ONLY)
		return -EINVAL;

	r = dm_cache_discard_bitset_resize(cache->cmd, cache->discard_block_size,
					   cache->discard_nr_blocks);
	if (r) {
		DMERR("%s: could not resize on-disk discard bitset", cache_device_name(cache));
		metadata_operation_failed(cache, "dm_cache_discard_bitset_resize", r);
		return r;
	}

	for (i = 0; i < from_dblock(cache->discard_nr_blocks); i++) {
		r = dm_cache_set_discard(cache->cmd, to_dblock(i),
					 is_discarded(cache, to_dblock(i)));
		if (r) {
			metadata_operation_failed(cache, "dm_cache_set_discard", r);
			return r;
		}
	}

	return 0;
}

static int write_hints(struct cache *cache)
{
	int r;

	if (get_cache_mode(cache) >= CM_READ_ONLY)
		return -EINVAL;

	r = dm_cache_write_hints(cache->cmd, cache->policy);
	if (r) {
		metadata_operation_failed(cache, "dm_cache_write_hints", r);
		return r;
	}

	return 0;
}

/*
 * Returns true on success.
 */
static bool sync_metadata(struct cache *cache)
{
	int r1, r2, r3, r4;

	r1 = write_dirty_bitset(cache);
	if (r1)
		DMERR("%s: could not write dirty bitset", cache_device_name(cache));

	r2 = write_discard_bitset(cache);
	if (r2)
		DMERR("%s: could not write discard bitset", cache_device_name(cache));

	save_stats(cache);

	r3 = write_hints(cache);
	if (r3)
		DMERR("%s: could not write hints", cache_device_name(cache));

	/*
	 * If writing the above metadata failed we still commit, but don't
	 * set the clean shutdown flag.  This effectively forces every
	 * dirty bit to be set on reload.
	 */
	r4 = commit(cache, !r1 && !r2 && !r3);
	if (r4)
		DMERR("%s: could not write cache metadata", cache_device_name(cache));

	return !r1 && !r2 && !r3 && !r4;
}

static void cache_postsuspend(struct dm_target *ti)
{
	struct cache *cache = ti->private;

	prevent_background_work(cache);
	BUG_ON(atomic_read(&cache->nr_io_migrations));

	cancel_delayed_work_sync(&cache->waker);
	drain_workqueue(cache->wq);
	WARN_ON(cache->tracker.in_flight);

	/*
	 * If this is a flush suspend there won't be any deferred bios, so
	 * the call below is harmless.
	 */
	requeue_deferred_bios(cache);

	if (get_cache_mode(cache) == CM_WRITE)
		(void) sync_metadata(cache);
}

static int load_mapping(void *context, dm_oblock_t oblock, dm_cblock_t cblock,
			bool dirty, uint32_t hint, bool hint_valid)
{
	struct cache *cache = context;

	if (dirty) {
		set_bit(from_cblock(cblock), cache->dirty_bitset);
		atomic_inc(&cache->nr_dirty);
	} else
		clear_bit(from_cblock(cblock), cache->dirty_bitset);

	return policy_load_mapping(cache->policy, oblock, cblock, dirty, hint, hint_valid);
}

/*
 * The discard block size in the on-disk metadata is not necessarily the
 * same as the one currently in use, so take care to only set the
 * discarded attribute when a loaded range covers a complete block at the
 * new size.
 */
struct discard_load_info {
	struct cache *cache;

	/*
	 * These fields are expressed in the on-disk dblock size, rather
	 * than the current one.
	 */
	dm_block_t block_size;
	dm_block_t discard_begin, discard_end;
};

static void discard_load_info_init(struct cache *cache,
				   struct discard_load_info *li)
{
	li->cache = cache;
	li->discard_begin = li->discard_end = 0;
}

static void set_discard_range(struct discard_load_info *li)
{
	sector_t b, e;

	if (li->discard_begin == li->discard_end)
		return;

	/*
	 * Convert the range to sectors.
	 */
	b = li->discard_begin * li->block_size;
	e = li->discard_end * li->block_size;

	/*
	 * Then convert back to the current dblock size.
	 */
	b = dm_sector_div_up(b, li->cache->discard_block_size);
	sector_div(e, li->cache->discard_block_size);

	/*
	 * The origin may have shrunk, so we need to check we're still
	 * in bounds.
	 */
	if (e > from_dblock(li->cache->discard_nr_blocks))
		e = from_dblock(li->cache->discard_nr_blocks);

	for (; b < e; b++)
		set_discard(li->cache, to_dblock(b));
}

static int load_discard(void *context, sector_t discard_block_size,
			dm_dblock_t dblock, bool discard)
{
	struct discard_load_info *li = context;

	li->block_size = discard_block_size;

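	/*
	 * Runs of discarded dblocks are accumulated into [discard_begin,
	 * discard_end) and emitted as a single range.
	 */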
	if (discard) {
		if (from_dblock(dblock) == li->discard_end)
			/*
			 * We're already in a discard range, just extend it.
			 */
			li->discard_end = li->discard_end + 1ULL;

		else {
			/*
			 * Emit the old range and start a new one.
			 */
			set_discard_range(li);
			li->discard_begin = from_dblock(dblock);
			li->discard_end = li->discard_begin + 1ULL;
		}
	} else {
		set_discard_range(li);
		li->discard_begin = li->discard_end = 0;
	}

	return 0;
}

static dm_cblock_t get_cache_dev_size(struct cache *cache)
{
	sector_t size = get_dev_size(cache->cache_dev);
	(void) sector_div(size, cache->sectors_per_block);
	return to_cblock(size);
}

static bool can_resize(struct cache *cache, dm_cblock_t new_size)
{
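	/*
	 * Growing the cache needs an explicit table reload; it is never
	 * done implicitly at resume time.
	 */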
	if (from_cblock(new_size) > from_cblock(cache->cache_size)) {
		if (cache->sized) {
			DMERR("%s: unable to extend cache due to missing cache table reload",
			      cache_device_name(cache));
			return false;
		}
	}

	/*
	 * We can't drop a dirty block when shrinking the cache.  Check
	 * each block that would be dropped before incrementing, so the
	 * scan covers exactly new_size .. cache_size - 1.
	 */
	while (from_cblock(new_size) < from_cblock(cache->cache_size)) {
		if (is_dirty(cache, new_size)) {
			DMERR("%s: unable to shrink cache; cache block %llu is dirty",
			      cache_device_name(cache),
			      (unsigned long long) from_cblock(new_size));
			return false;
		}
		new_size = to_cblock(from_cblock(new_size) + 1);
	}

	return true;
}

static int resize_cache_dev(struct cache *cache, dm_cblock_t new_size)
{
	int r;

	r = dm_cache_resize(cache->cmd, new_size);
	if (r) {
		DMERR("%s: could not resize cache metadata", cache_device_name(cache));
		metadata_operation_failed(cache, "dm_cache_resize", r);
		return r;
	}

	set_cache_size(cache, new_size);

	return 0;
}

static int cache_preresume(struct dm_target *ti)
{
	int r = 0;
	struct cache *cache = ti->private;
	dm_cblock_t csize = get_cache_dev_size(cache);

	/*
	 * Check to see if the cache has resized.
	 */
	if (!cache->sized) {
		r = resize_cache_dev(cache, csize);
		if (r)
			return r;

		cache->sized = true;

	} else if (csize != cache->cache_size) {
		if (!can_resize(cache, csize))
			return -EINVAL;

		r = resize_cache_dev(cache, csize);
		if (r)
			return r;
	}

	if (!cache->loaded_mappings) {
		r = dm_cache_load_mappings(cache->cmd, cache->policy,
					   load_mapping, cache);
		if (r) {
			DMERR("%s: could not load cache mappings", cache_device_name(cache));
			metadata_operation_failed(cache, "dm_cache_load_mappings", r);
			return r;
		}

		cache->loaded_mappings = true;
	}

	if (!cache->loaded_discards) {
		struct discard_load_info li;

		/*
		 * The discard bitset could have been resized, or the
		 * discard block size changed.  To be safe we start by
		 * setting every dblock to not discarded.
		 */
		clear_bitset(cache->discard_bitset, from_dblock(cache->discard_nr_blocks));

		discard_load_info_init(cache, &li);
		r = dm_cache_load_discards(cache->cmd, load_discard, &li);
		if (r) {
			DMERR("%s: could not load origin discards", cache_device_name(cache));
			metadata_operation_failed(cache, "dm_cache_load_discards", r);
			return r;
		}
		set_discard_range(&li);

		cache->loaded_discards = true;
	}

	return r;
}

static void cache_resume(struct dm_target *ti)
{
	struct cache *cache = ti->private;

	cache->need_tick_bio = true;
	allow_background_work(cache);
	do_waker(&cache->waker.work);
}

static void emit_flags(struct cache *cache, char *result,
		       unsigned maxlen, ssize_t *sz_ptr)
{
	ssize_t sz = *sz_ptr;
	struct cache_features *cf = &cache->features;
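	/* One word for the io mode, plus optional metadata2 and no_discard_passdown. */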
	unsigned count = (cf->metadata_version == 2) + !cf->discard_passdown + 1;

	DMEMIT("%u ", count);

	if (cf->metadata_version == 2)
		DMEMIT("metadata2 ");

	if (writethrough_mode(cache))
		DMEMIT("writethrough ");

	else if (passthrough_mode(cache))
		DMEMIT("passthrough ");

	else if (writeback_mode(cache))
		DMEMIT("writeback ");

	else {
		DMEMIT("unknown ");
		DMERR("%s: internal error: unknown io mode: %d",
		      cache_device_name(cache), (int) cf->io_mode);
	}

	if (!cf->discard_passdown)
		DMEMIT("no_discard_passdown ");

	*sz_ptr = sz;
}

/*
 * Status format:
 *
 * <metadata block size> <#used metadata blocks>/<#total metadata blocks>
 * <cache block size> <#used cache blocks>/<#total cache blocks>
 * <#read hits> <#read misses> <#write hits> <#write misses>
 * <#demotions> <#promotions> <#dirty>
 * <#features> <features>*
 * <#core args> <core args>
 * <policy name> <#policy args> <policy args>* <cache metadata mode> <needs_check>
 */
static void cache_status(struct dm_target *ti, status_type_t type,
			 unsigned status_flags, char *result, unsigned maxlen)
{
	int r = 0;
	unsigned i;
	ssize_t sz = 0;
	dm_block_t nr_free_blocks_metadata = 0;
	dm_block_t nr_blocks_metadata = 0;
	char buf[BDEVNAME_SIZE];
	struct cache *cache = ti->private;
	dm_cblock_t residency;
	bool needs_check;

	switch (type) {
	case STATUSTYPE_INFO:
		if (get_cache_mode(cache) == CM_FAIL) {
			DMEMIT("Fail");
			break;
		}

		/* Commit to ensure statistics aren't out-of-date */
		if (!(status_flags & DM_STATUS_NOFLUSH_FLAG) && !dm_suspended(ti))
			(void) commit(cache, false);

		r = dm_cache_get_free_metadata_block_count(cache->cmd, &nr_free_blocks_metadata);
		if (r) {
			DMERR("%s: dm_cache_get_free_metadata_block_count returned %d",
			      cache_device_name(cache), r);
			goto err;
		}

		r = dm_cache_get_metadata_dev_size(cache->cmd, &nr_blocks_metadata);
		if (r) {
			DMERR("%s: dm_cache_get_metadata_dev_size returned %d",
			      cache_device_name(cache), r);
			goto err;
		}

		residency = policy_residency(cache->policy);

		DMEMIT("%u %llu/%llu %llu %llu/%llu %u %u %u %u %u %u %lu ",
		       (unsigned)DM_CACHE_METADATA_BLOCK_SIZE,
		       (unsigned long long)(nr_blocks_metadata - nr_free_blocks_metadata),
		       (unsigned long long)nr_blocks_metadata,
		       (unsigned long long)cache->sectors_per_block,
		       (unsigned long long) from_cblock(residency),
		       (unsigned long long) from_cblock(cache->cache_size),
		       (unsigned) atomic_read(&cache->stats.read_hit),
		       (unsigned) atomic_read(&cache->stats.read_miss),
		       (unsigned) atomic_read(&cache->stats.write_hit),
		       (unsigned) atomic_read(&cache->stats.write_miss),
		       (unsigned) atomic_read(&cache->stats.demotion),
		       (unsigned) atomic_read(&cache->stats.promotion),
		       (unsigned long) atomic_read(&cache->nr_dirty));

		emit_flags(cache, result, maxlen, &sz);

		DMEMIT("2 migration_threshold %llu ", (unsigned long long) cache->migration_threshold);

		DMEMIT("%s ", dm_cache_policy_get_name(cache->policy));
		if (sz < maxlen) {
			r = policy_emit_config_values(cache->policy, result, maxlen, &sz);
			if (r)
				DMERR("%s: policy_emit_config_values returned %d",
				      cache_device_name(cache), r);
		}

		if (get_cache_mode(cache) == CM_READ_ONLY)
			DMEMIT("ro ");
		else
			DMEMIT("rw ");

		r = dm_cache_metadata_needs_check(cache->cmd, &needs_check);

		if (r || needs_check)
			DMEMIT("needs_check ");
		else
			DMEMIT("- ");

		break;

	case STATUSTYPE_TABLE:
		format_dev_t(buf, cache->metadata_dev->bdev->bd_dev);
		DMEMIT("%s ", buf);
		format_dev_t(buf, cache->cache_dev->bdev->bd_dev);
		DMEMIT("%s ", buf);
		format_dev_t(buf, cache->origin_dev->bdev->bd_dev);
		DMEMIT("%s", buf);

		if (cache->nr_ctr_args) {
			for (i = 0; i < cache->nr_ctr_args - 1; i++)
				DMEMIT(" %s", cache->ctr_args[i]);
			DMEMIT(" %s", cache->ctr_args[cache->nr_ctr_args - 1]);
		}
		break;

	case STATUSTYPE_IMA:
		DMEMIT_TARGET_NAME_VERSION(ti->type);
		if (get_cache_mode(cache) == CM_FAIL)
			DMEMIT(",metadata_mode=fail");
		else if (get_cache_mode(cache) == CM_READ_ONLY)
			DMEMIT(",metadata_mode=ro");
		else
			DMEMIT(",metadata_mode=rw");

		format_dev_t(buf, cache->metadata_dev->bdev->bd_dev);
		DMEMIT(",cache_metadata_device=%s", buf);
		format_dev_t(buf, cache->cache_dev->bdev->bd_dev);
		DMEMIT(",cache_device=%s", buf);
		format_dev_t(buf, cache->origin_dev->bdev->bd_dev);
		DMEMIT(",cache_origin_device=%s", buf);
		DMEMIT(",writethrough=%c", writethrough_mode(cache) ? 'y' : 'n');
		DMEMIT(",writeback=%c", writeback_mode(cache) ? 'y' : 'n');
		DMEMIT(",passthrough=%c", passthrough_mode(cache) ? 'y' : 'n');
		DMEMIT(",metadata2=%c", cache->features.metadata_version == 2 ? 'y' : 'n');
		DMEMIT(",no_discard_passdown=%c", cache->features.discard_passdown ? 'n' : 'y');
		DMEMIT(";");
		break;
	}

	return;

err:
	DMEMIT("Error");
}

struct cblock_range {
	dm_cblock_t begin;
	dm_cblock_t end;
};

/*
 * A cache block range can take two forms:
 *
 * i) A single cblock, eg. '3456'
 * ii) A begin and end cblock with a dash between, eg. 123-234
 */
static int parse_cblock_range(struct cache *cache, const char *str,
			      struct cblock_range *result)
{
	char dummy;
	uint64_t b, e;
	int r;

	/*
	 * Try and parse form (ii) first.
	 */
	r = sscanf(str, "%llu-%llu%c", &b, &e, &dummy);
	if (r < 0)
		return r;

	if (r == 2) {
		result->begin = to_cblock(b);
		result->end = to_cblock(e);
		return 0;
	}

	/*
	 * That didn't work, try form (i).
	 */
	r = sscanf(str, "%llu%c", &b, &dummy);
	if (r < 0)
		return r;

	if (r == 1) {
		result->begin = to_cblock(b);
		result->end = to_cblock(from_cblock(result->begin) + 1u);
		return 0;
	}

	DMERR("%s: invalid cblock range '%s'", cache_device_name(cache), str);
	return -EINVAL;
}

static int validate_cblock_range(struct cache *cache, struct cblock_range *range)
{
	uint64_t b = from_cblock(range->begin);
	uint64_t e = from_cblock(range->end);
	uint64_t n = from_cblock(cache->cache_size);

	if (b >= n) {
		DMERR("%s: begin cblock out of range: %llu >= %llu",
		      cache_device_name(cache), b, n);
		return -EINVAL;
	}

	if (e > n) {
		DMERR("%s: end cblock out of range: %llu > %llu",
		      cache_device_name(cache), e, n);
		return -EINVAL;
	}

	if (b >= e) {
		DMERR("%s: invalid cblock range: %llu >= %llu",
		      cache_device_name(cache), b, e);
		return -EINVAL;
	}

	return 0;
}

static inline dm_cblock_t cblock_succ(dm_cblock_t b)
{
	return to_cblock(from_cblock(b) + 1);
}

static int request_invalidation(struct cache *cache, struct cblock_range *range)
{
	int r = 0;

	/*
	 * Invalidate one cblock at a time; any failure aborts the walk
	 * and is returned to the caller.  A commit is requested so the
	 * invalidations reach the metadata.
	 */
	while (range->begin != range->end) {
		r = invalidate_cblock(cache, range->begin);
		if (r)
			return r;

		range->begin = cblock_succ(range->begin);
	}

	cache->commit_requested = true;
	return r;
}

static int process_invalidate_cblocks_message(struct cache *cache, unsigned count,
					      const char **cblock_ranges)
{
	int r = 0;
	unsigned i;
	struct cblock_range range;

	if (!passthrough_mode(cache)) {
		DMERR("%s: cache has to be in passthrough mode for invalidation",
		      cache_device_name(cache));
		return -EPERM;
	}

	for (i = 0; i < count; i++) {
		r = parse_cblock_range(cache, cblock_ranges[i], &range);
		if (r)
			break;

		r = validate_cblock_range(cache, &range);
		if (r)
			break;

		/*
		 * Pass the range on for invalidation.
		 */
		r = request_invalidation(cache, &range);
		if (r)
			break;
	}

	return r;
}

/*
 * Supports
 *	"<key> <value>"
 * and
 *	"invalidate_cblocks [(<begin>)|(<begin>-<end>)]*"
 *
 * The key migration_threshold is supported by the cache target core.
 */
static int cache_message(struct dm_target *ti, unsigned argc, char **argv,
			 char *result, unsigned maxlen)
{
	struct cache *cache = ti->private;

	if (!argc)
		return -EINVAL;

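	/*
	 * No messages are serviced once the metadata has degraded to
	 * read-only or fail mode.
	 */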
	if (get_cache_mode(cache) >= CM_READ_ONLY) {
		DMERR("%s: unable to service cache target messages in READ_ONLY or FAIL mode",
		      cache_device_name(cache));
		return -EOPNOTSUPP;
	}

	if (!strcasecmp(argv[0], "invalidate_cblocks"))
		return process_invalidate_cblocks_message(cache, argc - 1, (const char **) argv + 1);

	if (argc != 2)
		return -EINVAL;

	return set_config_value(cache, argv[0], argv[1]);
}

static int cache_iterate_devices(struct dm_target *ti,
				 iterate_devices_callout_fn fn, void *data)
{
	int r = 0;
	struct cache *cache = ti->private;

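	/* Visit the whole fast device, then the span of the origin this target maps. */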
	r = fn(ti, cache->cache_dev, 0, get_dev_size(cache->cache_dev), data);
	if (!r)
		r = fn(ti, cache->origin_dev, 0, ti->len, data);

	return r;
}

static bool origin_dev_supports_discard(struct block_device *origin_bdev)
{
	struct request_queue *q = bdev_get_queue(origin_bdev);

	return blk_queue_discard(q);
}


/*
 * If discard_passdown was enabled verify that the origin device supports
 * discards.  Disable discard_passdown if not.
 */
static void disable_passdown_if_not_supported(struct cache *cache)
{
	struct block_device *origin_bdev = cache->origin_dev->bdev;
	struct queue_limits *origin_limits = &bdev_get_queue(origin_bdev)->limits;
	const char *reason = NULL;
	char buf[BDEVNAME_SIZE];

	if (!cache->features.discard_passdown)
		return;

	if (!origin_dev_supports_discard(origin_bdev))
		reason = "discard unsupported";

	else if (origin_limits->max_discard_sectors < cache->sectors_per_block)
		reason = "max discard sectors smaller than a block";

	if (reason) {
		DMWARN("Origin device (%s) %s: Disabling discard passdown.",
		       bdevname(origin_bdev, buf), reason);
		cache->features.discard_passdown = false;
	}
}

static void set_discard_limits(struct cache *cache, struct queue_limits *limits)
{
	struct block_device *origin_bdev = cache->origin_dev->bdev;
	struct queue_limits *origin_limits = &bdev_get_queue(origin_bdev)->limits;

	if (!cache->features.discard_passdown) {
		/* No passdown is done, so expose our own virtual discard limits. */
		limits->max_discard_sectors = min_t(sector_t, cache->discard_block_size * 1024,
						    cache->origin_sectors);
		limits->discard_granularity = cache->discard_block_size << SECTOR_SHIFT;
		return;
	}

	/*
	 * cache_iterate_devices() stacks both the origin and fast device
	 * limits, but discards aren't passed to the fast device, so
	 * inherit the origin's limits.
	 */
	limits->max_discard_sectors = origin_limits->max_discard_sectors;
	limits->max_hw_discard_sectors = origin_limits->max_hw_discard_sectors;
	limits->discard_granularity = origin_limits->discard_granularity;
	limits->discard_alignment = origin_limits->discard_alignment;
	limits->discard_misaligned = origin_limits->discard_misaligned;
}

static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
{
	struct cache *cache = ti->private;
	uint64_t io_opt_sectors = limits->io_opt >> SECTOR_SHIFT;

	/*
	 * If the system-determined stacked limits are compatible with the
	 * cache's blocksize (io_opt is a factor) do not override them.
	 */
	if (io_opt_sectors < cache->sectors_per_block ||
	    do_div(io_opt_sectors, cache->sectors_per_block)) {
		blk_limits_io_min(limits, cache->sectors_per_block << SECTOR_SHIFT);
		blk_limits_io_opt(limits, cache->sectors_per_block << SECTOR_SHIFT);
	}

	disable_passdown_if_not_supported(cache);
	set_discard_limits(cache, limits);
}

static struct target_type cache_target = {
	.name = "cache",
	.version = {2, 2, 0},
	.module = THIS_MODULE,
	.ctr = cache_ctr,
	.dtr = cache_dtr,
	.map = cache_map,
	.end_io = cache_end_io,
	.postsuspend = cache_postsuspend,
	.preresume = cache_preresume,
	.resume = cache_resume,
	.status = cache_status,
	.message = cache_message,
	.iterate_devices = cache_iterate_devices,
	.io_hints = cache_io_hints,
};

static int __init dm_cache_init(void)
{
	int r;

	migration_cache = KMEM_CACHE(dm_cache_migration, 0);
	if (!migration_cache)
		return -ENOMEM;

	r = dm_register_target(&cache_target);
	if (r) {
		DMERR("cache target registration failed: %d", r);
		kmem_cache_destroy(migration_cache);
		return r;
	}

	return 0;
}

static void __exit dm_cache_exit(void)
{
	dm_unregister_target(&cache_target);
	kmem_cache_destroy(migration_cache);
}

module_init(dm_cache_init);
module_exit(dm_cache_exit);

MODULE_DESCRIPTION(DM_NAME " cache target");
MODULE_AUTHOR("Joe Thornber <ejt@redhat.com>");
MODULE_LICENSE("GPL");