/*
 * Copyright (C) 2012 Red Hat. All rights reserved.
 *
 * This file is released under the GPL.
 */

7#include "dm.h"
8#include "dm-bio-prison-v2.h"
9#include "dm-bio-record.h"
10#include "dm-cache-metadata.h"
11
12#include <linux/dm-io.h>
13#include <linux/dm-kcopyd.h>
14#include <linux/jiffies.h>
15#include <linux/init.h>
16#include <linux/mempool.h>
17#include <linux/module.h>
18#include <linux/rwsem.h>
19#include <linux/slab.h>
20#include <linux/vmalloc.h>
21
22#define DM_MSG_PREFIX "cache"
23
24DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(cache_copy_throttle,
25 "A percentage of time allocated for copying to and/or from cache");
26
/*----------------------------------------------------------------*/

/*
 * Glossary:
 *
 * oblock: index of an origin block
 * cblock: index of a cache block
 * promotion: movement of a block from origin to cache
 * demotion: movement of a block from cache to origin
 * migration: movement of a block between the origin and cache device,
 *            either direction
 */

/*----------------------------------------------------------------*/
42struct io_tracker {
43 spinlock_t lock;

 /*
  * Sectors of in-flight IO.
  */
48 sector_t in_flight;

 /*
  * The time, in jiffies, when this device became idle (if it is
  * indeed idle).
  */
54 unsigned long idle_time;
55 unsigned long last_update_time;
56};
57
58static void iot_init(struct io_tracker *iot)
59{
60 spin_lock_init(&iot->lock);
61 iot->in_flight = 0ul;
62 iot->idle_time = 0ul;
63 iot->last_update_time = jiffies;
64}
65
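/*
 * Returns true if the tracker has seen no I/O for at least 'jifs' jiffies.
 * __iot_idle_for() expects the caller to hold iot->lock; iot_idle_for() is
 * the locking wrapper.
 */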
66static bool __iot_idle_for(struct io_tracker *iot, unsigned long jifs)
67{
68 if (iot->in_flight)
69 return false;
70
71 return time_after(jiffies, iot->idle_time + jifs);
72}
73
74static bool iot_idle_for(struct io_tracker *iot, unsigned long jifs)
75{
76 bool r;
77
78 spin_lock_irq(&iot->lock);
79 r = __iot_idle_for(iot, jifs);
80 spin_unlock_irq(&iot->lock);
81
82 return r;
83}
84
85static void iot_io_begin(struct io_tracker *iot, sector_t len)
86{
87 spin_lock_irq(&iot->lock);
88 iot->in_flight += len;
89 spin_unlock_irq(&iot->lock);
90}
91
92static void __iot_io_end(struct io_tracker *iot, sector_t len)
93{
94 if (!len)
95 return;
96
97 iot->in_flight -= len;
98 if (!iot->in_flight)
99 iot->idle_time = jiffies;
100}
101
102static void iot_io_end(struct io_tracker *iot, sector_t len)
103{
104 unsigned long flags;
105
106 spin_lock_irqsave(&iot->lock, flags);
107 __iot_io_end(iot, len);
108 spin_unlock_irqrestore(&iot->lock, flags);
109}
110
/*----------------------------------------------------------------*/

/*
 * Represents a chunk of future work.  'input' allows continuations to pass
 * values between themselves (typically error values).
 */
117struct continuation {
118 struct work_struct ws;
119 blk_status_t input;
120};
121
122static inline void init_continuation(struct continuation *k,
123 void (*fn)(struct work_struct *))
124{
125 INIT_WORK(&k->ws, fn);
126 k->input = 0;
127}
128
129static inline void queue_continuation(struct workqueue_struct *wq,
130 struct continuation *k)
131{
132 queue_work(wq, &k->ws);
133}
134
/*----------------------------------------------------------------*/

/*
 * The batcher collects together pieces of work that need a particular
 * operation to occur before they can proceed (typically a commit).
 */
141struct batcher {
 /*
  * The operation that everyone is waiting for.
  */
145 blk_status_t (*commit_op)(void *context);
146 void *commit_context;

 /*
  * This is how bios should be issued once the commit op is complete
  * (accounted_request).
  */
152 void (*issue_op)(struct bio *bio, void *context);
153 void *issue_context;

 /*
  * Queued work gets put on here after commit.
  */
158 struct workqueue_struct *wq;
159
160 spinlock_t lock;
161 struct list_head work_items;
162 struct bio_list bios;
163 struct work_struct commit_work;
164
165 bool commit_scheduled;
166};
167
168static void __commit(struct work_struct *_ws)
169{
170 struct batcher *b = container_of(_ws, struct batcher, commit_work);
171 blk_status_t r;
172 struct list_head work_items;
173 struct work_struct *ws, *tmp;
174 struct continuation *k;
175 struct bio *bio;
176 struct bio_list bios;
177
178 INIT_LIST_HEAD(&work_items);
179 bio_list_init(&bios);
180
 /*
  * We have to grab these before the commit_op to avoid a race
  * condition.
  */
185 spin_lock_irq(&b->lock);
186 list_splice_init(&b->work_items, &work_items);
187 bio_list_merge(&bios, &b->bios);
188 bio_list_init(&b->bios);
189 b->commit_scheduled = false;
190 spin_unlock_irq(&b->lock);
191
192 r = b->commit_op(b->commit_context);
193
194 list_for_each_entry_safe(ws, tmp, &work_items, entry) {
195 k = container_of(ws, struct continuation, ws);
196 k->input = r;
197 INIT_LIST_HEAD(&ws->entry);
198 queue_work(b->wq, ws);
199 }
200
201 while ((bio = bio_list_pop(&bios))) {
202 if (r) {
203 bio->bi_status = r;
204 bio_endio(bio);
205 } else
206 b->issue_op(bio, b->issue_context);
207 }
208}
209
210static void batcher_init(struct batcher *b,
211 blk_status_t (*commit_op)(void *),
212 void *commit_context,
213 void (*issue_op)(struct bio *bio, void *),
214 void *issue_context,
215 struct workqueue_struct *wq)
216{
217 b->commit_op = commit_op;
218 b->commit_context = commit_context;
219 b->issue_op = issue_op;
220 b->issue_context = issue_context;
221 b->wq = wq;
222
223 spin_lock_init(&b->lock);
224 INIT_LIST_HEAD(&b->work_items);
225 bio_list_init(&b->bios);
226 INIT_WORK(&b->commit_work, __commit);
227 b->commit_scheduled = false;
228}
229
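/*
 * Kicks off __commit() on the batcher's workqueue.
 */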
230static void async_commit(struct batcher *b)
231{
232 queue_work(b->wq, &b->commit_work);
233}
234
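/*
 * Registers a continuation to be run after the next commit completes.  If a
 * commit has already been requested via schedule_commit() the commit work is
 * queued immediately.
 */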
235static void continue_after_commit(struct batcher *b, struct continuation *k)
236{
237 bool commit_scheduled;
238
239 spin_lock_irq(&b->lock);
240 commit_scheduled = b->commit_scheduled;
241 list_add_tail(&k->ws.entry, &b->work_items);
242 spin_unlock_irq(&b->lock);
243
244 if (commit_scheduled)
245 async_commit(b);
246}
247
/*
 * Bios are errored if commit failed.
 */
251static void issue_after_commit(struct batcher *b, struct bio *bio)
252{
253 bool commit_scheduled;
254
255 spin_lock_irq(&b->lock);
256 commit_scheduled = b->commit_scheduled;
257 bio_list_add(&b->bios, bio);
258 spin_unlock_irq(&b->lock);
259
260 if (commit_scheduled)
261 async_commit(b);
262}
263
/*
 * Call this if some urgent work is waiting for the commit to complete.
 */
267static void schedule_commit(struct batcher *b)
268{
269 bool immediate;
270
271 spin_lock_irq(&b->lock);
272 immediate = !list_empty(&b->work_items) || !bio_list_empty(&b->bios);
273 b->commit_scheduled = true;
274 spin_unlock_irq(&b->lock);
275
276 if (immediate)
277 async_commit(b);
278}
279
/*----------------------------------------------------------------*/

/*
 * There are a couple of places where we let a bio run, but want to do some
 * work before calling its endio function.  We do this by temporarily
 * changing the endio fn.
 */
285struct dm_hook_info {
286 bio_end_io_t *bi_end_io;
287};
288
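/*
 * Saves the bio's current endio function in 'h' and substitutes the given
 * one; dm_unhook_bio() restores the original.
 */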
289static void dm_hook_bio(struct dm_hook_info *h, struct bio *bio,
290 bio_end_io_t *bi_end_io, void *bi_private)
291{
292 h->bi_end_io = bio->bi_end_io;
293
294 bio->bi_end_io = bi_end_io;
295 bio->bi_private = bi_private;
296}
297
298static void dm_unhook_bio(struct dm_hook_info *h, struct bio *bio)
299{
300 bio->bi_end_io = h->bi_end_io;
301}
302
303
304
305#define MIGRATION_POOL_SIZE 128
306#define COMMIT_PERIOD HZ
307#define MIGRATION_COUNT_WINDOW 10
308
/*
 * The block size of the device holding cache data must be
 * between 32KB and 1GB.
 */
313#define DATA_DEV_BLOCK_SIZE_MIN_SECTORS (32 * 1024 >> SECTOR_SHIFT)
314#define DATA_DEV_BLOCK_SIZE_MAX_SECTORS (1024 * 1024 * 1024 >> SECTOR_SHIFT)
315
316enum cache_metadata_mode {
317 CM_WRITE,
318 CM_READ_ONLY,
319 CM_FAIL
320};
321
322enum cache_io_mode {
 /*
  * Data is written to cached blocks only.  These blocks are marked
  * dirty.  If you lose the cache device you will lose data.
  * Potential performance increase for both reads and writes.
  */
328 CM_IO_WRITEBACK,

 /*
  * Data is written to both cache and origin.  Blocks are never
  * dirty.  Potential performance benefit for reads only.
  */
334 CM_IO_WRITETHROUGH,
335
 /*
  * A degraded mode useful for various cache coherency situations
  * (eg, rolling back snapshots).  Reads and writes always go to the
  * origin.  If a write goes to a cached oblock, then the cache
  * block is invalidated.
  */
342 CM_IO_PASSTHROUGH
343};
344
345struct cache_features {
346 enum cache_metadata_mode mode;
347 enum cache_io_mode io_mode;
348 unsigned metadata_version;
349 bool discard_passdown:1;
350};
351
352struct cache_stats {
353 atomic_t read_hit;
354 atomic_t read_miss;
355 atomic_t write_hit;
356 atomic_t write_miss;
357 atomic_t demotion;
358 atomic_t promotion;
359 atomic_t writeback;
360 atomic_t copies_avoided;
361 atomic_t cache_cell_clash;
362 atomic_t commit_count;
363 atomic_t discard_count;
364};
365
366struct cache {
367 struct dm_target *ti;
368 spinlock_t lock;
369
 /*
  * Fields for converting from sectors to blocks.
  */
373 int sectors_per_block_shift;
374 sector_t sectors_per_block;
375
376 struct dm_cache_metadata *cmd;
377
 /*
  * Metadata is written to this device.
  */
381 struct dm_dev *metadata_dev;
382
 /*
  * The slower of the two data devices.  Typically a spindle.
  */
386 struct dm_dev *origin_dev;
387
 /*
  * The faster of the two data devices.  Typically an SSD.
  */
391 struct dm_dev *cache_dev;
392
 /*
  * Size of the origin device in _complete_ blocks and native sectors.
  */
396 dm_oblock_t origin_blocks;
397 sector_t origin_sectors;
398
 /*
  * Size of the cache device in blocks.
  */
402 dm_cblock_t cache_size;
403
 /*
  * Invalidation fields.
  */
407 spinlock_t invalidation_lock;
408 struct list_head invalidation_requests;
409
410 sector_t migration_threshold;
411 wait_queue_head_t migration_wait;
412 atomic_t nr_allocated_migrations;
413

 /*
  * The number of in flight migrations that are performing
  * background io, eg, promotion, writeback.
  */
418 atomic_t nr_io_migrations;
419
420 struct bio_list deferred_bios;
421
422 struct rw_semaphore quiesce_lock;
423
424 struct dm_target_callbacks callbacks;
425
 /*
  * origin_blocks entries, discarded if set.
  */
429 dm_dblock_t discard_nr_blocks;
430 unsigned long *discard_bitset;
431 uint32_t discard_block_size;
432

 /*
  * Rather than reconstructing the table line we cache it here.
  */
437 unsigned nr_ctr_args;
438 const char **ctr_args;
439
440 struct dm_kcopyd_client *copier;
441 struct work_struct deferred_bio_worker;
442 struct work_struct migration_worker;
443 struct workqueue_struct *wq;
444 struct delayed_work waker;
445 struct dm_bio_prison_v2 *prison;
446
 /*
  * cache_size entries, dirty if set.
  */
450 unsigned long *dirty_bitset;
451 atomic_t nr_dirty;
452
453 unsigned policy_nr_args;
454 struct dm_cache_policy *policy;
455
 /*
  * Cache features such as write-through.
  */
459 struct cache_features features;
460
461 struct cache_stats stats;
462
463 bool need_tick_bio:1;
464 bool sized:1;
465 bool invalidate:1;
466 bool commit_requested:1;
467 bool loaded_mappings:1;
468 bool loaded_discards:1;
469
470 struct rw_semaphore background_work_lock;
471
472 struct batcher committer;
473 struct work_struct commit_ws;
474
475 struct io_tracker tracker;
476
477 mempool_t migration_pool;
478
479 struct bio_set bs;
480};
481
482struct per_bio_data {
483 bool tick:1;
484 unsigned req_nr:2;
485 struct dm_bio_prison_cell_v2 *cell;
486 struct dm_hook_info hook_info;
487 sector_t len;
488};
489
490struct dm_cache_migration {
491 struct continuation k;
492 struct cache *cache;
493
494 struct policy_work *op;
495 struct bio *overwrite_bio;
496 struct dm_bio_prison_cell_v2 *cell;
497
498 dm_cblock_t invalidate_cblock;
499 dm_oblock_t invalidate_oblock;
500};
501
502
503
504static bool writethrough_mode(struct cache *cache)
505{
506 return cache->features.io_mode == CM_IO_WRITETHROUGH;
507}
508
509static bool writeback_mode(struct cache *cache)
510{
511 return cache->features.io_mode == CM_IO_WRITEBACK;
512}
513
514static inline bool passthrough_mode(struct cache *cache)
515{
516 return unlikely(cache->features.io_mode == CM_IO_PASSTHROUGH);
517}
518
519
520
521static void wake_deferred_bio_worker(struct cache *cache)
522{
523 queue_work(cache->wq, &cache->deferred_bio_worker);
524}
525
526static void wake_migration_worker(struct cache *cache)
527{
528 if (passthrough_mode(cache))
529 return;
530
531 queue_work(cache->wq, &cache->migration_worker);
532}
533
534
535
536static struct dm_bio_prison_cell_v2 *alloc_prison_cell(struct cache *cache)
537{
538 return dm_bio_prison_alloc_cell_v2(cache->prison, GFP_NOIO);
539}
540
541static void free_prison_cell(struct cache *cache, struct dm_bio_prison_cell_v2 *cell)
542{
543 dm_bio_prison_free_cell_v2(cache->prison, cell);
544}
545
546static struct dm_cache_migration *alloc_migration(struct cache *cache)
547{
548 struct dm_cache_migration *mg;
549
550 mg = mempool_alloc(&cache->migration_pool, GFP_NOIO);
551
552 memset(mg, 0, sizeof(*mg));
553
554 mg->cache = cache;
555 atomic_inc(&cache->nr_allocated_migrations);
556
557 return mg;
558}
559
560static void free_migration(struct dm_cache_migration *mg)
561{
562 struct cache *cache = mg->cache;
563
564 if (atomic_dec_and_test(&cache->nr_allocated_migrations))
565 wake_up(&cache->migration_wait);
566
567 mempool_free(mg, &cache->migration_pool);
568}
569
570
571
572static inline dm_oblock_t oblock_succ(dm_oblock_t b)
573{
574 return to_oblock(from_oblock(b) + 1ull);
575}
576
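/*
 * Builds a bio-prison key covering the half-open range of origin blocks
 * [begin, end).
 */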
577static void build_key(dm_oblock_t begin, dm_oblock_t end, struct dm_cell_key_v2 *key)
578{
579 key->virtual = 0;
580 key->dev = 0;
581 key->block_begin = from_oblock(begin);
582 key->block_end = from_oblock(end);
583}
584
/*
 * There are two lock levels: level 0, which prevents WRITEs, and level 1,
 * which prevents both READs and WRITEs.
 */
589#define WRITE_LOCK_LEVEL 0
590#define READ_WRITE_LOCK_LEVEL 1
591
592static unsigned lock_level(struct bio *bio)
593{
594 return bio_data_dir(bio) == WRITE ?
595 WRITE_LOCK_LEVEL :
596 READ_WRITE_LOCK_LEVEL;
597}
598
/*
 * Per bio data helpers.  The per_bio_data lives in the dm per-bio-data
 * area (see ti->per_io_data_size in the constructor).
 */
603static struct per_bio_data *get_per_bio_data(struct bio *bio)
604{
605 struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
606 BUG_ON(!pb);
607 return pb;
608}
609
610static struct per_bio_data *init_per_bio_data(struct bio *bio)
611{
612 struct per_bio_data *pb = get_per_bio_data(bio);
613
614 pb->tick = false;
615 pb->req_nr = dm_bio_get_target_bio_nr(bio);
616 pb->cell = NULL;
617 pb->len = 0;
618
619 return pb;
620}
621
622
623
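/*
 * Queues bios on the deferred list and wakes the worker that services it.
 */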
624static void defer_bio(struct cache *cache, struct bio *bio)
625{
626 spin_lock_irq(&cache->lock);
627 bio_list_add(&cache->deferred_bios, bio);
628 spin_unlock_irq(&cache->lock);
629
630 wake_deferred_bio_worker(cache);
631}
632
633static void defer_bios(struct cache *cache, struct bio_list *bios)
634{
635 spin_lock_irq(&cache->lock);
636 bio_list_merge(&cache->deferred_bios, bios);
637 bio_list_init(bios);
638 spin_unlock_irq(&cache->lock);
639
640 wake_deferred_bio_worker(cache);
641}
642
643
644
645static bool bio_detain_shared(struct cache *cache, dm_oblock_t oblock, struct bio *bio)
646{
647 bool r;
648 struct per_bio_data *pb;
649 struct dm_cell_key_v2 key;
650 dm_oblock_t end = to_oblock(from_oblock(oblock) + 1ULL);
651 struct dm_bio_prison_cell_v2 *cell_prealloc, *cell;
652
653 cell_prealloc = alloc_prison_cell(cache);
654
655 build_key(oblock, end, &key);
656 r = dm_cell_get_v2(cache->prison, &key, lock_level(bio), bio, cell_prealloc, &cell);
657 if (!r) {
  /*
   * Failed to get the lock.
   */
661 free_prison_cell(cache, cell_prealloc);
662 return r;
663 }
664
665 if (cell != cell_prealloc)
666 free_prison_cell(cache, cell_prealloc);
667
668 pb = get_per_bio_data(bio);
669 pb->cell = cell;
670
671 return r;
672}
673
674
675
676static bool is_dirty(struct cache *cache, dm_cblock_t b)
677{
678 return test_bit(from_cblock(b), cache->dirty_bitset);
679}
680
681static void set_dirty(struct cache *cache, dm_cblock_t cblock)
682{
683 if (!test_and_set_bit(from_cblock(cblock), cache->dirty_bitset)) {
684 atomic_inc(&cache->nr_dirty);
685 policy_set_dirty(cache->policy, cblock);
686 }
687}
688
/*
 * These two are called when setting after migrations to force the policy
 * and dirty bitset to be in sync.
 */
693static void force_set_dirty(struct cache *cache, dm_cblock_t cblock)
694{
695 if (!test_and_set_bit(from_cblock(cblock), cache->dirty_bitset))
696 atomic_inc(&cache->nr_dirty);
697 policy_set_dirty(cache->policy, cblock);
698}
699
700static void force_clear_dirty(struct cache *cache, dm_cblock_t cblock)
701{
702 if (test_and_clear_bit(from_cblock(cblock), cache->dirty_bitset)) {
703 if (atomic_dec_return(&cache->nr_dirty) == 0)
704 dm_table_event(cache->ti->table);
705 }
706
707 policy_clear_dirty(cache->policy, cblock);
708}
709
710
711
712static bool block_size_is_power_of_two(struct cache *cache)
713{
714 return cache->sectors_per_block_shift >= 0;
715}
716
/* gcc on ARM generates spurious references to __udivdi3 and __umoddi3 */
718#if defined(CONFIG_ARM) && __GNUC__ == 4 && __GNUC_MINOR__ <= 6
719__always_inline
720#endif
721static dm_block_t block_div(dm_block_t b, uint32_t n)
722{
723 do_div(b, n);
724
725 return b;
726}
727
728static dm_block_t oblocks_per_dblock(struct cache *cache)
729{
730 dm_block_t oblocks = cache->discard_block_size;
731
732 if (block_size_is_power_of_two(cache))
733 oblocks >>= cache->sectors_per_block_shift;
734 else
735 oblocks = block_div(oblocks, cache->sectors_per_block);
736
737 return oblocks;
738}
739
740static dm_dblock_t oblock_to_dblock(struct cache *cache, dm_oblock_t oblock)
741{
742 return to_dblock(block_div(from_oblock(oblock),
743 oblocks_per_dblock(cache)));
744}
745
746static void set_discard(struct cache *cache, dm_dblock_t b)
747{
748 BUG_ON(from_dblock(b) >= from_dblock(cache->discard_nr_blocks));
749 atomic_inc(&cache->stats.discard_count);
750
751 spin_lock_irq(&cache->lock);
752 set_bit(from_dblock(b), cache->discard_bitset);
753 spin_unlock_irq(&cache->lock);
754}
755
756static void clear_discard(struct cache *cache, dm_dblock_t b)
757{
758 spin_lock_irq(&cache->lock);
759 clear_bit(from_dblock(b), cache->discard_bitset);
760 spin_unlock_irq(&cache->lock);
761}
762
763static bool is_discarded(struct cache *cache, dm_dblock_t b)
764{
765 int r;
766 spin_lock_irq(&cache->lock);
767 r = test_bit(from_dblock(b), cache->discard_bitset);
768 spin_unlock_irq(&cache->lock);
769
770 return r;
771}
772
773static bool is_discarded_oblock(struct cache *cache, dm_oblock_t b)
774{
775 int r;
776 spin_lock_irq(&cache->lock);
777 r = test_bit(from_dblock(oblock_to_dblock(cache, b)),
778 cache->discard_bitset);
779 spin_unlock_irq(&cache->lock);
780
781 return r;
782}
783
/*----------------------------------------------------------------
 * Remapping
 *--------------------------------------------------------------*/
787static void remap_to_origin(struct cache *cache, struct bio *bio)
788{
789 bio_set_dev(bio, cache->origin_dev->bdev);
790}
791
792static void remap_to_cache(struct cache *cache, struct bio *bio,
793 dm_cblock_t cblock)
794{
795 sector_t bi_sector = bio->bi_iter.bi_sector;
796 sector_t block = from_cblock(cblock);
797
798 bio_set_dev(bio, cache->cache_dev->bdev);
799 if (!block_size_is_power_of_two(cache))
800 bio->bi_iter.bi_sector =
801 (block * cache->sectors_per_block) +
802 sector_div(bi_sector, cache->sectors_per_block);
803 else
804 bio->bi_iter.bi_sector =
805 (block << cache->sectors_per_block_shift) |
806 (bi_sector & (cache->sectors_per_block - 1));
807}
808
809static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio)
810{
811 struct per_bio_data *pb;
812
813 spin_lock_irq(&cache->lock);
814 if (cache->need_tick_bio && !op_is_flush(bio->bi_opf) &&
815 bio_op(bio) != REQ_OP_DISCARD) {
816 pb = get_per_bio_data(bio);
817 pb->tick = true;
818 cache->need_tick_bio = false;
819 }
820 spin_unlock_irq(&cache->lock);
821}
822
823static void __remap_to_origin_clear_discard(struct cache *cache, struct bio *bio,
824 dm_oblock_t oblock, bool bio_has_pbd)
825{
826 if (bio_has_pbd)
827 check_if_tick_bio_needed(cache, bio);
828 remap_to_origin(cache, bio);
829 if (bio_data_dir(bio) == WRITE)
830 clear_discard(cache, oblock_to_dblock(cache, oblock));
831}
832
833static void remap_to_origin_clear_discard(struct cache *cache, struct bio *bio,
834 dm_oblock_t oblock)
835{
 /* The bio carries per-bio data, so the tick check is performed. */
837 __remap_to_origin_clear_discard(cache, bio, oblock, true);
838}
839
840static void remap_to_cache_dirty(struct cache *cache, struct bio *bio,
841 dm_oblock_t oblock, dm_cblock_t cblock)
842{
843 check_if_tick_bio_needed(cache, bio);
844 remap_to_cache(cache, bio, cblock);
845 if (bio_data_dir(bio) == WRITE) {
846 set_dirty(cache, cblock);
847 clear_discard(cache, oblock_to_dblock(cache, oblock));
848 }
849}
850
851static dm_oblock_t get_bio_block(struct cache *cache, struct bio *bio)
852{
853 sector_t block_nr = bio->bi_iter.bi_sector;
854
855 if (!block_size_is_power_of_two(cache))
856 (void) sector_div(block_nr, cache->sectors_per_block);
857 else
858 block_nr >>= cache->sectors_per_block_shift;
859
860 return to_oblock(block_nr);
861}
862
863static bool accountable_bio(struct cache *cache, struct bio *bio)
864{
865 return bio_op(bio) != REQ_OP_DISCARD;
866}
867
868static void accounted_begin(struct cache *cache, struct bio *bio)
869{
870 struct per_bio_data *pb;
871
872 if (accountable_bio(cache, bio)) {
873 pb = get_per_bio_data(bio);
874 pb->len = bio_sectors(bio);
875 iot_io_begin(&cache->tracker, pb->len);
876 }
877}
878
879static void accounted_complete(struct cache *cache, struct bio *bio)
880{
881 struct per_bio_data *pb = get_per_bio_data(bio);
882
883 iot_io_end(&cache->tracker, pb->len);
884}
885
886static void accounted_request(struct cache *cache, struct bio *bio)
887{
888 accounted_begin(cache, bio);
889 generic_make_request(bio);
890}
891
892static void issue_op(struct bio *bio, void *context)
893{
894 struct cache *cache = context;
895 accounted_request(cache, bio);
896}
897
/*
 * Writes to a clean block in writethrough mode must go to both the origin
 * and the cache device.  A clone is chained to the original bio and sent
 * to the origin, while the original bio is remapped to the cache.
 */
902static void remap_to_origin_and_cache(struct cache *cache, struct bio *bio,
903 dm_oblock_t oblock, dm_cblock_t cblock)
904{
905 struct bio *origin_bio = bio_clone_fast(bio, GFP_NOIO, &cache->bs);
906
907 BUG_ON(!origin_bio);
908
909 bio_chain(origin_bio, bio);
910
 /*
  * Passing false to __remap_to_origin_clear_discard() skips the
  * per-bio-data work (the clone has no per-bio data).
  */
914 __remap_to_origin_clear_discard(cache, origin_bio, oblock, false);
915 submit_bio(origin_bio);
916
917 remap_to_cache(cache, bio, cblock);
918}
919
/*----------------------------------------------------------------
 * Failure modes
 *--------------------------------------------------------------*/
923static enum cache_metadata_mode get_cache_mode(struct cache *cache)
924{
925 return cache->features.mode;
926}
927
928static const char *cache_device_name(struct cache *cache)
929{
930 return dm_device_name(dm_table_get_md(cache->ti->table));
931}
932
933static void notify_mode_switch(struct cache *cache, enum cache_metadata_mode mode)
934{
935 const char *descs[] = {
936 "write",
937 "read-only",
938 "fail"
939 };
940
941 dm_table_event(cache->ti->table);
942 DMINFO("%s: switching cache to %s mode",
943 cache_device_name(cache), descs[(int)mode]);
944}
945
946static void set_cache_mode(struct cache *cache, enum cache_metadata_mode new_mode)
947{
948 bool needs_check;
949 enum cache_metadata_mode old_mode = get_cache_mode(cache);
950
951 if (dm_cache_metadata_needs_check(cache->cmd, &needs_check)) {
952 DMERR("%s: unable to read needs_check flag, setting failure mode.",
953 cache_device_name(cache));
954 new_mode = CM_FAIL;
955 }
956
957 if (new_mode == CM_WRITE && needs_check) {
958 DMERR("%s: unable to switch cache to write mode until repaired.",
959 cache_device_name(cache));
960 if (old_mode != new_mode)
961 new_mode = old_mode;
962 else
963 new_mode = CM_READ_ONLY;
964 }
965
 /* Never move out of fail mode */
967 if (old_mode == CM_FAIL)
968 new_mode = CM_FAIL;
969
970 switch (new_mode) {
971 case CM_FAIL:
972 case CM_READ_ONLY:
973 dm_cache_metadata_set_read_only(cache->cmd);
974 break;
975
976 case CM_WRITE:
977 dm_cache_metadata_set_read_write(cache->cmd);
978 break;
979 }
980
981 cache->features.mode = new_mode;
982
983 if (new_mode != old_mode)
984 notify_mode_switch(cache, new_mode);
985}
986
987static void abort_transaction(struct cache *cache)
988{
989 const char *dev_name = cache_device_name(cache);
990
991 if (get_cache_mode(cache) >= CM_READ_ONLY)
992 return;
993
994 if (dm_cache_metadata_set_needs_check(cache->cmd)) {
995 DMERR("%s: failed to set 'needs_check' flag in metadata", dev_name);
996 set_cache_mode(cache, CM_FAIL);
997 }
998
999 DMERR_LIMIT("%s: aborting current metadata transaction", dev_name);
1000 if (dm_cache_metadata_abort(cache->cmd)) {
1001 DMERR("%s: failed to abort metadata transaction", dev_name);
1002 set_cache_mode(cache, CM_FAIL);
1003 }
1004}
1005
1006static void metadata_operation_failed(struct cache *cache, const char *op, int r)
1007{
1008 DMERR_LIMIT("%s: metadata operation '%s' failed: error = %d",
1009 cache_device_name(cache), op, r);
1010 abort_transaction(cache);
1011 set_cache_mode(cache, CM_READ_ONLY);
1012}
1013
1014
1015
1016static void load_stats(struct cache *cache)
1017{
1018 struct dm_cache_statistics stats;
1019
1020 dm_cache_metadata_get_stats(cache->cmd, &stats);
1021 atomic_set(&cache->stats.read_hit, stats.read_hits);
1022 atomic_set(&cache->stats.read_miss, stats.read_misses);
1023 atomic_set(&cache->stats.write_hit, stats.write_hits);
1024 atomic_set(&cache->stats.write_miss, stats.write_misses);
1025}
1026
1027static void save_stats(struct cache *cache)
1028{
1029 struct dm_cache_statistics stats;
1030
1031 if (get_cache_mode(cache) >= CM_READ_ONLY)
1032 return;
1033
1034 stats.read_hits = atomic_read(&cache->stats.read_hit);
1035 stats.read_misses = atomic_read(&cache->stats.read_miss);
1036 stats.write_hits = atomic_read(&cache->stats.write_hit);
1037 stats.write_misses = atomic_read(&cache->stats.write_miss);
1038
1039 dm_cache_metadata_set_stats(cache->cmd, &stats);
1040}
1041
1042static void update_stats(struct cache_stats *stats, enum policy_operation op)
1043{
1044 switch (op) {
1045 case POLICY_PROMOTE:
1046 atomic_inc(&stats->promotion);
1047 break;
1048
1049 case POLICY_DEMOTE:
1050 atomic_inc(&stats->demotion);
1051 break;
1052
1053 case POLICY_WRITEBACK:
1054 atomic_inc(&stats->writeback);
1055 break;
1056 }
1057}
1058
/*----------------------------------------------------------------
 * Migration processing
 *
 * Migration covers moving data from the origin device to the cache, or
 * vice versa.
 *--------------------------------------------------------------*/

1066static void inc_io_migrations(struct cache *cache)
1067{
1068 atomic_inc(&cache->nr_io_migrations);
1069}
1070
1071static void dec_io_migrations(struct cache *cache)
1072{
1073 atomic_dec(&cache->nr_io_migrations);
1074}
1075
1076static bool discard_or_flush(struct bio *bio)
1077{
1078 return bio_op(bio) == REQ_OP_DISCARD || op_is_flush(bio->bi_opf);
1079}
1080
1081static void calc_discard_block_range(struct cache *cache, struct bio *bio,
1082 dm_dblock_t *b, dm_dblock_t *e)
1083{
1084 sector_t sb = bio->bi_iter.bi_sector;
1085 sector_t se = bio_end_sector(bio);
1086
1087 *b = to_dblock(dm_sector_div_up(sb, cache->discard_block_size));
1088
1089 if (se - sb < cache->discard_block_size)
1090 *e = *b;
1091 else
1092 *e = to_dblock(block_div(se, cache->discard_block_size));
1093}
1094
1095
1096
1097static void prevent_background_work(struct cache *cache)
1098{
1099 lockdep_off();
1100 down_write(&cache->background_work_lock);
1101 lockdep_on();
1102}
1103
1104static void allow_background_work(struct cache *cache)
1105{
1106 lockdep_off();
1107 up_write(&cache->background_work_lock);
1108 lockdep_on();
1109}
1110
1111static bool background_work_begin(struct cache *cache)
1112{
1113 bool r;
1114
1115 lockdep_off();
1116 r = down_read_trylock(&cache->background_work_lock);
1117 lockdep_on();
1118
1119 return r;
1120}
1121
1122static void background_work_end(struct cache *cache)
1123{
1124 lockdep_off();
1125 up_read(&cache->background_work_lock);
1126 lockdep_on();
1127}
1128
1129
1130
1131static bool bio_writes_complete_block(struct cache *cache, struct bio *bio)
1132{
1133 return (bio_data_dir(bio) == WRITE) &&
1134 (bio->bi_iter.bi_size == (cache->sectors_per_block << SECTOR_SHIFT));
1135}
1136
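/*
 * A bio is 'optimisable' if we are in writeback mode and it either
 * overwrites a complete block or targets a discarded block, so the cache
 * copy can be written without first copying the old origin data.
 */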
1137static bool optimisable_bio(struct cache *cache, struct bio *bio, dm_oblock_t block)
1138{
1139 return writeback_mode(cache) &&
1140 (is_discarded_oblock(cache, block) || bio_writes_complete_block(cache, bio));
1141}
1142
1143static void quiesce(struct dm_cache_migration *mg,
1144 void (*continuation)(struct work_struct *))
1145{
1146 init_continuation(&mg->k, continuation);
1147 dm_cell_quiesce_v2(mg->cache->prison, mg->cell, &mg->k.ws);
1148}
1149
1150static struct dm_cache_migration *ws_to_mg(struct work_struct *ws)
1151{
1152 struct continuation *k = container_of(ws, struct continuation, ws);
1153 return container_of(k, struct dm_cache_migration, k);
1154}
1155
1156static void copy_complete(int read_err, unsigned long write_err, void *context)
1157{
1158 struct dm_cache_migration *mg = container_of(context, struct dm_cache_migration, k);
1159
1160 if (read_err || write_err)
1161 mg->k.input = BLK_STS_IOERR;
1162
1163 queue_continuation(mg->cache->wq, &mg->k);
1164}
1165
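/*
 * Copies a whole block between the origin and cache devices using kcopyd.
 * 'promote' selects the direction: origin to cache when true, cache to
 * origin otherwise.  copy_complete() requeues the continuation when done.
 */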
1166static void copy(struct dm_cache_migration *mg, bool promote)
1167{
1168 struct dm_io_region o_region, c_region;
1169 struct cache *cache = mg->cache;
1170
1171 o_region.bdev = cache->origin_dev->bdev;
1172 o_region.sector = from_oblock(mg->op->oblock) * cache->sectors_per_block;
1173 o_region.count = cache->sectors_per_block;
1174
1175 c_region.bdev = cache->cache_dev->bdev;
1176 c_region.sector = from_cblock(mg->op->cblock) * cache->sectors_per_block;
1177 c_region.count = cache->sectors_per_block;
1178
1179 if (promote)
1180 dm_kcopyd_copy(cache->copier, &o_region, 1, &c_region, 0, copy_complete, &mg->k);
1181 else
1182 dm_kcopyd_copy(cache->copier, &c_region, 1, &o_region, 0, copy_complete, &mg->k);
1183}
1184
1185static void bio_drop_shared_lock(struct cache *cache, struct bio *bio)
1186{
1187 struct per_bio_data *pb = get_per_bio_data(bio);
1188
1189 if (pb->cell && dm_cell_put_v2(cache->prison, pb->cell))
1190 free_prison_cell(cache, pb->cell);
1191 pb->cell = NULL;
1192}
1193
1194static void overwrite_endio(struct bio *bio)
1195{
1196 struct dm_cache_migration *mg = bio->bi_private;
1197 struct cache *cache = mg->cache;
1198 struct per_bio_data *pb = get_per_bio_data(bio);
1199
1200 dm_unhook_bio(&pb->hook_info, bio);
1201
1202 if (bio->bi_status)
1203 mg->k.input = bio->bi_status;
1204
1205 queue_continuation(cache->wq, &mg->k);
1206}
1207
1208static void overwrite(struct dm_cache_migration *mg,
1209 void (*continuation)(struct work_struct *))
1210{
1211 struct bio *bio = mg->overwrite_bio;
1212 struct per_bio_data *pb = get_per_bio_data(bio);
1213
1214 dm_hook_bio(&pb->hook_info, bio, overwrite_endio, mg);
1215
 /*
  * The overwrite bio is part of the copy operation, as such it does
  * not set/clear discard or dirty flags.
  */
1220 if (mg->op->op == POLICY_PROMOTE)
1221 remap_to_cache(mg->cache, bio, mg->op->cblock);
1222 else
1223 remap_to_origin(mg->cache, bio);
1224
1225 init_continuation(&mg->k, continuation);
1226 accounted_request(mg->cache, bio);
1227}
1228
/*
 * Migration steps:
 *
 * 1) exclusive lock preventing WRITEs
 * 2) quiesce
 * 3) copy or issue overwrite bio
 * 4) upgrade to exclusive lock preventing READs and WRITEs
 * 5) quiesce
 * 6) update metadata and commit
 * 7) unlock
 */
1240static void mg_complete(struct dm_cache_migration *mg, bool success)
1241{
1242 struct bio_list bios;
1243 struct cache *cache = mg->cache;
1244 struct policy_work *op = mg->op;
1245 dm_cblock_t cblock = op->cblock;
1246
1247 if (success)
1248 update_stats(&cache->stats, op->op);
1249
1250 switch (op->op) {
1251 case POLICY_PROMOTE:
1252 clear_discard(cache, oblock_to_dblock(cache, op->oblock));
1253 policy_complete_background_work(cache->policy, op, success);
1254
1255 if (mg->overwrite_bio) {
1256 if (success)
1257 force_set_dirty(cache, cblock);
1258 else if (mg->k.input)
1259 mg->overwrite_bio->bi_status = mg->k.input;
1260 else
1261 mg->overwrite_bio->bi_status = BLK_STS_IOERR;
1262 bio_endio(mg->overwrite_bio);
1263 } else {
1264 if (success)
1265 force_clear_dirty(cache, cblock);
1266 dec_io_migrations(cache);
1267 }
1268 break;
1269
1270 case POLICY_DEMOTE:
  /*
   * We clear dirty here to update the nr_dirty counter.
   */
1274 if (success)
1275 force_clear_dirty(cache, cblock);
1276 policy_complete_background_work(cache->policy, op, success);
1277 dec_io_migrations(cache);
1278 break;
1279
1280 case POLICY_WRITEBACK:
1281 if (success)
1282 force_clear_dirty(cache, cblock);
1283 policy_complete_background_work(cache->policy, op, success);
1284 dec_io_migrations(cache);
1285 break;
1286 }
1287
1288 bio_list_init(&bios);
1289 if (mg->cell) {
1290 if (dm_cell_unlock_v2(cache->prison, mg->cell, &bios))
1291 free_prison_cell(cache, mg->cell);
1292 }
1293
1294 free_migration(mg);
1295 defer_bios(cache, &bios);
1296 wake_migration_worker(cache);
1297
1298 background_work_end(cache);
1299}
1300
1301static void mg_success(struct work_struct *ws)
1302{
1303 struct dm_cache_migration *mg = ws_to_mg(ws);
1304 mg_complete(mg, mg->k.input == 0);
1305}
1306
1307static void mg_update_metadata(struct work_struct *ws)
1308{
1309 int r;
1310 struct dm_cache_migration *mg = ws_to_mg(ws);
1311 struct cache *cache = mg->cache;
1312 struct policy_work *op = mg->op;
1313
1314 switch (op->op) {
1315 case POLICY_PROMOTE:
1316 r = dm_cache_insert_mapping(cache->cmd, op->cblock, op->oblock);
1317 if (r) {
1318 DMERR_LIMIT("%s: migration failed; couldn't insert mapping",
1319 cache_device_name(cache));
1320 metadata_operation_failed(cache, "dm_cache_insert_mapping", r);
1321
1322 mg_complete(mg, false);
1323 return;
1324 }
1325 mg_complete(mg, true);
1326 break;
1327
1328 case POLICY_DEMOTE:
1329 r = dm_cache_remove_mapping(cache->cmd, op->cblock);
1330 if (r) {
1331 DMERR_LIMIT("%s: migration failed; couldn't update on disk metadata",
1332 cache_device_name(cache));
1333 metadata_operation_failed(cache, "dm_cache_remove_mapping", r);
1334
1335 mg_complete(mg, false);
1336 return;
1337 }

  /*
   * It would be nice if we only had to commit when a REQ_FLUSH
   * comes through.  But there's one scenario that we have to
   * look out for:
   *
   * - vblock x in a cache block
   * - demotion occurs
   * - the cache block gets reallocated and overwritten
   * - crash
   *
   * When we recover, because there was no commit the cache will
   * rollback to having the data for vblock x in the cache block.
   * But the cache block has since been overwritten, so it'll end
   * up pointing to data that was never in 'x' during the history
   * of the device.
   *
   * To avoid this issue we require a commit as part of the
   * demotion operation.
   */
1358 init_continuation(&mg->k, mg_success);
1359 continue_after_commit(&cache->committer, &mg->k);
1360 schedule_commit(&cache->committer);
1361 break;
1362
1363 case POLICY_WRITEBACK:
1364 mg_complete(mg, true);
1365 break;
1366 }
1367}
1368
1369static void mg_update_metadata_after_copy(struct work_struct *ws)
1370{
1371 struct dm_cache_migration *mg = ws_to_mg(ws);
1372
 /*
  * Did the copy succeed?
  */
1376 if (mg->k.input)
1377 mg_complete(mg, false);
1378 else
1379 mg_update_metadata(ws);
1380}
1381
1382static void mg_upgrade_lock(struct work_struct *ws)
1383{
1384 int r;
1385 struct dm_cache_migration *mg = ws_to_mg(ws);
1386
 /*
  * Did the copy succeed?
  */
1390 if (mg->k.input)
1391 mg_complete(mg, false);
1392
1393 else {
  /*
   * Now we want the lock to prevent both reads and writes.
   */
1397 r = dm_cell_lock_promote_v2(mg->cache->prison, mg->cell,
1398 READ_WRITE_LOCK_LEVEL);
1399 if (r < 0)
1400 mg_complete(mg, false);
1401
1402 else if (r)
1403 quiesce(mg, mg_update_metadata);
1404
1405 else
1406 mg_update_metadata(ws);
1407 }
1408}
1409
1410static void mg_full_copy(struct work_struct *ws)
1411{
1412 struct dm_cache_migration *mg = ws_to_mg(ws);
1413 struct cache *cache = mg->cache;
1414 struct policy_work *op = mg->op;
1415 bool is_policy_promote = (op->op == POLICY_PROMOTE);
1416
1417 if ((!is_policy_promote && !is_dirty(cache, op->cblock)) ||
1418 is_discarded_oblock(cache, op->oblock)) {
1419 mg_upgrade_lock(ws);
1420 return;
1421 }
1422
1423 init_continuation(&mg->k, mg_upgrade_lock);
1424 copy(mg, is_policy_promote);
1425}
1426
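/*
 * Either services the migration with the overwrite bio (when it is still
 * optimisable) or falls back to a full block copy via mg_full_copy().
 */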
1427static void mg_copy(struct work_struct *ws)
1428{
1429 struct dm_cache_migration *mg = ws_to_mg(ws);
1430
1431 if (mg->overwrite_bio) {
  /*
   * No exclusive lock was held when we last checked if the bio
   * was optimisable.  So we have to check again in case things
   * have changed (eg, the block may no longer be discarded).
   */
1437 if (!optimisable_bio(mg->cache, mg->overwrite_bio, mg->op->oblock)) {
   /*
    * Fallback to a real full copy after doing some tidying up.
    */
1441 bool rb = bio_detain_shared(mg->cache, mg->op->oblock, mg->overwrite_bio);
1442 BUG_ON(rb);
1443 mg->overwrite_bio = NULL;
1444 inc_io_migrations(mg->cache);
1445 mg_full_copy(ws);
1446 return;
1447 }
1448
  /*
   * It's safe to do this here, even though it's new data,
   * because all IO has been locked out of the block.
   *
   * mg_lock_writes() already took READ_WRITE_LOCK_LEVEL
   * so we're _not_ using mg_upgrade_lock() as the continuation.
   */
1456 overwrite(mg, mg_update_metadata_after_copy);
1457
1458 } else
1459 mg_full_copy(ws);
1460}
1461
1462static int mg_lock_writes(struct dm_cache_migration *mg)
1463{
1464 int r;
1465 struct dm_cell_key_v2 key;
1466 struct cache *cache = mg->cache;
1467 struct dm_bio_prison_cell_v2 *prealloc;
1468
1469 prealloc = alloc_prison_cell(cache);
1470
 /*
  * Prevent writes to the block, but allow reads to continue.
  * Unless we're using an overwrite bio, in which case we lock
  * everything.
  */
1476 build_key(mg->op->oblock, oblock_succ(mg->op->oblock), &key);
1477 r = dm_cell_lock_v2(cache->prison, &key,
1478 mg->overwrite_bio ? READ_WRITE_LOCK_LEVEL : WRITE_LOCK_LEVEL,
1479 prealloc, &mg->cell);
1480 if (r < 0) {
1481 free_prison_cell(cache, prealloc);
1482 mg_complete(mg, false);
1483 return r;
1484 }
1485
1486 if (mg->cell != prealloc)
1487 free_prison_cell(cache, prealloc);
1488
1489 if (r == 0)
1490 mg_copy(&mg->k.ws);
1491 else
1492 quiesce(mg, mg_copy);
1493
1494 return 0;
1495}
1496
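/*
 * Entry point for a policy-driven migration.  Takes the background work
 * lock, allocates a migration and then grabs the cell locks via
 * mg_lock_writes().
 */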
1497static int mg_start(struct cache *cache, struct policy_work *op, struct bio *bio)
1498{
1499 struct dm_cache_migration *mg;
1500
1501 if (!background_work_begin(cache)) {
1502 policy_complete_background_work(cache->policy, op, false);
1503 return -EPERM;
1504 }
1505
1506 mg = alloc_migration(cache);
1507
1508 mg->op = op;
1509 mg->overwrite_bio = bio;
1510
1511 if (!bio)
1512 inc_io_migrations(cache);
1513
1514 return mg_lock_writes(mg);
1515}
1516
/*----------------------------------------------------------------
 * invalidation processing
 *--------------------------------------------------------------*/

1521static void invalidate_complete(struct dm_cache_migration *mg, bool success)
1522{
1523 struct bio_list bios;
1524 struct cache *cache = mg->cache;
1525
1526 bio_list_init(&bios);
1527 if (dm_cell_unlock_v2(cache->prison, mg->cell, &bios))
1528 free_prison_cell(cache, mg->cell);
1529
1530 if (!success && mg->overwrite_bio)
1531 bio_io_error(mg->overwrite_bio);
1532
1533 free_migration(mg);
1534 defer_bios(cache, &bios);
1535
1536 background_work_end(cache);
1537}
1538
1539static void invalidate_completed(struct work_struct *ws)
1540{
1541 struct dm_cache_migration *mg = ws_to_mg(ws);
1542 invalidate_complete(mg, !mg->k.input);
1543}
1544
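/*
 * Removes the mapping for a cache block from both the policy and the
 * on-disk metadata.  -ENODATA from the policy means the block was already
 * unmapped and is not treated as an error.
 */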
1545static int invalidate_cblock(struct cache *cache, dm_cblock_t cblock)
1546{
1547 int r = policy_invalidate_mapping(cache->policy, cblock);
1548 if (!r) {
1549 r = dm_cache_remove_mapping(cache->cmd, cblock);
1550 if (r) {
1551 DMERR_LIMIT("%s: invalidation failed; couldn't update on disk metadata",
1552 cache_device_name(cache));
1553 metadata_operation_failed(cache, "dm_cache_remove_mapping", r);
1554 }
1555
1556 } else if (r == -ENODATA) {
  /*
   * Harmless, already unmapped.
   */
1560 r = 0;
1561
1562 } else
1563 DMERR("%s: policy_invalidate_mapping failed", cache_device_name(cache));
1564
1565 return r;
1566}
1567
1568static void invalidate_remove(struct work_struct *ws)
1569{
1570 int r;
1571 struct dm_cache_migration *mg = ws_to_mg(ws);
1572 struct cache *cache = mg->cache;
1573
1574 r = invalidate_cblock(cache, mg->invalidate_cblock);
1575 if (r) {
1576 invalidate_complete(mg, false);
1577 return;
1578 }
1579
1580 init_continuation(&mg->k, invalidate_completed);
1581 continue_after_commit(&cache->committer, &mg->k);
1582 remap_to_origin_clear_discard(cache, mg->overwrite_bio, mg->invalidate_oblock);
1583 mg->overwrite_bio = NULL;
1584 schedule_commit(&cache->committer);
1585}
1586
1587static int invalidate_lock(struct dm_cache_migration *mg)
1588{
1589 int r;
1590 struct dm_cell_key_v2 key;
1591 struct cache *cache = mg->cache;
1592 struct dm_bio_prison_cell_v2 *prealloc;
1593
1594 prealloc = alloc_prison_cell(cache);
1595
1596 build_key(mg->invalidate_oblock, oblock_succ(mg->invalidate_oblock), &key);
1597 r = dm_cell_lock_v2(cache->prison, &key,
1598 READ_WRITE_LOCK_LEVEL, prealloc, &mg->cell);
1599 if (r < 0) {
1600 free_prison_cell(cache, prealloc);
1601 invalidate_complete(mg, false);
1602 return r;
1603 }
1604
1605 if (mg->cell != prealloc)
1606 free_prison_cell(cache, prealloc);
1607
1608 if (r)
1609 quiesce(mg, invalidate_remove);
1610
1611 else {
  /*
   * We can't call invalidate_remove() directly here because we
   * might still be in request context.
   */
1616 init_continuation(&mg->k, invalidate_remove);
1617 queue_work(cache->wq, &mg->k.ws);
1618 }
1619
1620 return 0;
1621}
1622
1623static int invalidate_start(struct cache *cache, dm_cblock_t cblock,
1624 dm_oblock_t oblock, struct bio *bio)
1625{
1626 struct dm_cache_migration *mg;
1627
1628 if (!background_work_begin(cache))
1629 return -EPERM;
1630
1631 mg = alloc_migration(cache);
1632
1633 mg->overwrite_bio = bio;
1634 mg->invalidate_cblock = cblock;
1635 mg->invalidate_oblock = oblock;
1636
1637 return invalidate_lock(mg);
1638}
1639
/*----------------------------------------------------------------
 * bio processing
 *--------------------------------------------------------------*/

1644enum busy {
1645 IDLE,
1646 BUSY
1647};
1648
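/*
 * Decides whether there is spare bandwidth for background migrations: the
 * device must have been idle for a second and the volume of in-flight
 * migration I/O must stay within migration_threshold sectors.
 */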
1649static enum busy spare_migration_bandwidth(struct cache *cache)
1650{
1651 bool idle = iot_idle_for(&cache->tracker, HZ);
1652 sector_t current_volume = (atomic_read(&cache->nr_io_migrations) + 1) *
1653 cache->sectors_per_block;
1654
1655 if (idle && current_volume <= cache->migration_threshold)
1656 return IDLE;
1657 else
1658 return BUSY;
1659}
1660
1661static void inc_hit_counter(struct cache *cache, struct bio *bio)
1662{
1663 atomic_inc(bio_data_dir(bio) == READ ?
1664 &cache->stats.read_hit : &cache->stats.write_hit);
1665}
1666
1667static void inc_miss_counter(struct cache *cache, struct bio *bio)
1668{
1669 atomic_inc(bio_data_dir(bio) == READ ?
1670 &cache->stats.read_miss : &cache->stats.write_miss);
1671}
1672
1673
1674
1675static int map_bio(struct cache *cache, struct bio *bio, dm_oblock_t block,
1676 bool *commit_needed)
1677{
1678 int r, data_dir;
1679 bool rb, background_queued;
1680 dm_cblock_t cblock;
1681
1682 *commit_needed = false;
1683
1684 rb = bio_detain_shared(cache, block, bio);
1685 if (!rb) {
  /*
   * An exclusive lock is held for this block, so we have to
   * wait.  We set the commit_needed flag so the current
   * transaction will be committed asap, allowing this lock
   * to be dropped sooner.
   */
1692 *commit_needed = true;
1693 return DM_MAPIO_SUBMITTED;
1694 }
1695
1696 data_dir = bio_data_dir(bio);
1697
1698 if (optimisable_bio(cache, bio, block)) {
1699 struct policy_work *op = NULL;
1700
1701 r = policy_lookup_with_work(cache->policy, block, &cblock, data_dir, true, &op);
1702 if (unlikely(r && r != -ENOENT)) {
1703 DMERR_LIMIT("%s: policy_lookup_with_work() failed with r = %d",
1704 cache_device_name(cache), r);
1705 bio_io_error(bio);
1706 return DM_MAPIO_SUBMITTED;
1707 }
1708
1709 if (r == -ENOENT && op) {
1710 bio_drop_shared_lock(cache, bio);
1711 BUG_ON(op->op != POLICY_PROMOTE);
1712 mg_start(cache, op, bio);
1713 return DM_MAPIO_SUBMITTED;
1714 }
1715 } else {
1716 r = policy_lookup(cache->policy, block, &cblock, data_dir, false, &background_queued);
1717 if (unlikely(r && r != -ENOENT)) {
1718 DMERR_LIMIT("%s: policy_lookup() failed with r = %d",
1719 cache_device_name(cache), r);
1720 bio_io_error(bio);
1721 return DM_MAPIO_SUBMITTED;
1722 }
1723
1724 if (background_queued)
1725 wake_migration_worker(cache);
1726 }
1727
1728 if (r == -ENOENT) {
1729 struct per_bio_data *pb = get_per_bio_data(bio);

  /*
   * Miss.
   */
1734 inc_miss_counter(cache, bio);
1735 if (pb->req_nr == 0) {
1736 accounted_begin(cache, bio);
1737 remap_to_origin_clear_discard(cache, bio, block);
1738 } else {
   /*
    * This is a duplicate writethrough io that is no
    * longer needed because the block has been demoted.
    */
1743 bio_endio(bio);
1744 return DM_MAPIO_SUBMITTED;
1745 }
1746 } else {
  /*
   * Hit.
   */
1750 inc_hit_counter(cache, bio);
1751
  /*
   * Passthrough always maps to the origin, invalidating any
   * cache blocks that are written to.
   */
1756 if (passthrough_mode(cache)) {
1757 if (bio_data_dir(bio) == WRITE) {
1758 bio_drop_shared_lock(cache, bio);
1759 atomic_inc(&cache->stats.demotion);
1760 invalidate_start(cache, cblock, block, bio);
1761 } else
1762 remap_to_origin_clear_discard(cache, bio, block);
1763 } else {
1764 if (bio_data_dir(bio) == WRITE && writethrough_mode(cache) &&
1765 !is_dirty(cache, cblock)) {
1766 remap_to_origin_and_cache(cache, bio, block, cblock);
1767 accounted_begin(cache, bio);
1768 } else
1769 remap_to_cache_dirty(cache, bio, block, cblock);
1770 }
1771 }
1772
 /*
  * dm core turns FUA requests into a separate payload and flush.
  */
1776 if (bio->bi_opf & REQ_FUA) {
  /*
   * issue_after_commit will call accounted_begin a second time.  So
   * we call accounted_complete() to avoid double accounting.
   */
1781 accounted_complete(cache, bio);
1782 issue_after_commit(&cache->committer, bio);
1783 *commit_needed = true;
1784 return DM_MAPIO_SUBMITTED;
1785 }
1786
1787 return DM_MAPIO_REMAPPED;
1788}
1789
1790static bool process_bio(struct cache *cache, struct bio *bio)
1791{
1792 bool commit_needed;
1793
1794 if (map_bio(cache, bio, get_bio_block(cache, bio), &commit_needed) == DM_MAPIO_REMAPPED)
1795 generic_make_request(bio);
1796
1797 return commit_needed;
1798}
1799
/*
 * A non-zero return indicates read_only or fail_io mode.
 */
1803static int commit(struct cache *cache, bool clean_shutdown)
1804{
1805 int r;
1806
1807 if (get_cache_mode(cache) >= CM_READ_ONLY)
1808 return -EINVAL;
1809
1810 atomic_inc(&cache->stats.commit_count);
1811 r = dm_cache_commit(cache->cmd, clean_shutdown);
1812 if (r)
1813 metadata_operation_failed(cache, "dm_cache_commit", r);
1814
1815 return r;
1816}
1817
/*
 * Used by the batcher.
 */
1821static blk_status_t commit_op(void *context)
1822{
1823 struct cache *cache = context;
1824
1825 if (dm_cache_changed_this_transaction(cache->cmd))
1826 return errno_to_blk_status(commit(cache, false));
1827
1828 return 0;
1829}
1830
1831
1832
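/*
 * A flush bio is remapped to the origin or cache device depending on which
 * of the two flush bios it is (req_nr), and is only issued once the current
 * metadata transaction has been committed.
 */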
1833static bool process_flush_bio(struct cache *cache, struct bio *bio)
1834{
1835 struct per_bio_data *pb = get_per_bio_data(bio);
1836
1837 if (!pb->req_nr)
1838 remap_to_origin(cache, bio);
1839 else
1840 remap_to_cache(cache, bio, 0);
1841
1842 issue_after_commit(&cache->committer, bio);
1843 return true;
1844}
1845
1846static bool process_discard_bio(struct cache *cache, struct bio *bio)
1847{
1848 dm_dblock_t b, e;
1849
 /*
  * Mark every complete discard block covered by this bio in the
  * discard bitset.
  */
1853 calc_discard_block_range(cache, bio, &b, &e);
1854 while (b != e) {
1855 set_discard(cache, b);
1856 b = to_dblock(from_dblock(b) + 1);
1857 }
1858
1859 if (cache->features.discard_passdown) {
1860 remap_to_origin(cache, bio);
1861 generic_make_request(bio);
1862 } else
1863 bio_endio(bio);
1864
1865 return false;
1866}
1867
1868static void process_deferred_bios(struct work_struct *ws)
1869{
1870 struct cache *cache = container_of(ws, struct cache, deferred_bio_worker);
1871
1872 bool commit_needed = false;
1873 struct bio_list bios;
1874 struct bio *bio;
1875
1876 bio_list_init(&bios);
1877
1878 spin_lock_irq(&cache->lock);
1879 bio_list_merge(&bios, &cache->deferred_bios);
1880 bio_list_init(&cache->deferred_bios);
1881 spin_unlock_irq(&cache->lock);
1882
1883 while ((bio = bio_list_pop(&bios))) {
1884 if (bio->bi_opf & REQ_PREFLUSH)
1885 commit_needed = process_flush_bio(cache, bio) || commit_needed;
1886
1887 else if (bio_op(bio) == REQ_OP_DISCARD)
1888 commit_needed = process_discard_bio(cache, bio) || commit_needed;
1889
1890 else
1891 commit_needed = process_bio(cache, bio) || commit_needed;
1892 }
1893
1894 if (commit_needed)
1895 schedule_commit(&cache->committer);
1896}
1897
/*----------------------------------------------------------------
 * Main worker loop
 *--------------------------------------------------------------*/

1902static void requeue_deferred_bios(struct cache *cache)
1903{
1904 struct bio *bio;
1905 struct bio_list bios;
1906
1907 bio_list_init(&bios);
1908 bio_list_merge(&bios, &cache->deferred_bios);
1909 bio_list_init(&cache->deferred_bios);
1910
1911 while ((bio = bio_list_pop(&bios))) {
1912 bio->bi_status = BLK_STS_DM_REQUEUE;
1913 bio_endio(bio);
1914 }
1915}
1916
/*
 * We want to commit periodically so that not too much
 * unwritten metadata builds up.
 */
1921static void do_waker(struct work_struct *ws)
1922{
1923 struct cache *cache = container_of(to_delayed_work(ws), struct cache, waker);
1924
1925 policy_tick(cache->policy, true);
1926 wake_migration_worker(cache);
1927 schedule_commit(&cache->committer);
1928 queue_delayed_work(cache->wq, &cache->waker, COMMIT_PERIOD);
1929}
1930
1931static void check_migrations(struct work_struct *ws)
1932{
1933 int r;
1934 struct policy_work *op;
1935 struct cache *cache = container_of(ws, struct cache, migration_worker);
1936 enum busy b;
1937
1938 for (;;) {
1939 b = spare_migration_bandwidth(cache);
1940
1941 r = policy_get_background_work(cache->policy, b == IDLE, &op);
1942 if (r == -ENODATA)
1943 break;
1944
1945 if (r) {
1946 DMERR_LIMIT("%s: policy_background_work failed",
1947 cache_device_name(cache));
1948 break;
1949 }
1950
1951 r = mg_start(cache, op, NULL);
1952 if (r)
1953 break;
1954 }
1955}
1956
/*----------------------------------------------------------------
 * Target methods
 *--------------------------------------------------------------*/

/*
 * This function gets called on the error paths of the constructor, so we
 * have to cope with a partially initialised struct.
 */
1965static void destroy(struct cache *cache)
1966{
1967 unsigned i;
1968
1969 mempool_exit(&cache->migration_pool);
1970
1971 if (cache->prison)
1972 dm_bio_prison_destroy_v2(cache->prison);
1973
1974 if (cache->wq)
1975 destroy_workqueue(cache->wq);
1976
1977 if (cache->dirty_bitset)
1978 free_bitset(cache->dirty_bitset);
1979
1980 if (cache->discard_bitset)
1981 free_bitset(cache->discard_bitset);
1982
1983 if (cache->copier)
1984 dm_kcopyd_client_destroy(cache->copier);
1985
1986 if (cache->cmd)
1987 dm_cache_metadata_close(cache->cmd);
1988
1989 if (cache->metadata_dev)
1990 dm_put_device(cache->ti, cache->metadata_dev);
1991
1992 if (cache->origin_dev)
1993 dm_put_device(cache->ti, cache->origin_dev);
1994
1995 if (cache->cache_dev)
1996 dm_put_device(cache->ti, cache->cache_dev);
1997
1998 if (cache->policy)
1999 dm_cache_policy_destroy(cache->policy);
2000
2001 for (i = 0; i < cache->nr_ctr_args ; i++)
2002 kfree(cache->ctr_args[i]);
2003 kfree(cache->ctr_args);
2004
2005 bioset_exit(&cache->bs);
2006
2007 kfree(cache);
2008}
2009
2010static void cache_dtr(struct dm_target *ti)
2011{
2012 struct cache *cache = ti->private;
2013
2014 destroy(cache);
2015}
2016
2017static sector_t get_dev_size(struct dm_dev *dev)
2018{
2019 return i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT;
2020}
2021
/*----------------------------------------------------------------*/

/*
 * Construct a cache device mapping.
 *
 * cache <metadata dev> <cache dev> <origin dev> <block size>
 *       <#feature args> [<feature arg>]*
 *       <policy> <#policy args> [policy args]*
 *
 * metadata dev    : fast device holding the persistent metadata
 * cache dev       : fast device holding cached data blocks
 * origin dev      : slow device holding original data blocks
 * block size      : cache unit size in sectors
 *
 * #feature args   : number of feature arguments passed
 * feature args    : writethrough.  (The default is writeback.)
 *
 * policy          : the replacement policy to use
 * #policy args    : an even number of policy arguments corresponding
 *                   to key/value pairs passed to the policy
 * policy args     : key/value pairs passed to the policy
 *                   E.g. 'sequential_threshold 1024'
 *                   See cache-policies.txt for details.
 *
 * Optional feature arguments are:
 *   writethrough  : write through caching that prohibits cache block
 *                   content from being different from origin block content.
 *                   Without this argument, the default behaviour is to write
 *                   back cache block contents later for performance reasons,
 *                   so they may differ from the corresponding origin blocks.
 */
2053struct cache_args {
2054 struct dm_target *ti;
2055
2056 struct dm_dev *metadata_dev;
2057
2058 struct dm_dev *cache_dev;
2059 sector_t cache_sectors;
2060
2061 struct dm_dev *origin_dev;
2062 sector_t origin_sectors;
2063
2064 uint32_t block_size;
2065
2066 const char *policy_name;
2067 int policy_argc;
2068 const char **policy_argv;
2069
2070 struct cache_features features;
2071};
2072
2073static void destroy_cache_args(struct cache_args *ca)
2074{
2075 if (ca->metadata_dev)
2076 dm_put_device(ca->ti, ca->metadata_dev);
2077
2078 if (ca->cache_dev)
2079 dm_put_device(ca->ti, ca->cache_dev);
2080
2081 if (ca->origin_dev)
2082 dm_put_device(ca->ti, ca->origin_dev);
2083
2084 kfree(ca);
2085}
2086
2087static bool at_least_one_arg(struct dm_arg_set *as, char **error)
2088{
2089 if (!as->argc) {
2090 *error = "Insufficient args";
2091 return false;
2092 }
2093
2094 return true;
2095}
2096
2097static int parse_metadata_dev(struct cache_args *ca, struct dm_arg_set *as,
2098 char **error)
2099{
2100 int r;
2101 sector_t metadata_dev_size;
2102 char b[BDEVNAME_SIZE];
2103
2104 if (!at_least_one_arg(as, error))
2105 return -EINVAL;
2106
2107 r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
2108 &ca->metadata_dev);
2109 if (r) {
2110 *error = "Error opening metadata device";
2111 return r;
2112 }
2113
2114 metadata_dev_size = get_dev_size(ca->metadata_dev);
2115 if (metadata_dev_size > DM_CACHE_METADATA_MAX_SECTORS_WARNING)
2116 DMWARN("Metadata device %s is larger than %u sectors: excess space will not be used.",
2117 bdevname(ca->metadata_dev->bdev, b), THIN_METADATA_MAX_SECTORS);
2118
2119 return 0;
2120}
2121
2122static int parse_cache_dev(struct cache_args *ca, struct dm_arg_set *as,
2123 char **error)
2124{
2125 int r;
2126
2127 if (!at_least_one_arg(as, error))
2128 return -EINVAL;
2129
2130 r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
2131 &ca->cache_dev);
2132 if (r) {
2133 *error = "Error opening cache device";
2134 return r;
2135 }
2136 ca->cache_sectors = get_dev_size(ca->cache_dev);
2137
2138 return 0;
2139}
2140
2141static int parse_origin_dev(struct cache_args *ca, struct dm_arg_set *as,
2142 char **error)
2143{
2144 int r;
2145
2146 if (!at_least_one_arg(as, error))
2147 return -EINVAL;
2148
2149 r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
2150 &ca->origin_dev);
2151 if (r) {
2152 *error = "Error opening origin device";
2153 return r;
2154 }
2155
2156 ca->origin_sectors = get_dev_size(ca->origin_dev);
2157 if (ca->ti->len > ca->origin_sectors) {
2158 *error = "Device size larger than cached device";
2159 return -EINVAL;
2160 }
2161
2162 return 0;
2163}
2164
2165static int parse_block_size(struct cache_args *ca, struct dm_arg_set *as,
2166 char **error)
2167{
2168 unsigned long block_size;
2169
2170 if (!at_least_one_arg(as, error))
2171 return -EINVAL;
2172
2173 if (kstrtoul(dm_shift_arg(as), 10, &block_size) || !block_size ||
2174 block_size < DATA_DEV_BLOCK_SIZE_MIN_SECTORS ||
2175 block_size > DATA_DEV_BLOCK_SIZE_MAX_SECTORS ||
2176 block_size & (DATA_DEV_BLOCK_SIZE_MIN_SECTORS - 1)) {
2177 *error = "Invalid data block size";
2178 return -EINVAL;
2179 }
2180
2181 if (block_size > ca->cache_sectors) {
2182 *error = "Data block size is larger than the cache device";
2183 return -EINVAL;
2184 }
2185
2186 ca->block_size = block_size;
2187
2188 return 0;
2189}
2190
2191static void init_features(struct cache_features *cf)
2192{
2193 cf->mode = CM_WRITE;
2194 cf->io_mode = CM_IO_WRITEBACK;
2195 cf->metadata_version = 1;
2196 cf->discard_passdown = true;
2197}
2198
2199static int parse_features(struct cache_args *ca, struct dm_arg_set *as,
2200 char **error)
2201{
2202 static const struct dm_arg _args[] = {
2203 {0, 3, "Invalid number of cache feature arguments"},
2204 };
2205
2206 int r, mode_ctr = 0;
2207 unsigned argc;
2208 const char *arg;
2209 struct cache_features *cf = &ca->features;
2210
2211 init_features(cf);
2212
2213 r = dm_read_arg_group(_args, as, &argc, error);
2214 if (r)
2215 return -EINVAL;
2216
2217 while (argc--) {
2218 arg = dm_shift_arg(as);
2219
2220 if (!strcasecmp(arg, "writeback")) {
2221 cf->io_mode = CM_IO_WRITEBACK;
2222 mode_ctr++;
2223 }
2224
2225 else if (!strcasecmp(arg, "writethrough")) {
2226 cf->io_mode = CM_IO_WRITETHROUGH;
2227 mode_ctr++;
2228 }
2229
2230 else if (!strcasecmp(arg, "passthrough")) {
2231 cf->io_mode = CM_IO_PASSTHROUGH;
2232 mode_ctr++;
2233 }
2234
2235 else if (!strcasecmp(arg, "metadata2"))
2236 cf->metadata_version = 2;
2237
2238 else if (!strcasecmp(arg, "no_discard_passdown"))
2239 cf->discard_passdown = false;
2240
2241 else {
2242 *error = "Unrecognised cache feature requested";
2243 return -EINVAL;
2244 }
2245 }
2246
2247 if (mode_ctr > 1) {
2248 *error = "Duplicate cache io_mode features requested";
2249 return -EINVAL;
2250 }
2251
2252 return 0;
2253}
2254
2255static int parse_policy(struct cache_args *ca, struct dm_arg_set *as,
2256 char **error)
2257{
2258 static const struct dm_arg _args[] = {
2259 {0, 1024, "Invalid number of policy arguments"},
2260 };
2261
2262 int r;
2263
2264 if (!at_least_one_arg(as, error))
2265 return -EINVAL;
2266
2267 ca->policy_name = dm_shift_arg(as);
2268
2269 r = dm_read_arg_group(_args, as, &ca->policy_argc, error);
2270 if (r)
2271 return -EINVAL;
2272
2273 ca->policy_argv = (const char **)as->argv;
2274 dm_consume_args(as, ca->policy_argc);
2275
2276 return 0;
2277}
2278
2279static int parse_cache_args(struct cache_args *ca, int argc, char **argv,
2280 char **error)
2281{
2282 int r;
2283 struct dm_arg_set as;
2284
2285 as.argc = argc;
2286 as.argv = argv;
2287
2288 r = parse_metadata_dev(ca, &as, error);
2289 if (r)
2290 return r;
2291
2292 r = parse_cache_dev(ca, &as, error);
2293 if (r)
2294 return r;
2295
2296 r = parse_origin_dev(ca, &as, error);
2297 if (r)
2298 return r;
2299
2300 r = parse_block_size(ca, &as, error);
2301 if (r)
2302 return r;
2303
2304 r = parse_features(ca, &as, error);
2305 if (r)
2306 return r;
2307
2308 r = parse_policy(ca, &as, error);
2309 if (r)
2310 return r;
2311
2312 return 0;
2313}
2314
2315
2316
2317static struct kmem_cache *migration_cache;
2318
2319#define NOT_CORE_OPTION 1
2320
2321static int process_config_option(struct cache *cache, const char *key, const char *value)
2322{
2323 unsigned long tmp;
2324
2325 if (!strcasecmp(key, "migration_threshold")) {
2326 if (kstrtoul(value, 10, &tmp))
2327 return -EINVAL;
2328
2329 cache->migration_threshold = tmp;
2330 return 0;
2331 }
2332
2333 return NOT_CORE_OPTION;
2334}
2335
2336static int set_config_value(struct cache *cache, const char *key, const char *value)
2337{
2338 int r = process_config_option(cache, key, value);
2339
2340 if (r == NOT_CORE_OPTION)
2341 r = policy_set_config_value(cache->policy, key, value);
2342
2343 if (r)
2344 DMWARN("bad config value for %s: %s", key, value);
2345
2346 return r;
2347}
2348
2349static int set_config_values(struct cache *cache, int argc, const char **argv)
2350{
2351 int r = 0;
2352
2353 if (argc & 1) {
2354 DMWARN("Odd number of policy arguments given but they should be <key> <value> pairs.");
2355 return -EINVAL;
2356 }
2357
2358 while (argc) {
2359 r = set_config_value(cache, argv[0], argv[1]);
2360 if (r)
2361 break;
2362
2363 argc -= 2;
2364 argv += 2;
2365 }
2366
2367 return r;
2368}
2369
2370static int create_cache_policy(struct cache *cache, struct cache_args *ca,
2371 char **error)
2372{
2373 struct dm_cache_policy *p = dm_cache_policy_create(ca->policy_name,
2374 cache->cache_size,
2375 cache->origin_sectors,
2376 cache->sectors_per_block);
2377 if (IS_ERR(p)) {
2378 *error = "Error creating cache's policy";
2379 return PTR_ERR(p);
2380 }
2381 cache->policy = p;
2382 BUG_ON(!cache->policy);
2383
2384 return 0;
2385}
2386
/*
 * We want the discard block size to be at least the size of the cache
 * block size and have no more than 2^14 discard blocks across the origin.
 */
2391#define MAX_DISCARD_BLOCKS (1 << 14)
2392
2393static bool too_many_discard_blocks(sector_t discard_block_size,
2394 sector_t origin_size)
2395{
2396 (void) sector_div(origin_size, discard_block_size);
2397
2398 return origin_size > MAX_DISCARD_BLOCKS;
2399}
2400
2401static sector_t calculate_discard_block_size(sector_t cache_block_size,
2402 sector_t origin_size)
2403{
2404 sector_t discard_block_size = cache_block_size;
2405
2406 if (origin_size)
2407 while (too_many_discard_blocks(discard_block_size, origin_size))
2408 discard_block_size *= 2;
2409
2410 return discard_block_size;
2411}
2412
2413static void set_cache_size(struct cache *cache, dm_cblock_t size)
2414{
2415 dm_block_t nr_blocks = from_cblock(size);
2416
2417 if (nr_blocks > (1 << 20) && cache->cache_size != size)
2418 DMWARN_LIMIT("You have created a cache device with a lot of individual cache blocks (%llu)\n"
2419 "All these mappings can consume a lot of kernel memory, and take some time to read/write.\n"
2420 "Please consider increasing the cache block size to reduce the overall cache block count.",
2421 (unsigned long long) nr_blocks);
2422
2423 cache->cache_size = size;
2424}
2425
2426static int is_congested(struct dm_dev *dev, int bdi_bits)
2427{
2428 struct request_queue *q = bdev_get_queue(dev->bdev);
2429 return bdi_congested(q->backing_dev_info, bdi_bits);
2430}
2431
2432static int cache_is_congested(struct dm_target_callbacks *cb, int bdi_bits)
2433{
2434 struct cache *cache = container_of(cb, struct cache, callbacks);
2435
2436 return is_congested(cache->origin_dev, bdi_bits) ||
2437 is_congested(cache->cache_dev, bdi_bits);
2438}
2439
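/* Default migration_threshold: maximum in-flight migration I/O, in sectors. */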
2440#define DEFAULT_MIGRATION_THRESHOLD 2048
2441
2442static int cache_create(struct cache_args *ca, struct cache **result)
2443{
2444 int r = 0;
2445 char **error = &ca->ti->error;
2446 struct cache *cache;
2447 struct dm_target *ti = ca->ti;
2448 dm_block_t origin_blocks;
2449 struct dm_cache_metadata *cmd;
2450 bool may_format = ca->features.mode == CM_WRITE;
2451
2452 cache = kzalloc(sizeof(*cache), GFP_KERNEL);
2453 if (!cache)
2454 return -ENOMEM;
2455
2456 cache->ti = ca->ti;
2457 ti->private = cache;
2458 ti->num_flush_bios = 2;
2459 ti->flush_supported = true;
2460
2461 ti->num_discard_bios = 1;
2462 ti->discards_supported = true;
2463
2464 ti->per_io_data_size = sizeof(struct per_bio_data);
2465
2466 cache->features = ca->features;
2467 if (writethrough_mode(cache)) {
  /* Create bioset for writethrough bios issued to origin */
2469 r = bioset_init(&cache->bs, BIO_POOL_SIZE, 0, 0);
2470 if (r)
2471 goto bad;
2472 }
2473
2474 cache->callbacks.congested_fn = cache_is_congested;
2475 dm_table_add_target_callbacks(ti->table, &cache->callbacks);
2476
2477 cache->metadata_dev = ca->metadata_dev;
2478 cache->origin_dev = ca->origin_dev;
2479 cache->cache_dev = ca->cache_dev;
2480
2481 ca->metadata_dev = ca->origin_dev = ca->cache_dev = NULL;
2482
2483 origin_blocks = cache->origin_sectors = ca->origin_sectors;
2484 origin_blocks = block_div(origin_blocks, ca->block_size);
2485 cache->origin_blocks = to_oblock(origin_blocks);
2486
2487 cache->sectors_per_block = ca->block_size;
2488 if (dm_set_target_max_io_len(ti, cache->sectors_per_block)) {
2489 r = -EINVAL;
2490 goto bad;
2491 }
2492
2493 if (ca->block_size & (ca->block_size - 1)) {
2494 dm_block_t cache_size = ca->cache_sectors;
2495
2496 cache->sectors_per_block_shift = -1;
2497 cache_size = block_div(cache_size, ca->block_size);
2498 set_cache_size(cache, to_cblock(cache_size));
2499 } else {
2500 cache->sectors_per_block_shift = __ffs(ca->block_size);
2501 set_cache_size(cache, to_cblock(ca->cache_sectors >> cache->sectors_per_block_shift));
2502 }
2503
2504 r = create_cache_policy(cache, ca, error);
2505 if (r)
2506 goto bad;
2507
2508 cache->policy_nr_args = ca->policy_argc;
2509 cache->migration_threshold = DEFAULT_MIGRATION_THRESHOLD;
2510
2511 r = set_config_values(cache, ca->policy_argc, ca->policy_argv);
2512 if (r) {
2513 *error = "Error setting cache policy's config values";
2514 goto bad;
2515 }
2516
2517 cmd = dm_cache_metadata_open(cache->metadata_dev->bdev,
2518 ca->block_size, may_format,
2519 dm_cache_policy_get_hint_size(cache->policy),
2520 ca->features.metadata_version);
2521 if (IS_ERR(cmd)) {
2522 *error = "Error creating metadata object";
2523 r = PTR_ERR(cmd);
2524 goto bad;
2525 }
2526 cache->cmd = cmd;
2527 set_cache_mode(cache, CM_WRITE);
2528 if (get_cache_mode(cache) != CM_WRITE) {
2529 *error = "Unable to get write access to metadata, please check/repair metadata.";
2530 r = -EINVAL;
2531 goto bad;
2532 }
2533
2534 if (passthrough_mode(cache)) {
2535 bool all_clean;
2536
2537 r = dm_cache_metadata_all_clean(cache->cmd, &all_clean);
2538 if (r) {
2539 *error = "dm_cache_metadata_all_clean() failed";
2540 goto bad;
2541 }
2542
2543 if (!all_clean) {
2544 *error = "Cannot enter passthrough mode unless all blocks are clean";
2545 r = -EINVAL;
2546 goto bad;
2547 }
2548
2549 policy_allow_migrations(cache->policy, false);
2550 }
2551
2552 spin_lock_init(&cache->lock);
2553 bio_list_init(&cache->deferred_bios);
2554 atomic_set(&cache->nr_allocated_migrations, 0);
2555 atomic_set(&cache->nr_io_migrations, 0);
2556 init_waitqueue_head(&cache->migration_wait);
2557
2558 r = -ENOMEM;
2559 atomic_set(&cache->nr_dirty, 0);
2560 cache->dirty_bitset = alloc_bitset(from_cblock(cache->cache_size));
2561 if (!cache->dirty_bitset) {
2562 *error = "could not allocate dirty bitset";
2563 goto bad;
2564 }
2565 clear_bitset(cache->dirty_bitset, from_cblock(cache->cache_size));
2566
2567 cache->discard_block_size =
2568 calculate_discard_block_size(cache->sectors_per_block,
2569 cache->origin_sectors);
2570 cache->discard_nr_blocks = to_dblock(dm_sector_div_up(cache->origin_sectors,
2571 cache->discard_block_size));
2572 cache->discard_bitset = alloc_bitset(from_dblock(cache->discard_nr_blocks));
2573 if (!cache->discard_bitset) {
2574 *error = "could not allocate discard bitset";
2575 goto bad;
2576 }
2577 clear_bitset(cache->discard_bitset, from_dblock(cache->discard_nr_blocks));
2578
2579 cache->copier = dm_kcopyd_client_create(&dm_kcopyd_throttle);
2580 if (IS_ERR(cache->copier)) {
2581 *error = "could not create kcopyd client";
2582 r = PTR_ERR(cache->copier);
2583 goto bad;
2584 }
2585
2586 cache->wq = alloc_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM, 0);
2587 if (!cache->wq) {
2588 *error = "could not create workqueue for metadata object";
2589 goto bad;
2590 }
2591 INIT_WORK(&cache->deferred_bio_worker, process_deferred_bios);
2592 INIT_WORK(&cache->migration_worker, check_migrations);
2593 INIT_DELAYED_WORK(&cache->waker, do_waker);
2594
2595 cache->prison = dm_bio_prison_create_v2(cache->wq);
2596 if (!cache->prison) {
2597 *error = "could not create bio prison";
2598 goto bad;
2599 }
2600
2601 r = mempool_init_slab_pool(&cache->migration_pool, MIGRATION_POOL_SIZE,
2602 migration_cache);
2603 if (r) {
2604 *error = "Error creating cache's migration mempool";
2605 goto bad;
2606 }
2607
2608 cache->need_tick_bio = true;
2609 cache->sized = false;
2610 cache->invalidate = false;
2611 cache->commit_requested = false;
2612 cache->loaded_mappings = false;
2613 cache->loaded_discards = false;
2614
2615 load_stats(cache);
2616
2617 atomic_set(&cache->stats.demotion, 0);
2618 atomic_set(&cache->stats.promotion, 0);
2619 atomic_set(&cache->stats.copies_avoided, 0);
2620 atomic_set(&cache->stats.cache_cell_clash, 0);
2621 atomic_set(&cache->stats.commit_count, 0);
2622 atomic_set(&cache->stats.discard_count, 0);
2623
2624 spin_lock_init(&cache->invalidation_lock);
2625 INIT_LIST_HEAD(&cache->invalidation_requests);
2626
2627 batcher_init(&cache->committer, commit_op, cache,
2628 issue_op, cache, cache->wq);
2629 iot_init(&cache->tracker);
2630
2631 init_rwsem(&cache->background_work_lock);
2632 prevent_background_work(cache);
2633
2634 *result = cache;
2635 return 0;
2636bad:
2637 destroy(cache);
2638 return r;
2639}
2640
2641static int copy_ctr_args(struct cache *cache, int argc, const char **argv)
2642{
2643 unsigned i;
2644 const char **copy;
2645
2646 copy = kcalloc(argc, sizeof(*copy), GFP_KERNEL);
2647 if (!copy)
2648 return -ENOMEM;
2649 for (i = 0; i < argc; i++) {
2650 copy[i] = kstrdup(argv[i], GFP_KERNEL);
2651 if (!copy[i]) {
2652 while (i--)
2653 kfree(copy[i]);
2654 kfree(copy);
2655 return -ENOMEM;
2656 }
2657 }
2658
2659 cache->nr_ctr_args = argc;
2660 cache->ctr_args = copy;
2661
2662 return 0;
2663}
2664
2665static int cache_ctr(struct dm_target *ti, unsigned argc, char **argv)
2666{
2667 int r = -EINVAL;
2668 struct cache_args *ca;
2669 struct cache *cache = NULL;
2670
2671 ca = kzalloc(sizeof(*ca), GFP_KERNEL);
2672 if (!ca) {
2673 ti->error = "Error allocating memory for cache";
2674 return -ENOMEM;
2675 }
2676 ca->ti = ti;
2677
2678 r = parse_cache_args(ca, argc, argv, &ti->error);
2679 if (r)
2680 goto out;
2681
2682 r = cache_create(ca, &cache);
2683 if (r)
2684 goto out;
2685
2686 r = copy_ctr_args(cache, argc - 3, (const char **)argv + 3);
2687 if (r) {
2688 destroy(cache);
2689 goto out;
2690 }
2691
2692 ti->private = cache;
2693out:
2694 destroy_cache_args(ca);
2695 return r;
2696}
2697
2698
2699
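/*
 * The fast path: decide where a bio goes without involving the worker.
 * Bios that fall in the partial block at the end of the origin are
 * remapped straight to the origin, flushes and discards are deferred to
 * the worker, and a metadata commit is scheduled whenever map_bio()
 * asks for one.
 */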
2700static int cache_map(struct dm_target *ti, struct bio *bio)
2701{
2702 struct cache *cache = ti->private;
2703
2704 int r;
2705 bool commit_needed;
2706 dm_oblock_t block = get_bio_block(cache, bio);
2707
2708 init_per_bio_data(bio);
2709 if (unlikely(from_oblock(block) >= from_oblock(cache->origin_blocks))) {
2710 /*
2711 * This can only occur if the io goes to a partial block at
2712 * the end of the origin device.  We don't cache these, just
2713 * remap to the origin and carry on.
2714 */
2715 remap_to_origin(cache, bio);
2716 accounted_begin(cache, bio);
2717 return DM_MAPIO_REMAPPED;
2718 }
2719
2720 if (discard_or_flush(bio)) {
2721 defer_bio(cache, bio);
2722 return DM_MAPIO_SUBMITTED;
2723 }
2724
2725 r = map_bio(cache, bio, block, &commit_needed);
2726 if (commit_needed)
2727 schedule_commit(&cache->committer);
2728
2729 return r;
2730}
2731
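/*
 * Completion path: rearm the policy tick if this bio carried it, drop the
 * bio's shared cell lock and finish the io accounting.
 */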
2732static int cache_end_io(struct dm_target *ti, struct bio *bio, blk_status_t *error)
2733{
2734 struct cache *cache = ti->private;
2735 unsigned long flags;
2736 struct per_bio_data *pb = get_per_bio_data(bio);
2737
2738 if (pb->tick) {
2739 policy_tick(cache->policy, false);
2740
2741 spin_lock_irqsave(&cache->lock, flags);
2742 cache->need_tick_bio = true;
2743 spin_unlock_irqrestore(&cache->lock, flags);
2744 }
2745
2746 bio_drop_shared_lock(cache, bio);
2747 accounted_complete(cache, bio);
2748
2749 return DM_ENDIO_DONE;
2750}
2751
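/*
 * The writers below flush the in-core dirty bits, discard bits and policy
 * hints to the metadata device.  They are called from cache_postsuspend()
 * via sync_metadata() and refuse to run once the cache has degraded to
 * read-only or fail mode.
 */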
2752static int write_dirty_bitset(struct cache *cache)
2753{
2754 int r;
2755
2756 if (get_cache_mode(cache) >= CM_READ_ONLY)
2757 return -EINVAL;
2758
2759 r = dm_cache_set_dirty_bits(cache->cmd, from_cblock(cache->cache_size), cache->dirty_bitset);
2760 if (r)
2761 metadata_operation_failed(cache, "dm_cache_set_dirty_bits", r);
2762
2763 return r;
2764}
2765
2766static int write_discard_bitset(struct cache *cache)
2767{
2768 int r;
2769 unsigned i;

2770 if (get_cache_mode(cache) >= CM_READ_ONLY)
2771 return -EINVAL;
2772
2773 r = dm_cache_discard_bitset_resize(cache->cmd, cache->discard_block_size,
2774 cache->discard_nr_blocks);
2775 if (r) {
2776 DMERR("%s: could not resize on-disk discard bitset", cache_device_name(cache));
2777 metadata_operation_failed(cache, "dm_cache_discard_bitset_resize", r);
2778 return r;
2779 }
2780
2781 for (i = 0; i < from_dblock(cache->discard_nr_blocks); i++) {
2782 r = dm_cache_set_discard(cache->cmd, to_dblock(i),
2783 is_discarded(cache, to_dblock(i)));
2784 if (r) {
2785 metadata_operation_failed(cache, "dm_cache_set_discard", r);
2786 return r;
2787 }
2788 }
2789
2790 return 0;
2791}
2792
2793static int write_hints(struct cache *cache)
2794{
2795 int r;
2796
2797 if (get_cache_mode(cache) >= CM_READ_ONLY)
2798 return -EINVAL;
2799
2800 r = dm_cache_write_hints(cache->cmd, cache->policy);
2801 if (r) {
2802 metadata_operation_failed(cache, "dm_cache_write_hints", r);
2803 return r;
2804 }
2805
2806 return 0;
2807}
2808
2809/*
2810 * Returns true only if all of the metadata was written successfully.
2811 */
2812static bool sync_metadata(struct cache *cache)
2813{
2814 int r1, r2, r3, r4;
2815
2816 r1 = write_dirty_bitset(cache);
2817 if (r1)
2818 DMERR("%s: could not write dirty bitset", cache_device_name(cache));
2819
2820 r2 = write_discard_bitset(cache);
2821 if (r2)
2822 DMERR("%s: could not write discard bitset", cache_device_name(cache));
2823
2824 save_stats(cache);
2825
2826 r3 = write_hints(cache);
2827 if (r3)
2828 DMERR("%s: could not write hints", cache_device_name(cache));
2829
2830 /*
2831 * If writing any of the above metadata failed we still commit,
2832 * but without the clean-shutdown flag, so all blocks will be
2833 * treated as dirty when the cache is next loaded.
2834 */
2835 r4 = commit(cache, !r1 && !r2 && !r3);
2836 if (r4)
2837 DMERR("%s: could not write cache metadata", cache_device_name(cache));
2838
2839 return !r1 && !r2 && !r3 && !r4;
2840}
2841
2842static void cache_postsuspend(struct dm_target *ti)
2843{
2844 struct cache *cache = ti->private;
2845
2846 prevent_background_work(cache);
2847 BUG_ON(atomic_read(&cache->nr_io_migrations));
2848
2849 cancel_delayed_work_sync(&cache->waker);
2850 drain_workqueue(cache->wq);
2851 WARN_ON(cache->tracker.in_flight);
2852
2853 /*
2854 * Any bios still on the deferred list at this point are requeued
2855 * rather than serviced.
2856 */
2857 requeue_deferred_bios(cache);
2858
2859 if (get_cache_mode(cache) == CM_WRITE)
2860 (void) sync_metadata(cache);
2861}
2862
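/*
 * Callback for dm_cache_load_mappings(): rebuild the in-core dirty bitset
 * and hand each mapping to the policy.
 */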
2863static int load_mapping(void *context, dm_oblock_t oblock, dm_cblock_t cblock,
2864 bool dirty, uint32_t hint, bool hint_valid)
2865{
2866 int r;
2867 struct cache *cache = context;
2868
2869 if (dirty) {
2870 set_bit(from_cblock(cblock), cache->dirty_bitset);
2871 atomic_inc(&cache->nr_dirty);
2872 } else
2873 clear_bit(from_cblock(cblock), cache->dirty_bitset);
2874
2875 r = policy_load_mapping(cache->policy, oblock, cblock, dirty, hint, hint_valid);
2876 if (r)
2877 return r;
2878
2879 return 0;
2880}
2881
2882/*
2883 * The discard block size in the on-disk metadata is not necessarily
2884 * the same as the one currently in use.  So we have to be careful
2885 * to only set the discarded attribute when we know the loaded range
2886 * covers a complete block.
2887 */
2888struct discard_load_info {
2889 struct cache *cache;
2890
2891 /*
2892 * These fields are in units of the on-disk discard block size,
2893 * not the cache's current discard block size.
2894 */
2895 dm_block_t block_size;
2896 dm_block_t discard_begin, discard_end;
2897};
2898
2899static void discard_load_info_init(struct cache *cache,
2900 struct discard_load_info *li)
2901{
2902 li->cache = cache;
2903 li->discard_begin = li->discard_end = 0;
2904}
2905
2906static void set_discard_range(struct discard_load_info *li)
2907{
2908 sector_t b, e;
2909
2910 if (li->discard_begin == li->discard_end)
2911 return;
2912
2913 /*
2914 * Convert to sectors.
2915 */
2916 b = li->discard_begin * li->block_size;
2917 e = li->discard_end * li->block_size;
2918
2919 /*
2920 * Then convert back to the current discard block size.
2921 */
2922 b = dm_sector_div_up(b, li->cache->discard_block_size);
2923 sector_div(e, li->cache->discard_block_size);
2924
2925 /*
2926 * The origin may have shrunk, so clamp the end to the current
2927 * number of discard blocks.
2928 */
2929 if (e > from_dblock(li->cache->discard_nr_blocks))
2930 e = from_dblock(li->cache->discard_nr_blocks);
2931
2932 for (; b < e; b++)
2933 set_discard(li->cache, to_dblock(b));
2934}
2935
2936static int load_discard(void *context, sector_t discard_block_size,
2937 dm_dblock_t dblock, bool discard)
2938{
2939 struct discard_load_info *li = context;
2940
2941 li->block_size = discard_block_size;
2942
2943 if (discard) {
2944 if (from_dblock(dblock) == li->discard_end)
2945 /*
2946 * We're already in a discard range, just extend it.
2947 */
2948 li->discard_end = li->discard_end + 1ULL;
2949
2950 else {
2951 /*
2952 * Emit the old range and start a new one.
2953 */
2954 set_discard_range(li);
2955 li->discard_begin = from_dblock(dblock);
2956 li->discard_end = li->discard_begin + 1ULL;
2957 }
2958 } else {
2959 set_discard_range(li);
2960 li->discard_begin = li->discard_end = 0;
2961 }
2962
2963 return 0;
2964}
2965
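/*
 * Size of the fast device in cache blocks, rounding down any partial
 * block at the end.
 */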
2966static dm_cblock_t get_cache_dev_size(struct cache *cache)
2967{
2968 sector_t size = get_dev_size(cache->cache_dev);
2969 (void) sector_div(size, cache->sectors_per_block);
2970 return to_cblock(size);
2971}
2972
2973static bool can_resize(struct cache *cache, dm_cblock_t new_size)
2974{
2975 if (from_cblock(new_size) > from_cblock(cache->cache_size)) {
2976 if (cache->sized) {
2977 DMERR("%s: unable to extend cache due to missing cache table reload",
2978 cache_device_name(cache));
2979 return false;
2980 }
2981 }
2982
2983 /*
2984 * We can't drop a dirty block when shrinking the cache.
2985 */
2986 while (from_cblock(new_size) < from_cblock(cache->cache_size)) {
2987 new_size = to_cblock(from_cblock(new_size) + 1);
2988 if (is_dirty(cache, new_size)) {
2989 DMERR("%s: unable to shrink cache; cache block %llu is dirty",
2990 cache_device_name(cache),
2991 (unsigned long long) from_cblock(new_size));
2992 return false;
2993 }
2994 }
2995
2996 return true;
2997}
2998
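/*
 * Resize the on-disk mapping array first, then update the in-core cache
 * size to match.
 */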
2999static int resize_cache_dev(struct cache *cache, dm_cblock_t new_size)
3000{
3001 int r;
3002
3003 r = dm_cache_resize(cache->cmd, new_size);
3004 if (r) {
3005 DMERR("%s: could not resize cache metadata", cache_device_name(cache));
3006 metadata_operation_failed(cache, "dm_cache_resize", r);
3007 return r;
3008 }
3009
3010 set_cache_size(cache, new_size);
3011
3012 return 0;
3013}
3014
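/*
 * Pick up any change in the fast device size, then load the mappings and
 * discards from the metadata if they haven't been loaded yet.
 */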
3015static int cache_preresume(struct dm_target *ti)
3016{
3017 int r = 0;
3018 struct cache *cache = ti->private;
3019 dm_cblock_t csize = get_cache_dev_size(cache);
3020
3021 /*
3022 * Check to see if the cache has resized.
3023 */
3024 if (!cache->sized) {
3025 r = resize_cache_dev(cache, csize);
3026 if (r)
3027 return r;
3028
3029 cache->sized = true;
3030
3031 } else if (csize != cache->cache_size) {
3032 if (!can_resize(cache, csize))
3033 return -EINVAL;
3034
3035 r = resize_cache_dev(cache, csize);
3036 if (r)
3037 return r;
3038 }
3039
3040 if (!cache->loaded_mappings) {
3041 r = dm_cache_load_mappings(cache->cmd, cache->policy,
3042 load_mapping, cache);
3043 if (r) {
3044 DMERR("%s: could not load cache mappings", cache_device_name(cache));
3045 metadata_operation_failed(cache, "dm_cache_load_mappings", r);
3046 return r;
3047 }
3048
3049 cache->loaded_mappings = true;
3050 }
3051
3052 if (!cache->loaded_discards) {
3053 struct discard_load_info li;
3054
3055 /*
3056 * The discard bitset could have been resized, or the discard
3057 * block size changed.  To be safe we start by setting every
3058 * dblock to not discarded.
3059 */
3060 clear_bitset(cache->discard_bitset, from_dblock(cache->discard_nr_blocks));
3061
3062 discard_load_info_init(cache, &li);
3063 r = dm_cache_load_discards(cache->cmd, load_discard, &li);
3064 if (r) {
3065 DMERR("%s: could not load origin discards", cache_device_name(cache));
3066 metadata_operation_failed(cache, "dm_cache_load_discards", r);
3067 return r;
3068 }
3069 set_discard_range(&li);
3070
3071 cache->loaded_discards = true;
3072 }
3073
3074 return r;
3075}
3076
3077static void cache_resume(struct dm_target *ti)
3078{
3079 struct cache *cache = ti->private;
3080
3081 cache->need_tick_bio = true;
3082 allow_background_work(cache);
3083 do_waker(&cache->waker.work);
3084}
3085
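/*
 * Emit the feature-flag portion of the status line: a count followed by
 * the individual flag names.
 */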
3086static void emit_flags(struct cache *cache, char *result,
3087 unsigned maxlen, ssize_t *sz_ptr)
3088{
3089 ssize_t sz = *sz_ptr;
3090 struct cache_features *cf = &cache->features;
3091 unsigned count = (cf->metadata_version == 2) + !cf->discard_passdown + 1;
3092
3093 DMEMIT("%u ", count);
3094
3095 if (cf->metadata_version == 2)
3096 DMEMIT("metadata2 ");
3097
3098 if (writethrough_mode(cache))
3099 DMEMIT("writethrough ");
3100
3101 else if (passthrough_mode(cache))
3102 DMEMIT("passthrough ");
3103
3104 else if (writeback_mode(cache))
3105 DMEMIT("writeback ");
3106
3107 else {
3108 DMEMIT("unknown ");
3109 DMERR("%s: internal error: unknown io mode: %d",
3110 cache_device_name(cache), (int) cf->io_mode);
3111 }
3112
3113 if (!cf->discard_passdown)
3114 DMEMIT("no_discard_passdown ");
3115
3116 *sz_ptr = sz;
3117}
3118
3119/*
3120 * Status format:
3121 *
3122 * <metadata block size> <#used metadata blocks>/<#total metadata blocks>
3123 * <cache block size> <#used cache blocks>/<#total cache blocks>
3124 * <#read hits> <#read misses> <#write hits> <#write misses>
3125 * <#demotions> <#promotions> <#dirty>
3126 * <#features> <features>*
3127 * <#core args> <core args>
3128 * <policy name> <#policy args> <policy args>* <cache metadata mode> <needs_check>
3129 */
3130static void cache_status(struct dm_target *ti, status_type_t type,
3131 unsigned status_flags, char *result, unsigned maxlen)
3132{
3133 int r = 0;
3134 unsigned i;
3135 ssize_t sz = 0;
3136 dm_block_t nr_free_blocks_metadata = 0;
3137 dm_block_t nr_blocks_metadata = 0;
3138 char buf[BDEVNAME_SIZE];
3139 struct cache *cache = ti->private;
3140 dm_cblock_t residency;
3141 bool needs_check;
3142
3143 switch (type) {
3144 case STATUSTYPE_INFO:
3145 if (get_cache_mode(cache) == CM_FAIL) {
3146 DMEMIT("Fail");
3147 break;
3148 }
3149
3150 /* Commit to ensure statistics aren't out-of-date */
3151 if (!(status_flags & DM_STATUS_NOFLUSH_FLAG) && !dm_suspended(ti))
3152 (void) commit(cache, false);
3153
3154 r = dm_cache_get_free_metadata_block_count(cache->cmd, &nr_free_blocks_metadata);
3155 if (r) {
3156 DMERR("%s: dm_cache_get_free_metadata_block_count returned %d",
3157 cache_device_name(cache), r);
3158 goto err;
3159 }
3160
3161 r = dm_cache_get_metadata_dev_size(cache->cmd, &nr_blocks_metadata);
3162 if (r) {
3163 DMERR("%s: dm_cache_get_metadata_dev_size returned %d",
3164 cache_device_name(cache), r);
3165 goto err;
3166 }
3167
3168 residency = policy_residency(cache->policy);
3169
3170 DMEMIT("%u %llu/%llu %llu %llu/%llu %u %u %u %u %u %u %lu ",
3171 (unsigned)DM_CACHE_METADATA_BLOCK_SIZE,
3172 (unsigned long long)(nr_blocks_metadata - nr_free_blocks_metadata),
3173 (unsigned long long)nr_blocks_metadata,
3174 (unsigned long long)cache->sectors_per_block,
3175 (unsigned long long) from_cblock(residency),
3176 (unsigned long long) from_cblock(cache->cache_size),
3177 (unsigned) atomic_read(&cache->stats.read_hit),
3178 (unsigned) atomic_read(&cache->stats.read_miss),
3179 (unsigned) atomic_read(&cache->stats.write_hit),
3180 (unsigned) atomic_read(&cache->stats.write_miss),
3181 (unsigned) atomic_read(&cache->stats.demotion),
3182 (unsigned) atomic_read(&cache->stats.promotion),
3183 (unsigned long) atomic_read(&cache->nr_dirty));
3184
3185 emit_flags(cache, result, maxlen, &sz);
3186
3187 DMEMIT("2 migration_threshold %llu ", (unsigned long long) cache->migration_threshold);
3188
3189 DMEMIT("%s ", dm_cache_policy_get_name(cache->policy));
3190 if (sz < maxlen) {
3191 r = policy_emit_config_values(cache->policy, result, maxlen, &sz);
3192 if (r)
3193 DMERR("%s: policy_emit_config_values returned %d",
3194 cache_device_name(cache), r);
3195 }
3196
3197 if (get_cache_mode(cache) == CM_READ_ONLY)
3198 DMEMIT("ro ");
3199 else
3200 DMEMIT("rw ");
3201
3202 r = dm_cache_metadata_needs_check(cache->cmd, &needs_check);
3203
3204 if (r || needs_check)
3205 DMEMIT("needs_check ");
3206 else
3207 DMEMIT("- ");
3208
3209 break;
3210
3211 case STATUSTYPE_TABLE:
3212 format_dev_t(buf, cache->metadata_dev->bdev->bd_dev);
3213 DMEMIT("%s ", buf);
3214 format_dev_t(buf, cache->cache_dev->bdev->bd_dev);
3215 DMEMIT("%s ", buf);
3216 format_dev_t(buf, cache->origin_dev->bdev->bd_dev);
3217 DMEMIT("%s", buf);
3218
3219 for (i = 0; i < cache->nr_ctr_args - 1; i++)
3220 DMEMIT(" %s", cache->ctr_args[i]);
3221 if (cache->nr_ctr_args)
3222 DMEMIT(" %s", cache->ctr_args[cache->nr_ctr_args - 1]);
3223 }
3224
3225 return;
3226
3227err:
3228 DMEMIT("Error");
3229}
3230
3231
3232
3233
3234
3235struct cblock_range {
3236 dm_cblock_t begin;
3237 dm_cblock_t end;
3238};
3239
3240/*
3241 * A cache block range can take two forms:
3242 *
3243 * i) A single cblock, eg. '3456'
3244 * ii) A begin and end cblock with a dash between, eg. '123-234'
3245 */
3246static int parse_cblock_range(struct cache *cache, const char *str,
3247 struct cblock_range *result)
3248{
3249 char dummy;
3250 uint64_t b, e;
3251 int r;
3252
3253 /*
3254 * Try and parse form (ii) first.
3255 */
3256 r = sscanf(str, "%llu-%llu%c", &b, &e, &dummy);
3257 if (r < 0)
3258 return r;
3259
3260 if (r == 2) {
3261 result->begin = to_cblock(b);
3262 result->end = to_cblock(e);
3263 return 0;
3264 }
3265
3266 /*
3267 * That didn't work, try form (i).
3268 */
3269 r = sscanf(str, "%llu%c", &b, &dummy);
3270 if (r < 0)
3271 return r;
3272
3273 if (r == 1) {
3274 result->begin = to_cblock(b);
3275 result->end = to_cblock(from_cblock(result->begin) + 1u);
3276 return 0;
3277 }
3278
3279 DMERR("%s: invalid cblock range '%s'", cache_device_name(cache), str);
3280 return -EINVAL;
3281}
3282
3283static int validate_cblock_range(struct cache *cache, struct cblock_range *range)
3284{
3285 uint64_t b = from_cblock(range->begin);
3286 uint64_t e = from_cblock(range->end);
3287 uint64_t n = from_cblock(cache->cache_size);
3288
3289 if (b >= n) {
3290 DMERR("%s: begin cblock out of range: %llu >= %llu",
3291 cache_device_name(cache), b, n);
3292 return -EINVAL;
3293 }
3294
3295 if (e > n) {
3296 DMERR("%s: end cblock out of range: %llu > %llu",
3297 cache_device_name(cache), e, n);
3298 return -EINVAL;
3299 }
3300
3301 if (b >= e) {
3302 DMERR("%s: invalid cblock range: %llu >= %llu",
3303 cache_device_name(cache), b, e);
3304 return -EINVAL;
3305 }
3306
3307 return 0;
3308}
3309
3310static inline dm_cblock_t cblock_succ(dm_cblock_t b)
3311{
3312 return to_cblock(from_cblock(b) + 1);
3313}
3314
3315static int request_invalidation(struct cache *cache, struct cblock_range *range)
3316{
3317 int r = 0;
3318
3319 /*
3320 * No locking is needed here: the caller has already checked that
3321 * the cache is in passthrough mode, so there are no concurrent
3322 * migrations.  A race with an invalidation triggered by io is
3323 * possible, but harmless.
3324 */
3325 while (range->begin != range->end) {
3326 r = invalidate_cblock(cache, range->begin);
3327 if (r)
3328 return r;
3329
3330 range->begin = cblock_succ(range->begin);
3331 }
3332
3333 cache->commit_requested = true;
3334 return r;
3335}
3336
3337static int process_invalidate_cblocks_message(struct cache *cache, unsigned count,
3338 const char **cblock_ranges)
3339{
3340 int r = 0;
3341 unsigned i;
3342 struct cblock_range range;
3343
3344 if (!passthrough_mode(cache)) {
3345 DMERR("%s: cache has to be in passthrough mode for invalidation",
3346 cache_device_name(cache));
3347 return -EPERM;
3348 }
3349
3350 for (i = 0; i < count; i++) {
3351 r = parse_cblock_range(cache, cblock_ranges[i], &range);
3352 if (r)
3353 break;
3354
3355 r = validate_cblock_range(cache, &range);
3356 if (r)
3357 break;
3358
3359 /*
3360 * Invalidate each cblock in the range in turn.
3361 */
3362 r = request_invalidation(cache, &range);
3363 if (r)
3364 break;
3365 }
3366
3367 return r;
3368}
3369
3370/*
3371 * Supports
3372 *	"<key> <value>"
3373 * and
3374 *	"invalidate_cblocks [(<begin>)|(<begin>-<end>)]*"
3375 *
3376 * The key migration_threshold is handled by the cache core itself.
3377 */
3378static int cache_message(struct dm_target *ti, unsigned argc, char **argv,
3379 char *result, unsigned maxlen)
3380{
3381 struct cache *cache = ti->private;
3382
3383 if (!argc)
3384 return -EINVAL;
3385
3386 if (get_cache_mode(cache) >= CM_READ_ONLY) {
3387 DMERR("%s: unable to service cache target messages in READ_ONLY or FAIL mode",
3388 cache_device_name(cache));
3389 return -EOPNOTSUPP;
3390 }
3391
3392 if (!strcasecmp(argv[0], "invalidate_cblocks"))
3393 return process_invalidate_cblocks_message(cache, argc - 1, (const char **) argv + 1);
3394
3395 if (argc != 2)
3396 return -EINVAL;
3397
3398 return set_config_value(cache, argv[0], argv[1]);
3399}
3400
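/*
 * Report both underlying devices: the whole of the fast device and the
 * portion of the origin covered by this target.
 */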
3401static int cache_iterate_devices(struct dm_target *ti,
3402 iterate_devices_callout_fn fn, void *data)
3403{
3404 int r = 0;
3405 struct cache *cache = ti->private;
3406
3407 r = fn(ti, cache->cache_dev, 0, get_dev_size(cache->cache_dev), data);
3408 if (!r)
3409 r = fn(ti, cache->origin_dev, 0, ti->len, data);
3410
3411 return r;
3412}
3413
3414static bool origin_dev_supports_discard(struct block_device *origin_bdev)
3415{
3416 struct request_queue *q = bdev_get_queue(origin_bdev);
3417
3418 return q && blk_queue_discard(q);
3419}
3420
3421/*
3422 * If discard_passdown was enabled verify that the origin device
3423 * supports discards.  Disable discard_passdown if not.
3424 */
3425static void disable_passdown_if_not_supported(struct cache *cache)
3426{
3427 struct block_device *origin_bdev = cache->origin_dev->bdev;
3428 struct queue_limits *origin_limits = &bdev_get_queue(origin_bdev)->limits;
3429 const char *reason = NULL;
3430 char buf[BDEVNAME_SIZE];
3431
3432 if (!cache->features.discard_passdown)
3433 return;
3434
3435 if (!origin_dev_supports_discard(origin_bdev))
3436 reason = "discard unsupported";
3437
3438 else if (origin_limits->max_discard_sectors < cache->sectors_per_block)
3439 reason = "max discard sectors smaller than a block";
3440
3441 if (reason) {
3442 DMWARN("Origin device (%s) %s: Disabling discard passdown.",
3443 bdevname(origin_bdev, buf), reason);
3444 cache->features.discard_passdown = false;
3445 }
3446}
3447
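/*
 * Either advertise the cache's own discard granularity, or, when discard
 * passdown is enabled, expose the origin's discard limits directly.
 */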
3448static void set_discard_limits(struct cache *cache, struct queue_limits *limits)
3449{
3450 struct block_device *origin_bdev = cache->origin_dev->bdev;
3451 struct queue_limits *origin_limits = &bdev_get_queue(origin_bdev)->limits;
3452
3453 if (!cache->features.discard_passdown) {
3454 /* No passdown: expose the cache's own virtual discard limits */
3455 limits->max_discard_sectors = min_t(sector_t, cache->discard_block_size * 1024,
3456 cache->origin_sectors);
3457 limits->discard_granularity = cache->discard_block_size << SECTOR_SHIFT;
3458 return;
3459 }
3460
3461 /*
3462 * Discards are passed down to the origin device only, so inherit
3463 * the origin's discard limits rather than the stacked ones.
3464 */
3465 limits->max_discard_sectors = origin_limits->max_discard_sectors;
3466 limits->max_hw_discard_sectors = origin_limits->max_hw_discard_sectors;
3467 limits->discard_granularity = origin_limits->discard_granularity;
3468 limits->discard_alignment = origin_limits->discard_alignment;
3469 limits->discard_misaligned = origin_limits->discard_misaligned;
3470}
3471
3472static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
3473{
3474 struct cache *cache = ti->private;
3475 uint64_t io_opt_sectors = limits->io_opt >> SECTOR_SHIFT;
3476
3477 /*
3478 * Use the cache block size for io_min/io_opt unless the stacked
3479 * io_opt is already a multiple of the cache block size.
3480 */
3481 if (io_opt_sectors < cache->sectors_per_block ||
3482 do_div(io_opt_sectors, cache->sectors_per_block)) {
3483 blk_limits_io_min(limits, cache->sectors_per_block << SECTOR_SHIFT);
3484 blk_limits_io_opt(limits, cache->sectors_per_block << SECTOR_SHIFT);
3485 }
3486
3487 disable_passdown_if_not_supported(cache);
3488 set_discard_limits(cache, limits);
3489}
3490
3491
3492
3493static struct target_type cache_target = {
3494 .name = "cache",
3495 .version = {2, 2, 0},
3496 .module = THIS_MODULE,
3497 .ctr = cache_ctr,
3498 .dtr = cache_dtr,
3499 .map = cache_map,
3500 .end_io = cache_end_io,
3501 .postsuspend = cache_postsuspend,
3502 .preresume = cache_preresume,
3503 .resume = cache_resume,
3504 .status = cache_status,
3505 .message = cache_message,
3506 .iterate_devices = cache_iterate_devices,
3507 .io_hints = cache_io_hints,
3508};
3509
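/*
 * Module init/exit: create the slab cache used for migration structures
 * and register/unregister the "cache" target.
 */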
3510static int __init dm_cache_init(void)
3511{
3512 int r;
3513
3514 migration_cache = KMEM_CACHE(dm_cache_migration, 0);
3515 if (!migration_cache)
3516 return -ENOMEM;
3517
3518 r = dm_register_target(&cache_target);
3519 if (r) {
3520 DMERR("cache target registration failed: %d", r);
3521 kmem_cache_destroy(migration_cache);
3522 return r;
3523 }
3524
3525 return 0;
3526}
3527
3528static void __exit dm_cache_exit(void)
3529{
3530 dm_unregister_target(&cache_target);
3531 kmem_cache_destroy(migration_cache);
3532}
3533
3534module_init(dm_cache_init);
3535module_exit(dm_cache_exit);
3536
3537MODULE_DESCRIPTION(DM_NAME " cache target");
3538MODULE_AUTHOR("Joe Thornber <ejt@redhat.com>");
3539MODULE_LICENSE("GPL");
3540