/*
 * Copyright (C) 2012 Red Hat. All rights reserved.
 *
 * This file is released under the GPL.
 */

7#include "dm.h"
8#include "dm-bio-prison-v2.h"
9#include "dm-bio-record.h"
10#include "dm-cache-metadata.h"
11
12#include <linux/dm-io.h>
13#include <linux/dm-kcopyd.h>
14#include <linux/jiffies.h>
15#include <linux/init.h>
16#include <linux/mempool.h>
17#include <linux/module.h>
18#include <linux/rwsem.h>
19#include <linux/slab.h>
20#include <linux/vmalloc.h>
21
22#define DM_MSG_PREFIX "cache"
23
24DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(cache_copy_throttle,
25 "A percentage of time allocated for copying to and/or from cache");
26
/*----------------------------------------------------------------*/

/*
 * Glossary:
 *
 * oblock: index of an origin block
 * cblock: index of a cache block
 * promotion: movement of a block from origin to cache
 * demotion: movement of a block from cache to origin
 * migration: movement of a block between the origin and cache device,
 *            either direction
 */

/*----------------------------------------------------------------*/

42struct io_tracker {
43 spinlock_t lock;
44
45
46
47
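 /*
  * Sectors of in-flight IO.
  */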
48 sector_t in_flight;
49
50
51
52
53
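 /*
  * The time, in jiffies, when this device became idle (if it
  * is indeed idle).
  */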
54 unsigned long idle_time;
55 unsigned long last_update_time;
56};
57
58static void iot_init(struct io_tracker *iot)
59{
60 spin_lock_init(&iot->lock);
61 iot->in_flight = 0ul;
62 iot->idle_time = 0ul;
63 iot->last_update_time = jiffies;
64}
65
66static bool __iot_idle_for(struct io_tracker *iot, unsigned long jifs)
67{
68 if (iot->in_flight)
69 return false;
70
71 return time_after(jiffies, iot->idle_time + jifs);
72}
73
74static bool iot_idle_for(struct io_tracker *iot, unsigned long jifs)
75{
76 bool r;
77 unsigned long flags;
78
79 spin_lock_irqsave(&iot->lock, flags);
80 r = __iot_idle_for(iot, jifs);
81 spin_unlock_irqrestore(&iot->lock, flags);
82
83 return r;
84}
85
86static void iot_io_begin(struct io_tracker *iot, sector_t len)
87{
88 unsigned long flags;
89
90 spin_lock_irqsave(&iot->lock, flags);
91 iot->in_flight += len;
92 spin_unlock_irqrestore(&iot->lock, flags);
93}
94
95static void __iot_io_end(struct io_tracker *iot, sector_t len)
96{
97 if (!len)
98 return;
99
100 iot->in_flight -= len;
101 if (!iot->in_flight)
102 iot->idle_time = jiffies;
103}
104
105static void iot_io_end(struct io_tracker *iot, sector_t len)
106{
107 unsigned long flags;
108
109 spin_lock_irqsave(&iot->lock, flags);
110 __iot_io_end(iot, len);
111 spin_unlock_irqrestore(&iot->lock, flags);
112}
113
/*----------------------------------------------------------------*/

/*
 * Represents a chunk of future work.  'input' allows continuations to pass
 * values between themselves, typically error values.
 */
120struct continuation {
121 struct work_struct ws;
122 int input;
123};
124
125static inline void init_continuation(struct continuation *k,
126 void (*fn)(struct work_struct *))
127{
128 INIT_WORK(&k->ws, fn);
129 k->input = 0;
130}
131
132static inline void queue_continuation(struct workqueue_struct *wq,
133 struct continuation *k)
134{
135 queue_work(wq, &k->ws);
136}
137
/*----------------------------------------------------------------*/

/*
 * The batcher collects together pieces of work that need a particular
 * operation to occur before they can proceed (typically a commit).
 */
144struct batcher {
 /*
  * The operation that everyone is waiting for.
  */
148 int (*commit_op)(void *context);
149 void *commit_context;
150
 /*
  * This is how bios should be issued once the commit op is complete
  * (accounted_request).
  */
155 void (*issue_op)(struct bio *bio, void *context);
156 void *issue_context;
157
 /*
  * Queued work gets put on here after commit.
  */
161 struct workqueue_struct *wq;
162
163 spinlock_t lock;
164 struct list_head work_items;
165 struct bio_list bios;
166 struct work_struct commit_work;
167
168 bool commit_scheduled;
169};
170
171static void __commit(struct work_struct *_ws)
172{
173 struct batcher *b = container_of(_ws, struct batcher, commit_work);
174
175 int r;
176 unsigned long flags;
177 struct list_head work_items;
178 struct work_struct *ws, *tmp;
179 struct continuation *k;
180 struct bio *bio;
181 struct bio_list bios;
182
183 INIT_LIST_HEAD(&work_items);
184 bio_list_init(&bios);
185
 /*
  * We have to grab these before the commit_op to avoid a race
  * condition.
  */
190 spin_lock_irqsave(&b->lock, flags);
191 list_splice_init(&b->work_items, &work_items);
192 bio_list_merge(&bios, &b->bios);
193 bio_list_init(&b->bios);
194 b->commit_scheduled = false;
195 spin_unlock_irqrestore(&b->lock, flags);
196
197 r = b->commit_op(b->commit_context);
198
199 list_for_each_entry_safe(ws, tmp, &work_items, entry) {
200 k = container_of(ws, struct continuation, ws);
201 k->input = r;
202 INIT_LIST_HEAD(&ws->entry);
203 queue_work(b->wq, ws);
204 }
205
206 while ((bio = bio_list_pop(&bios))) {
207 if (r) {
208 bio->bi_error = r;
209 bio_endio(bio);
210 } else
211 b->issue_op(bio, b->issue_context);
212 }
213}
214
215static void batcher_init(struct batcher *b,
216 int (*commit_op)(void *),
217 void *commit_context,
218 void (*issue_op)(struct bio *bio, void *),
219 void *issue_context,
220 struct workqueue_struct *wq)
221{
222 b->commit_op = commit_op;
223 b->commit_context = commit_context;
224 b->issue_op = issue_op;
225 b->issue_context = issue_context;
226 b->wq = wq;
227
228 spin_lock_init(&b->lock);
229 INIT_LIST_HEAD(&b->work_items);
230 bio_list_init(&b->bios);
231 INIT_WORK(&b->commit_work, __commit);
232 b->commit_scheduled = false;
233}
234
235static void async_commit(struct batcher *b)
236{
237 queue_work(b->wq, &b->commit_work);
238}
239
240static void continue_after_commit(struct batcher *b, struct continuation *k)
241{
242 unsigned long flags;
243 bool commit_scheduled;
244
245 spin_lock_irqsave(&b->lock, flags);
246 commit_scheduled = b->commit_scheduled;
247 list_add_tail(&k->ws.entry, &b->work_items);
248 spin_unlock_irqrestore(&b->lock, flags);
249
250 if (commit_scheduled)
251 async_commit(b);
252}
253
/*
 * Bios are errored if the commit fails.
 */
257static void issue_after_commit(struct batcher *b, struct bio *bio)
258{
259 unsigned long flags;
260 bool commit_scheduled;
261
262 spin_lock_irqsave(&b->lock, flags);
263 commit_scheduled = b->commit_scheduled;
264 bio_list_add(&b->bios, bio);
265 spin_unlock_irqrestore(&b->lock, flags);
266
267 if (commit_scheduled)
268 async_commit(b);
269}
270
/*
 * Call this if some urgent work is waiting for the commit to complete.
 */
274static void schedule_commit(struct batcher *b)
275{
276 bool immediate;
277 unsigned long flags;
278
279 spin_lock_irqsave(&b->lock, flags);
280 immediate = !list_empty(&b->work_items) || !bio_list_empty(&b->bios);
281 b->commit_scheduled = true;
282 spin_unlock_irqrestore(&b->lock, flags);
283
284 if (immediate)
285 async_commit(b);
286}
287
/*
 * There are a couple of places where we let a bio run, but want to do some
 * work before calling its endio function.  We do this by temporarily
 * changing the endio fn.
 */
293struct dm_hook_info {
294 bio_end_io_t *bi_end_io;
295};
296
297static void dm_hook_bio(struct dm_hook_info *h, struct bio *bio,
298 bio_end_io_t *bi_end_io, void *bi_private)
299{
300 h->bi_end_io = bio->bi_end_io;
301
302 bio->bi_end_io = bi_end_io;
303 bio->bi_private = bi_private;
304}
305
306static void dm_unhook_bio(struct dm_hook_info *h, struct bio *bio)
307{
308 bio->bi_end_io = h->bi_end_io;
309}
310
311
312
313#define MIGRATION_POOL_SIZE 128
314#define COMMIT_PERIOD HZ
315#define MIGRATION_COUNT_WINDOW 10
316
/*
 * The block size of the device holding cache data must be
 * between 32KB and 1GB.
 */
321#define DATA_DEV_BLOCK_SIZE_MIN_SECTORS (32 * 1024 >> SECTOR_SHIFT)
322#define DATA_DEV_BLOCK_SIZE_MAX_SECTORS (1024 * 1024 * 1024 >> SECTOR_SHIFT)
323
324enum cache_metadata_mode {
325 CM_WRITE,
326 CM_READ_ONLY,
327 CM_FAIL
328};
329
330enum cache_io_mode {
 /*
  * Data is written to cached blocks only.  These blocks are marked
  * dirty.  If you lose the cache device you will lose data.
  * Potential performance increase for both reads and writes.
  */
336 CM_IO_WRITEBACK,

 /*
  * Data is written to both cache and origin.  Blocks are never
  * dirty.  Potential performance benefit for reads only.
  */
342 CM_IO_WRITETHROUGH,

 /*
  * A degraded mode useful for various cache coherency situations
  * (eg, rolling back snapshots).  Reads and writes always go to the
  * origin.  If a write goes to a cached oblock, then the cache
  * block is invalidated.
  */
350 CM_IO_PASSTHROUGH
351};
352
353struct cache_features {
354 enum cache_metadata_mode mode;
355 enum cache_io_mode io_mode;
356 unsigned metadata_version;
357};
358
359struct cache_stats {
360 atomic_t read_hit;
361 atomic_t read_miss;
362 atomic_t write_hit;
363 atomic_t write_miss;
364 atomic_t demotion;
365 atomic_t promotion;
366 atomic_t writeback;
367 atomic_t copies_avoided;
368 atomic_t cache_cell_clash;
369 atomic_t commit_count;
370 atomic_t discard_count;
371};
372
373struct cache {
374 struct dm_target *ti;
375 struct dm_target_callbacks callbacks;
376
377 struct dm_cache_metadata *cmd;
378
379
380
381
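 /*
  * Metadata is written to this device.
  */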
382 struct dm_dev *metadata_dev;
383
384
385
386
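 /*
  * The slower of the two data devices.  Typically a spindle.
  */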
387 struct dm_dev *origin_dev;
388
389
390
391
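 /*
  * The faster of the two data devices.  Typically an SSD.
  */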
392 struct dm_dev *cache_dev;
393
394
395
396
397 dm_oblock_t origin_blocks;
398 sector_t origin_sectors;
399
400
401
402
403 dm_cblock_t cache_size;
404
405
406
407
408 sector_t sectors_per_block;
409 int sectors_per_block_shift;
410
411 spinlock_t lock;
412 struct list_head deferred_cells;
413 struct bio_list deferred_bios;
414 struct bio_list deferred_writethrough_bios;
415 sector_t migration_threshold;
416 wait_queue_head_t migration_wait;
417 atomic_t nr_allocated_migrations;
418
419
420
421
422
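 /*
  * The number of in flight migrations that are performing
  * background io, eg. promotion, writeback.
  */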
423 atomic_t nr_io_migrations;
424
425 struct rw_semaphore quiesce_lock;
426
427
428
429
430 atomic_t nr_dirty;
431 unsigned long *dirty_bitset;
432
433
434
435
436 dm_dblock_t discard_nr_blocks;
437 unsigned long *discard_bitset;
438 uint32_t discard_block_size;
439
440
441
442
443
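 /*
  * Rather than reconstructing the table line for the status we just
  * save it and regurgitate.
  */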
444 unsigned nr_ctr_args;
445 const char **ctr_args;
446
447 struct dm_kcopyd_client *copier;
448 struct workqueue_struct *wq;
449 struct work_struct deferred_bio_worker;
450 struct work_struct deferred_writethrough_worker;
451 struct work_struct migration_worker;
452 struct delayed_work waker;
453 struct dm_bio_prison_v2 *prison;
454
455 mempool_t *migration_pool;
456
457 struct dm_cache_policy *policy;
458 unsigned policy_nr_args;
459
460 bool need_tick_bio:1;
461 bool sized:1;
462 bool invalidate:1;
463 bool commit_requested:1;
464 bool loaded_mappings:1;
465 bool loaded_discards:1;
466
467
468
469
470 struct cache_features features;
471
472 struct cache_stats stats;
473
474
475
476
477 spinlock_t invalidation_lock;
478 struct list_head invalidation_requests;
479
480 struct io_tracker tracker;
481
482 struct work_struct commit_ws;
483 struct batcher committer;
484
485 struct rw_semaphore background_work_lock;
486};
487
488struct per_bio_data {
489 bool tick:1;
490 unsigned req_nr:2;
491 struct dm_bio_prison_cell_v2 *cell;
492 struct dm_hook_info hook_info;
493 sector_t len;

 /*
  * writethrough fields.  These MUST remain at the end of this
  * structure and the 'cache' member must be the first as it
  * is used to determine the offset of the writethrough fields.
  */
500 struct cache *cache;
501 dm_cblock_t cblock;
502 struct dm_bio_details bio_details;
503};
504
505struct dm_cache_migration {
506 struct continuation k;
507 struct cache *cache;
508
509 struct policy_work *op;
510 struct bio *overwrite_bio;
511 struct dm_bio_prison_cell_v2 *cell;
512
513 dm_cblock_t invalidate_cblock;
514 dm_oblock_t invalidate_oblock;
515};
516
517
518
519static bool writethrough_mode(struct cache_features *f)
520{
521 return f->io_mode == CM_IO_WRITETHROUGH;
522}
523
524static bool writeback_mode(struct cache_features *f)
525{
526 return f->io_mode == CM_IO_WRITEBACK;
527}
528
529static inline bool passthrough_mode(struct cache_features *f)
530{
531 return unlikely(f->io_mode == CM_IO_PASSTHROUGH);
532}
533
534
535
536static void wake_deferred_bio_worker(struct cache *cache)
537{
538 queue_work(cache->wq, &cache->deferred_bio_worker);
539}
540
541static void wake_deferred_writethrough_worker(struct cache *cache)
542{
543 queue_work(cache->wq, &cache->deferred_writethrough_worker);
544}
545
546static void wake_migration_worker(struct cache *cache)
547{
548 if (passthrough_mode(&cache->features))
549 return;
550
551 queue_work(cache->wq, &cache->migration_worker);
552}
553
554
555
556static struct dm_bio_prison_cell_v2 *alloc_prison_cell(struct cache *cache)
557{
558 return dm_bio_prison_alloc_cell_v2(cache->prison, GFP_NOWAIT);
559}
560
561static void free_prison_cell(struct cache *cache, struct dm_bio_prison_cell_v2 *cell)
562{
563 dm_bio_prison_free_cell_v2(cache->prison, cell);
564}
565
566static struct dm_cache_migration *alloc_migration(struct cache *cache)
567{
568 struct dm_cache_migration *mg;
569
570 mg = mempool_alloc(cache->migration_pool, GFP_NOWAIT);
571 if (mg) {
572 mg->cache = cache;
573 atomic_inc(&mg->cache->nr_allocated_migrations);
574 }
575
576 return mg;
577}
578
579static void free_migration(struct dm_cache_migration *mg)
580{
581 struct cache *cache = mg->cache;
582
583 if (atomic_dec_and_test(&cache->nr_allocated_migrations))
584 wake_up(&cache->migration_wait);
585
586 mempool_free(mg, cache->migration_pool);
587}
588
589
590
591static inline dm_oblock_t oblock_succ(dm_oblock_t b)
592{
593 return to_oblock(from_oblock(b) + 1ull);
594}
595
596static void build_key(dm_oblock_t begin, dm_oblock_t end, struct dm_cell_key_v2 *key)
597{
598 key->virtual = 0;
599 key->dev = 0;
600 key->block_begin = from_oblock(begin);
601 key->block_end = from_oblock(end);
602}
603
/*
 * We have two lock levels.  Level 0, which is used to prevent WRITEs, and
 * level 1 which prevents *both* READs and WRITEs.
 */
608#define WRITE_LOCK_LEVEL 0
609#define READ_WRITE_LOCK_LEVEL 1
610
611static unsigned lock_level(struct bio *bio)
612{
613 return bio_data_dir(bio) == WRITE ?
614 WRITE_LOCK_LEVEL :
615 READ_WRITE_LOCK_LEVEL;
616}
617
/*----------------------------------------------------------------
 * Per bio data
 *--------------------------------------------------------------*/

/*
 * If using writeback, leave out struct per_bio_data's writethrough fields.
 */
625#define PB_DATA_SIZE_WB (offsetof(struct per_bio_data, cache))
626#define PB_DATA_SIZE_WT (sizeof(struct per_bio_data))
627
628static size_t get_per_bio_data_size(struct cache *cache)
629{
630 return writethrough_mode(&cache->features) ? PB_DATA_SIZE_WT : PB_DATA_SIZE_WB;
631}
632
633static struct per_bio_data *get_per_bio_data(struct bio *bio, size_t data_size)
634{
635 struct per_bio_data *pb = dm_per_bio_data(bio, data_size);
636 BUG_ON(!pb);
637 return pb;
638}
639
640static struct per_bio_data *init_per_bio_data(struct bio *bio, size_t data_size)
641{
642 struct per_bio_data *pb = get_per_bio_data(bio, data_size);
643
644 pb->tick = false;
645 pb->req_nr = dm_bio_get_target_bio_nr(bio);
646 pb->cell = NULL;
647 pb->len = 0;
648
649 return pb;
650}
651
652
653
654static void defer_bio(struct cache *cache, struct bio *bio)
655{
656 unsigned long flags;
657
658 spin_lock_irqsave(&cache->lock, flags);
659 bio_list_add(&cache->deferred_bios, bio);
660 spin_unlock_irqrestore(&cache->lock, flags);
661
662 wake_deferred_bio_worker(cache);
663}
664
665static void defer_bios(struct cache *cache, struct bio_list *bios)
666{
667 unsigned long flags;
668
669 spin_lock_irqsave(&cache->lock, flags);
670 bio_list_merge(&cache->deferred_bios, bios);
671 bio_list_init(bios);
672 spin_unlock_irqrestore(&cache->lock, flags);
673
674 wake_deferred_bio_worker(cache);
675}
676
677
678
679static bool bio_detain_shared(struct cache *cache, dm_oblock_t oblock, struct bio *bio)
680{
681 bool r;
682 size_t pb_size;
683 struct per_bio_data *pb;
684 struct dm_cell_key_v2 key;
685 dm_oblock_t end = to_oblock(from_oblock(oblock) + 1ULL);
686 struct dm_bio_prison_cell_v2 *cell_prealloc, *cell;
687
688 cell_prealloc = alloc_prison_cell(cache);
689 if (!cell_prealloc) {
690 defer_bio(cache, bio);
691 return false;
692 }
693
694 build_key(oblock, end, &key);
695 r = dm_cell_get_v2(cache->prison, &key, lock_level(bio), bio, cell_prealloc, &cell);
696 if (!r) {
697
698
699
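 /*
  * Failed to get the lock.
  */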
700 free_prison_cell(cache, cell_prealloc);
701 return r;
702 }
703
704 if (cell != cell_prealloc)
705 free_prison_cell(cache, cell_prealloc);
706
707 pb_size = get_per_bio_data_size(cache);
708 pb = get_per_bio_data(bio, pb_size);
709 pb->cell = cell;
710
711 return r;
712}
713
714
715
716static bool is_dirty(struct cache *cache, dm_cblock_t b)
717{
718 return test_bit(from_cblock(b), cache->dirty_bitset);
719}
720
721static void set_dirty(struct cache *cache, dm_cblock_t cblock)
722{
723 if (!test_and_set_bit(from_cblock(cblock), cache->dirty_bitset)) {
724 atomic_inc(&cache->nr_dirty);
725 policy_set_dirty(cache->policy, cblock);
726 }
727}
728
/*
 * These two are called after migrations to force the policy and the dirty
 * bitset to be kept in sync.
 */
733static void force_set_dirty(struct cache *cache, dm_cblock_t cblock)
734{
735 if (!test_and_set_bit(from_cblock(cblock), cache->dirty_bitset))
736 atomic_inc(&cache->nr_dirty);
737 policy_set_dirty(cache->policy, cblock);
738}
739
740static void force_clear_dirty(struct cache *cache, dm_cblock_t cblock)
741{
742 if (test_and_clear_bit(from_cblock(cblock), cache->dirty_bitset)) {
743 if (atomic_dec_return(&cache->nr_dirty) == 0)
744 dm_table_event(cache->ti->table);
745 }
746
747 policy_clear_dirty(cache->policy, cblock);
748}
749
750
751
752static bool block_size_is_power_of_two(struct cache *cache)
753{
754 return cache->sectors_per_block_shift >= 0;
755}
756
757
758#if defined(CONFIG_ARM) && __GNUC__ == 4 && __GNUC_MINOR__ <= 6
759__always_inline
760#endif
761static dm_block_t block_div(dm_block_t b, uint32_t n)
762{
763 do_div(b, n);
764
765 return b;
766}
767
768static dm_block_t oblocks_per_dblock(struct cache *cache)
769{
770 dm_block_t oblocks = cache->discard_block_size;
771
772 if (block_size_is_power_of_two(cache))
773 oblocks >>= cache->sectors_per_block_shift;
774 else
775 oblocks = block_div(oblocks, cache->sectors_per_block);
776
777 return oblocks;
778}
779
780static dm_dblock_t oblock_to_dblock(struct cache *cache, dm_oblock_t oblock)
781{
782 return to_dblock(block_div(from_oblock(oblock),
783 oblocks_per_dblock(cache)));
784}
785
786static void set_discard(struct cache *cache, dm_dblock_t b)
787{
788 unsigned long flags;
789
790 BUG_ON(from_dblock(b) >= from_dblock(cache->discard_nr_blocks));
791 atomic_inc(&cache->stats.discard_count);
792
793 spin_lock_irqsave(&cache->lock, flags);
794 set_bit(from_dblock(b), cache->discard_bitset);
795 spin_unlock_irqrestore(&cache->lock, flags);
796}
797
798static void clear_discard(struct cache *cache, dm_dblock_t b)
799{
800 unsigned long flags;
801
802 spin_lock_irqsave(&cache->lock, flags);
803 clear_bit(from_dblock(b), cache->discard_bitset);
804 spin_unlock_irqrestore(&cache->lock, flags);
805}
806
807static bool is_discarded(struct cache *cache, dm_dblock_t b)
808{
809 int r;
810 unsigned long flags;
811
812 spin_lock_irqsave(&cache->lock, flags);
813 r = test_bit(from_dblock(b), cache->discard_bitset);
814 spin_unlock_irqrestore(&cache->lock, flags);
815
816 return r;
817}
818
819static bool is_discarded_oblock(struct cache *cache, dm_oblock_t b)
820{
821 int r;
822 unsigned long flags;
823
824 spin_lock_irqsave(&cache->lock, flags);
825 r = test_bit(from_dblock(oblock_to_dblock(cache, b)),
826 cache->discard_bitset);
827 spin_unlock_irqrestore(&cache->lock, flags);
828
829 return r;
830}
831
832
833
834
835static void remap_to_origin(struct cache *cache, struct bio *bio)
836{
837 bio->bi_bdev = cache->origin_dev->bdev;
838}
839
840static void remap_to_cache(struct cache *cache, struct bio *bio,
841 dm_cblock_t cblock)
842{
843 sector_t bi_sector = bio->bi_iter.bi_sector;
844 sector_t block = from_cblock(cblock);
845
846 bio->bi_bdev = cache->cache_dev->bdev;
847 if (!block_size_is_power_of_two(cache))
848 bio->bi_iter.bi_sector =
849 (block * cache->sectors_per_block) +
850 sector_div(bi_sector, cache->sectors_per_block);
851 else
852 bio->bi_iter.bi_sector =
853 (block << cache->sectors_per_block_shift) |
854 (bi_sector & (cache->sectors_per_block - 1));
855}
856
857static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio)
858{
859 unsigned long flags;
860 size_t pb_data_size = get_per_bio_data_size(cache);
861 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
862
863 spin_lock_irqsave(&cache->lock, flags);
864 if (cache->need_tick_bio && !op_is_flush(bio->bi_opf) &&
865 bio_op(bio) != REQ_OP_DISCARD) {
866 pb->tick = true;
867 cache->need_tick_bio = false;
868 }
869 spin_unlock_irqrestore(&cache->lock, flags);
870}
871
872static void remap_to_origin_clear_discard(struct cache *cache, struct bio *bio,
873 dm_oblock_t oblock)
874{
875
876 check_if_tick_bio_needed(cache, bio);
877 remap_to_origin(cache, bio);
878 if (bio_data_dir(bio) == WRITE)
879 clear_discard(cache, oblock_to_dblock(cache, oblock));
880}
881
882static void remap_to_cache_dirty(struct cache *cache, struct bio *bio,
883 dm_oblock_t oblock, dm_cblock_t cblock)
884{
885 check_if_tick_bio_needed(cache, bio);
886 remap_to_cache(cache, bio, cblock);
887 if (bio_data_dir(bio) == WRITE) {
888 set_dirty(cache, cblock);
889 clear_discard(cache, oblock_to_dblock(cache, oblock));
890 }
891}
892
893static dm_oblock_t get_bio_block(struct cache *cache, struct bio *bio)
894{
895 sector_t block_nr = bio->bi_iter.bi_sector;
896
897 if (!block_size_is_power_of_two(cache))
898 (void) sector_div(block_nr, cache->sectors_per_block);
899 else
900 block_nr >>= cache->sectors_per_block_shift;
901
902 return to_oblock(block_nr);
903}
904
905static bool accountable_bio(struct cache *cache, struct bio *bio)
906{
907 return bio_op(bio) != REQ_OP_DISCARD;
908}
909
910static void accounted_begin(struct cache *cache, struct bio *bio)
911{
912 size_t pb_data_size = get_per_bio_data_size(cache);
913 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
914
915 if (accountable_bio(cache, bio)) {
916 pb->len = bio_sectors(bio);
917 iot_io_begin(&cache->tracker, pb->len);
918 }
919}
920
921static void accounted_complete(struct cache *cache, struct bio *bio)
922{
923 size_t pb_data_size = get_per_bio_data_size(cache);
924 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
925
926 iot_io_end(&cache->tracker, pb->len);
927}
928
929static void accounted_request(struct cache *cache, struct bio *bio)
930{
931 accounted_begin(cache, bio);
932 generic_make_request(bio);
933}
934
935static void issue_op(struct bio *bio, void *context)
936{
937 struct cache *cache = context;
938 accounted_request(cache, bio);
939}
940
941static void defer_writethrough_bio(struct cache *cache, struct bio *bio)
942{
943 unsigned long flags;
944
945 spin_lock_irqsave(&cache->lock, flags);
946 bio_list_add(&cache->deferred_writethrough_bios, bio);
947 spin_unlock_irqrestore(&cache->lock, flags);
948
949 wake_deferred_writethrough_worker(cache);
950}
951
952static void writethrough_endio(struct bio *bio)
953{
954 struct per_bio_data *pb = get_per_bio_data(bio, PB_DATA_SIZE_WT);
955
956 dm_unhook_bio(&pb->hook_info, bio);
957
958 if (bio->bi_error) {
959 bio_endio(bio);
960 return;
961 }
962
963 dm_bio_restore(&pb->bio_details, bio);
964 remap_to_cache(pb->cache, bio, pb->cblock);
965
966
967
968
969
970
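 /*
  * We can't issue this bio directly, since we're in interrupt
  * context.  So it gets put on a list for processing by the
  * worker thread.
  */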
971 defer_writethrough_bio(pb->cache, bio);
972}
973
/*
 * When running in writethrough mode we need to send writes to clean blocks
 * to both the cache and origin devices.  In future we'd like to clone the
 * bio and send them in parallel, but for now we're doing them in
 * series as this is easier.
 */
981static void remap_to_origin_then_cache(struct cache *cache, struct bio *bio,
982 dm_oblock_t oblock, dm_cblock_t cblock)
983{
984 struct per_bio_data *pb = get_per_bio_data(bio, PB_DATA_SIZE_WT);
985
986 pb->cache = cache;
987 pb->cblock = cblock;
988 dm_hook_bio(&pb->hook_info, bio, writethrough_endio, NULL);
989 dm_bio_record(&pb->bio_details, bio);
990
991 remap_to_origin_clear_discard(pb->cache, bio, oblock);
992}
993
994
995
996
997static enum cache_metadata_mode get_cache_mode(struct cache *cache)
998{
999 return cache->features.mode;
1000}
1001
1002static const char *cache_device_name(struct cache *cache)
1003{
1004 return dm_device_name(dm_table_get_md(cache->ti->table));
1005}
1006
1007static void notify_mode_switch(struct cache *cache, enum cache_metadata_mode mode)
1008{
1009 const char *descs[] = {
1010 "write",
1011 "read-only",
1012 "fail"
1013 };
1014
1015 dm_table_event(cache->ti->table);
1016 DMINFO("%s: switching cache to %s mode",
1017 cache_device_name(cache), descs[(int)mode]);
1018}
1019
1020static void set_cache_mode(struct cache *cache, enum cache_metadata_mode new_mode)
1021{
1022 bool needs_check;
1023 enum cache_metadata_mode old_mode = get_cache_mode(cache);
1024
1025 if (dm_cache_metadata_needs_check(cache->cmd, &needs_check)) {
1026 DMERR("%s: unable to read needs_check flag, setting failure mode.",
1027 cache_device_name(cache));
1028 new_mode = CM_FAIL;
1029 }
1030
1031 if (new_mode == CM_WRITE && needs_check) {
1032 DMERR("%s: unable to switch cache to write mode until repaired.",
1033 cache_device_name(cache));
1034 if (old_mode != new_mode)
1035 new_mode = old_mode;
1036 else
1037 new_mode = CM_READ_ONLY;
1038 }
1039
1040
1041 if (old_mode == CM_FAIL)
1042 new_mode = CM_FAIL;
1043
1044 switch (new_mode) {
1045 case CM_FAIL:
1046 case CM_READ_ONLY:
1047 dm_cache_metadata_set_read_only(cache->cmd);
1048 break;
1049
1050 case CM_WRITE:
1051 dm_cache_metadata_set_read_write(cache->cmd);
1052 break;
1053 }
1054
1055 cache->features.mode = new_mode;
1056
1057 if (new_mode != old_mode)
1058 notify_mode_switch(cache, new_mode);
1059}
1060
1061static void abort_transaction(struct cache *cache)
1062{
1063 const char *dev_name = cache_device_name(cache);
1064
1065 if (get_cache_mode(cache) >= CM_READ_ONLY)
1066 return;
1067
1068 if (dm_cache_metadata_set_needs_check(cache->cmd)) {
1069 DMERR("%s: failed to set 'needs_check' flag in metadata", dev_name);
1070 set_cache_mode(cache, CM_FAIL);
1071 }
1072
1073 DMERR_LIMIT("%s: aborting current metadata transaction", dev_name);
1074 if (dm_cache_metadata_abort(cache->cmd)) {
1075 DMERR("%s: failed to abort metadata transaction", dev_name);
1076 set_cache_mode(cache, CM_FAIL);
1077 }
1078}
1079
1080static void metadata_operation_failed(struct cache *cache, const char *op, int r)
1081{
1082 DMERR_LIMIT("%s: metadata operation '%s' failed: error = %d",
1083 cache_device_name(cache), op, r);
1084 abort_transaction(cache);
1085 set_cache_mode(cache, CM_READ_ONLY);
1086}
1087
1088
1089
1090static void load_stats(struct cache *cache)
1091{
1092 struct dm_cache_statistics stats;
1093
1094 dm_cache_metadata_get_stats(cache->cmd, &stats);
1095 atomic_set(&cache->stats.read_hit, stats.read_hits);
1096 atomic_set(&cache->stats.read_miss, stats.read_misses);
1097 atomic_set(&cache->stats.write_hit, stats.write_hits);
1098 atomic_set(&cache->stats.write_miss, stats.write_misses);
1099}
1100
1101static void save_stats(struct cache *cache)
1102{
1103 struct dm_cache_statistics stats;
1104
1105 if (get_cache_mode(cache) >= CM_READ_ONLY)
1106 return;
1107
1108 stats.read_hits = atomic_read(&cache->stats.read_hit);
1109 stats.read_misses = atomic_read(&cache->stats.read_miss);
1110 stats.write_hits = atomic_read(&cache->stats.write_hit);
1111 stats.write_misses = atomic_read(&cache->stats.write_miss);
1112
1113 dm_cache_metadata_set_stats(cache->cmd, &stats);
1114}
1115
1116static void update_stats(struct cache_stats *stats, enum policy_operation op)
1117{
1118 switch (op) {
1119 case POLICY_PROMOTE:
1120 atomic_inc(&stats->promotion);
1121 break;
1122
1123 case POLICY_DEMOTE:
1124 atomic_inc(&stats->demotion);
1125 break;
1126
1127 case POLICY_WRITEBACK:
1128 atomic_inc(&stats->writeback);
1129 break;
1130 }
1131}
1132
1133
1134
1135
1136
1137
1138
1139
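/*----------------------------------------------------------------
 * Migration processing
 *
 * Migration covers moving data from the origin device to the cache, or
 * vice versa.
 *--------------------------------------------------------------*/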
1140static void inc_io_migrations(struct cache *cache)
1141{
1142 atomic_inc(&cache->nr_io_migrations);
1143}
1144
1145static void dec_io_migrations(struct cache *cache)
1146{
1147 atomic_dec(&cache->nr_io_migrations);
1148}
1149
1150static bool discard_or_flush(struct bio *bio)
1151{
1152 return bio_op(bio) == REQ_OP_DISCARD || op_is_flush(bio->bi_opf);
1153}
1154
1155static void calc_discard_block_range(struct cache *cache, struct bio *bio,
1156 dm_dblock_t *b, dm_dblock_t *e)
1157{
1158 sector_t sb = bio->bi_iter.bi_sector;
1159 sector_t se = bio_end_sector(bio);
1160
1161 *b = to_dblock(dm_sector_div_up(sb, cache->discard_block_size));
1162
1163 if (se - sb < cache->discard_block_size)
1164 *e = *b;
1165 else
1166 *e = to_dblock(block_div(se, cache->discard_block_size));
1167}
1168
1169
1170
1171static void prevent_background_work(struct cache *cache)
1172{
1173 lockdep_off();
1174 down_write(&cache->background_work_lock);
1175 lockdep_on();
1176}
1177
1178static void allow_background_work(struct cache *cache)
1179{
1180 lockdep_off();
1181 up_write(&cache->background_work_lock);
1182 lockdep_on();
1183}
1184
1185static bool background_work_begin(struct cache *cache)
1186{
1187 bool r;
1188
1189 lockdep_off();
1190 r = down_read_trylock(&cache->background_work_lock);
1191 lockdep_on();
1192
1193 return r;
1194}
1195
1196static void background_work_end(struct cache *cache)
1197{
1198 lockdep_off();
1199 up_read(&cache->background_work_lock);
1200 lockdep_on();
1201}
1202
1203
1204
1205static void quiesce(struct dm_cache_migration *mg,
1206 void (*continuation)(struct work_struct *))
1207{
1208 init_continuation(&mg->k, continuation);
1209 dm_cell_quiesce_v2(mg->cache->prison, mg->cell, &mg->k.ws);
1210}
1211
1212static struct dm_cache_migration *ws_to_mg(struct work_struct *ws)
1213{
1214 struct continuation *k = container_of(ws, struct continuation, ws);
1215 return container_of(k, struct dm_cache_migration, k);
1216}
1217
1218static void copy_complete(int read_err, unsigned long write_err, void *context)
1219{
1220 struct dm_cache_migration *mg = container_of(context, struct dm_cache_migration, k);
1221
1222 if (read_err || write_err)
1223 mg->k.input = -EIO;
1224
1225 queue_continuation(mg->cache->wq, &mg->k);
1226}
1227
1228static int copy(struct dm_cache_migration *mg, bool promote)
1229{
1230 int r;
1231 struct dm_io_region o_region, c_region;
1232 struct cache *cache = mg->cache;
1233
1234 o_region.bdev = cache->origin_dev->bdev;
1235 o_region.sector = from_oblock(mg->op->oblock) * cache->sectors_per_block;
1236 o_region.count = cache->sectors_per_block;
1237
1238 c_region.bdev = cache->cache_dev->bdev;
1239 c_region.sector = from_cblock(mg->op->cblock) * cache->sectors_per_block;
1240 c_region.count = cache->sectors_per_block;
1241
1242 if (promote)
1243 r = dm_kcopyd_copy(cache->copier, &o_region, 1, &c_region, 0, copy_complete, &mg->k);
1244 else
1245 r = dm_kcopyd_copy(cache->copier, &c_region, 1, &o_region, 0, copy_complete, &mg->k);
1246
1247 return r;
1248}
1249
1250static void bio_drop_shared_lock(struct cache *cache, struct bio *bio)
1251{
1252 size_t pb_data_size = get_per_bio_data_size(cache);
1253 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
1254
1255 if (pb->cell && dm_cell_put_v2(cache->prison, pb->cell))
1256 free_prison_cell(cache, pb->cell);
1257 pb->cell = NULL;
1258}
1259
1260static void overwrite_endio(struct bio *bio)
1261{
1262 struct dm_cache_migration *mg = bio->bi_private;
1263 struct cache *cache = mg->cache;
1264 size_t pb_data_size = get_per_bio_data_size(cache);
1265 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
1266
1267 dm_unhook_bio(&pb->hook_info, bio);
1268
1269 if (bio->bi_error)
1270 mg->k.input = bio->bi_error;
1271
1272 queue_continuation(mg->cache->wq, &mg->k);
1273}
1274
1275static void overwrite(struct dm_cache_migration *mg,
1276 void (*continuation)(struct work_struct *))
1277{
1278 struct bio *bio = mg->overwrite_bio;
1279 size_t pb_data_size = get_per_bio_data_size(mg->cache);
1280 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
1281
1282 dm_hook_bio(&pb->hook_info, bio, overwrite_endio, mg);
1283
1284
1285
1286
1287
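 /*
  * The overwrite bio is part of the copy operation, as such it does
  * not set/clear discard or dirty flags.
  */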
1288 if (mg->op->op == POLICY_PROMOTE)
1289 remap_to_cache(mg->cache, bio, mg->op->cblock);
1290 else
1291 remap_to_origin(mg->cache, bio);
1292
1293 init_continuation(&mg->k, continuation);
1294 accounted_request(mg->cache, bio);
1295}
1296
/*
 * Migration steps:
 *
 * 1) exclusive lock preventing WRITEs
 * 2) quiesce
 * 3) copy or issue overwrite bio
 * 4) upgrade to exclusive lock preventing READs and WRITEs
 * 5) quiesce
 * 6) update metadata and commit
 * 7) unlock
 */
1308static void mg_complete(struct dm_cache_migration *mg, bool success)
1309{
1310 struct bio_list bios;
1311 struct cache *cache = mg->cache;
1312 struct policy_work *op = mg->op;
1313 dm_cblock_t cblock = op->cblock;
1314
1315 if (success)
1316 update_stats(&cache->stats, op->op);
1317
1318 switch (op->op) {
1319 case POLICY_PROMOTE:
1320 clear_discard(cache, oblock_to_dblock(cache, op->oblock));
1321 policy_complete_background_work(cache->policy, op, success);
1322
1323 if (mg->overwrite_bio) {
1324 if (success)
1325 force_set_dirty(cache, cblock);
1326 else
1327 mg->overwrite_bio->bi_error = (mg->k.input ? : -EIO);
1328 bio_endio(mg->overwrite_bio);
1329 } else {
1330 if (success)
1331 force_clear_dirty(cache, cblock);
1332 dec_io_migrations(cache);
1333 }
1334 break;
1335
1336 case POLICY_DEMOTE:
1337
1338
1339
1340 if (success)
1341 force_clear_dirty(cache, cblock);
1342 policy_complete_background_work(cache->policy, op, success);
1343 dec_io_migrations(cache);
1344 break;
1345
1346 case POLICY_WRITEBACK:
1347 if (success)
1348 force_clear_dirty(cache, cblock);
1349 policy_complete_background_work(cache->policy, op, success);
1350 dec_io_migrations(cache);
1351 break;
1352 }
1353
1354 bio_list_init(&bios);
1355 if (mg->cell) {
1356 if (dm_cell_unlock_v2(cache->prison, mg->cell, &bios))
1357 free_prison_cell(cache, mg->cell);
1358 }
1359
1360 free_migration(mg);
1361 defer_bios(cache, &bios);
1362 wake_migration_worker(cache);
1363
1364 background_work_end(cache);
1365}
1366
1367static void mg_success(struct work_struct *ws)
1368{
1369 struct dm_cache_migration *mg = ws_to_mg(ws);
1370 mg_complete(mg, mg->k.input == 0);
1371}
1372
1373static void mg_update_metadata(struct work_struct *ws)
1374{
1375 int r;
1376 struct dm_cache_migration *mg = ws_to_mg(ws);
1377 struct cache *cache = mg->cache;
1378 struct policy_work *op = mg->op;
1379
1380 switch (op->op) {
1381 case POLICY_PROMOTE:
1382 r = dm_cache_insert_mapping(cache->cmd, op->cblock, op->oblock);
1383 if (r) {
1384 DMERR_LIMIT("%s: migration failed; couldn't insert mapping",
1385 cache_device_name(cache));
1386 metadata_operation_failed(cache, "dm_cache_insert_mapping", r);
1387
1388 mg_complete(mg, false);
1389 return;
1390 }
1391 mg_complete(mg, true);
1392 break;
1393
1394 case POLICY_DEMOTE:
1395 r = dm_cache_remove_mapping(cache->cmd, op->cblock);
1396 if (r) {
1397 DMERR_LIMIT("%s: migration failed; couldn't update on disk metadata",
1398 cache_device_name(cache));
1399 metadata_operation_failed(cache, "dm_cache_remove_mapping", r);
1400
1401 mg_complete(mg, false);
1402 return;
1403 }
1404
 /*
  * It would be nice if we only had to commit when a REQ_FLUSH
  * comes through.  But there's one scenario that we have to
  * look out for:
  *
  * - vblock x in a cache block
  * - demotion occurs
  * - the cache block gets reallocated and overwritten
  * - crash
  *
  * When we recover, because there was no commit the cache will
  * rollback to having the data for vblock x in the cache block.
  * But the cache block has since been overwritten, so it'll end
  * up pointing to data that was never in 'x' during the history
  * of the device.
  *
  * To avoid this issue we require a commit as part of the
  * demotion operation.
  */
1424 init_continuation(&mg->k, mg_success);
1425 continue_after_commit(&cache->committer, &mg->k);
1426 schedule_commit(&cache->committer);
1427 break;
1428
1429 case POLICY_WRITEBACK:
1430 mg_complete(mg, true);
1431 break;
1432 }
1433}
1434
1435static void mg_update_metadata_after_copy(struct work_struct *ws)
1436{
1437 struct dm_cache_migration *mg = ws_to_mg(ws);
1438
1439
1440
1441
1442 if (mg->k.input)
1443 mg_complete(mg, false);
1444 else
1445 mg_update_metadata(ws);
1446}
1447
1448static void mg_upgrade_lock(struct work_struct *ws)
1449{
1450 int r;
1451 struct dm_cache_migration *mg = ws_to_mg(ws);
1452
1453
1454
1455
1456 if (mg->k.input)
1457 mg_complete(mg, false);
1458
1459 else {
1460
1461
1462
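 /*
  * Now we want the lock to prevent both reads and writes.
  */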
1463 r = dm_cell_lock_promote_v2(mg->cache->prison, mg->cell,
1464 READ_WRITE_LOCK_LEVEL);
1465 if (r < 0)
1466 mg_complete(mg, false);
1467
1468 else if (r)
1469 quiesce(mg, mg_update_metadata);
1470
1471 else
1472 mg_update_metadata(ws);
1473 }
1474}
1475
1476static void mg_copy(struct work_struct *ws)
1477{
1478 int r;
1479 struct dm_cache_migration *mg = ws_to_mg(ws);
1480
1481 if (mg->overwrite_bio) {
 /*
  * It's safe to do this here, even though it's new data,
  * because all IO has been locked out of the block.
  *
  * mg_lock_writes() already took READ_WRITE_LOCK_LEVEL
  * so we're _not_ using mg_upgrade_lock() as the continuation.
  */
1489 overwrite(mg, mg_update_metadata_after_copy);
1490
1491 } else {
1492 struct cache *cache = mg->cache;
1493 struct policy_work *op = mg->op;
1494 bool is_policy_promote = (op->op == POLICY_PROMOTE);
1495
1496 if ((!is_policy_promote && !is_dirty(cache, op->cblock)) ||
1497 is_discarded_oblock(cache, op->oblock)) {
1498 mg_upgrade_lock(ws);
1499 return;
1500 }
1501
1502 init_continuation(&mg->k, mg_upgrade_lock);
1503
1504 r = copy(mg, is_policy_promote);
1505 if (r) {
1506 DMERR_LIMIT("%s: migration copy failed", cache_device_name(cache));
1507 mg->k.input = -EIO;
1508 mg_complete(mg, false);
1509 }
1510 }
1511}
1512
1513static int mg_lock_writes(struct dm_cache_migration *mg)
1514{
1515 int r;
1516 struct dm_cell_key_v2 key;
1517 struct cache *cache = mg->cache;
1518 struct dm_bio_prison_cell_v2 *prealloc;
1519
1520 prealloc = alloc_prison_cell(cache);
1521 if (!prealloc) {
1522 DMERR_LIMIT("%s: alloc_prison_cell failed", cache_device_name(cache));
1523 mg_complete(mg, false);
1524 return -ENOMEM;
1525 }
1526
 /*
  * Prevent writes to the block, but allow reads to continue.
  * Unless we're using an overwrite bio, in which case we lock
  * everything.
  */
1532 build_key(mg->op->oblock, oblock_succ(mg->op->oblock), &key);
1533 r = dm_cell_lock_v2(cache->prison, &key,
1534 mg->overwrite_bio ? READ_WRITE_LOCK_LEVEL : WRITE_LOCK_LEVEL,
1535 prealloc, &mg->cell);
1536 if (r < 0) {
1537 free_prison_cell(cache, prealloc);
1538 mg_complete(mg, false);
1539 return r;
1540 }
1541
1542 if (mg->cell != prealloc)
1543 free_prison_cell(cache, prealloc);
1544
1545 if (r == 0)
1546 mg_copy(&mg->k.ws);
1547 else
1548 quiesce(mg, mg_copy);
1549
1550 return 0;
1551}
1552
1553static int mg_start(struct cache *cache, struct policy_work *op, struct bio *bio)
1554{
1555 struct dm_cache_migration *mg;
1556
1557 if (!background_work_begin(cache)) {
1558 policy_complete_background_work(cache->policy, op, false);
1559 return -EPERM;
1560 }
1561
1562 mg = alloc_migration(cache);
1563 if (!mg) {
1564 policy_complete_background_work(cache->policy, op, false);
1565 background_work_end(cache);
1566 return -ENOMEM;
1567 }
1568
1569 memset(mg, 0, sizeof(*mg));
1570
1571 mg->cache = cache;
1572 mg->op = op;
1573 mg->overwrite_bio = bio;
1574
1575 if (!bio)
1576 inc_io_migrations(cache);
1577
1578 return mg_lock_writes(mg);
1579}
1580
1581
1582
1583
1584
1585static void invalidate_complete(struct dm_cache_migration *mg, bool success)
1586{
1587 struct bio_list bios;
1588 struct cache *cache = mg->cache;
1589
1590 bio_list_init(&bios);
1591 if (dm_cell_unlock_v2(cache->prison, mg->cell, &bios))
1592 free_prison_cell(cache, mg->cell);
1593
1594 if (!success && mg->overwrite_bio)
1595 bio_io_error(mg->overwrite_bio);
1596
1597 free_migration(mg);
1598 defer_bios(cache, &bios);
1599
1600 background_work_end(cache);
1601}
1602
1603static void invalidate_completed(struct work_struct *ws)
1604{
1605 struct dm_cache_migration *mg = ws_to_mg(ws);
1606 invalidate_complete(mg, !mg->k.input);
1607}
1608
1609static int invalidate_cblock(struct cache *cache, dm_cblock_t cblock)
1610{
1611 int r = policy_invalidate_mapping(cache->policy, cblock);
1612 if (!r) {
1613 r = dm_cache_remove_mapping(cache->cmd, cblock);
1614 if (r) {
1615 DMERR_LIMIT("%s: invalidation failed; couldn't update on disk metadata",
1616 cache_device_name(cache));
1617 metadata_operation_failed(cache, "dm_cache_remove_mapping", r);
1618 }
1619
1620 } else if (r == -ENODATA) {
1621
1622
1623
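 /*
  * Harmless, already unmapped.
  */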
1624 r = 0;
1625
1626 } else
1627 DMERR("%s: policy_invalidate_mapping failed", cache_device_name(cache));
1628
1629 return r;
1630}
1631
1632static void invalidate_remove(struct work_struct *ws)
1633{
1634 int r;
1635 struct dm_cache_migration *mg = ws_to_mg(ws);
1636 struct cache *cache = mg->cache;
1637
1638 r = invalidate_cblock(cache, mg->invalidate_cblock);
1639 if (r) {
1640 invalidate_complete(mg, false);
1641 return;
1642 }
1643
1644 init_continuation(&mg->k, invalidate_completed);
1645 continue_after_commit(&cache->committer, &mg->k);
1646 remap_to_origin_clear_discard(cache, mg->overwrite_bio, mg->invalidate_oblock);
1647 mg->overwrite_bio = NULL;
1648 schedule_commit(&cache->committer);
1649}
1650
1651static int invalidate_lock(struct dm_cache_migration *mg)
1652{
1653 int r;
1654 struct dm_cell_key_v2 key;
1655 struct cache *cache = mg->cache;
1656 struct dm_bio_prison_cell_v2 *prealloc;
1657
1658 prealloc = alloc_prison_cell(cache);
1659 if (!prealloc) {
1660 invalidate_complete(mg, false);
1661 return -ENOMEM;
1662 }
1663
1664 build_key(mg->invalidate_oblock, oblock_succ(mg->invalidate_oblock), &key);
1665 r = dm_cell_lock_v2(cache->prison, &key,
1666 READ_WRITE_LOCK_LEVEL, prealloc, &mg->cell);
1667 if (r < 0) {
1668 free_prison_cell(cache, prealloc);
1669 invalidate_complete(mg, false);
1670 return r;
1671 }
1672
1673 if (mg->cell != prealloc)
1674 free_prison_cell(cache, prealloc);
1675
1676 if (r)
1677 quiesce(mg, invalidate_remove);
1678
1679 else {
1680
1681
1682
1683
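 /*
  * We can't call invalidate_remove() directly here because we
  * might still be in request context.
  */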
1684 init_continuation(&mg->k, invalidate_remove);
1685 queue_work(cache->wq, &mg->k.ws);
1686 }
1687
1688 return 0;
1689}
1690
1691static int invalidate_start(struct cache *cache, dm_cblock_t cblock,
1692 dm_oblock_t oblock, struct bio *bio)
1693{
1694 struct dm_cache_migration *mg;
1695
1696 if (!background_work_begin(cache))
1697 return -EPERM;
1698
1699 mg = alloc_migration(cache);
1700 if (!mg) {
1701 background_work_end(cache);
1702 return -ENOMEM;
1703 }
1704
1705 memset(mg, 0, sizeof(*mg));
1706
1707 mg->cache = cache;
1708 mg->overwrite_bio = bio;
1709 mg->invalidate_cblock = cblock;
1710 mg->invalidate_oblock = oblock;
1711
1712 return invalidate_lock(mg);
1713}
1714
1715
1716
1717
1718
1719enum busy {
1720 IDLE,
1721 BUSY
1722};
1723
1724static enum busy spare_migration_bandwidth(struct cache *cache)
1725{
1726 bool idle = iot_idle_for(&cache->tracker, HZ);
1727 sector_t current_volume = (atomic_read(&cache->nr_io_migrations) + 1) *
1728 cache->sectors_per_block;
1729
1730 if (idle && current_volume <= cache->migration_threshold)
1731 return IDLE;
1732 else
1733 return BUSY;
1734}
1735
1736static void inc_hit_counter(struct cache *cache, struct bio *bio)
1737{
1738 atomic_inc(bio_data_dir(bio) == READ ?
1739 &cache->stats.read_hit : &cache->stats.write_hit);
1740}
1741
1742static void inc_miss_counter(struct cache *cache, struct bio *bio)
1743{
1744 atomic_inc(bio_data_dir(bio) == READ ?
1745 &cache->stats.read_miss : &cache->stats.write_miss);
1746}
1747
1748
1749
1750static bool bio_writes_complete_block(struct cache *cache, struct bio *bio)
1751{
1752 return (bio_data_dir(bio) == WRITE) &&
1753 (bio->bi_iter.bi_size == (cache->sectors_per_block << SECTOR_SHIFT));
1754}
1755
1756static bool optimisable_bio(struct cache *cache, struct bio *bio, dm_oblock_t block)
1757{
1758 return writeback_mode(&cache->features) &&
1759 (is_discarded_oblock(cache, block) || bio_writes_complete_block(cache, bio));
1760}
1761
1762static int map_bio(struct cache *cache, struct bio *bio, dm_oblock_t block,
1763 bool *commit_needed)
1764{
1765 int r, data_dir;
1766 bool rb, background_queued;
1767 dm_cblock_t cblock;
1768 size_t pb_data_size = get_per_bio_data_size(cache);
1769 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
1770
1771 *commit_needed = false;
1772
1773 rb = bio_detain_shared(cache, block, bio);
1774 if (!rb) {
 /*
  * An exclusive lock is held for this block, so we have to wait.
  * We set the commit_needed flag so the current transaction will
  * be committed asap, allowing this lock to be dropped sooner.
  */
1781 *commit_needed = true;
1782 return DM_MAPIO_SUBMITTED;
1783 }
1784
1785 data_dir = bio_data_dir(bio);
1786
1787 if (optimisable_bio(cache, bio, block)) {
1788 struct policy_work *op = NULL;
1789
1790 r = policy_lookup_with_work(cache->policy, block, &cblock, data_dir, true, &op);
1791 if (unlikely(r && r != -ENOENT)) {
1792 DMERR_LIMIT("%s: policy_lookup_with_work() failed with r = %d",
1793 cache_device_name(cache), r);
1794 bio_io_error(bio);
1795 return DM_MAPIO_SUBMITTED;
1796 }
1797
1798 if (r == -ENOENT && op) {
1799 bio_drop_shared_lock(cache, bio);
1800 BUG_ON(op->op != POLICY_PROMOTE);
1801 mg_start(cache, op, bio);
1802 return DM_MAPIO_SUBMITTED;
1803 }
1804 } else {
1805 r = policy_lookup(cache->policy, block, &cblock, data_dir, false, &background_queued);
1806 if (unlikely(r && r != -ENOENT)) {
1807 DMERR_LIMIT("%s: policy_lookup() failed with r = %d",
1808 cache_device_name(cache), r);
1809 bio_io_error(bio);
1810 return DM_MAPIO_SUBMITTED;
1811 }
1812
1813 if (background_queued)
1814 wake_migration_worker(cache);
1815 }
1816
1817 if (r == -ENOENT) {
1818
1819
1820
1821 inc_miss_counter(cache, bio);
1822 if (pb->req_nr == 0) {
1823 accounted_begin(cache, bio);
1824 remap_to_origin_clear_discard(cache, bio, block);
1825
1826 } else {
1827
1828
1829
1830
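 /*
  * This is a duplicate writethrough io that is no longer
  * needed because the block has been demoted.
  */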
1831 bio_endio(bio);
1832 return DM_MAPIO_SUBMITTED;
1833 }
1834 } else {
1835
1836
1837
1838 inc_hit_counter(cache, bio);
1839
1840
1841
1842
1843
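 /*
  * Passthrough always maps to the origin, invalidating any
  * cache blocks that are written to.
  */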
1844 if (passthrough_mode(&cache->features)) {
1845 if (bio_data_dir(bio) == WRITE) {
1846 bio_drop_shared_lock(cache, bio);
1847 atomic_inc(&cache->stats.demotion);
1848 invalidate_start(cache, cblock, block, bio);
1849 } else
1850 remap_to_origin_clear_discard(cache, bio, block);
1851
1852 } else {
1853 if (bio_data_dir(bio) == WRITE && writethrough_mode(&cache->features) &&
1854 !is_dirty(cache, cblock)) {
1855 remap_to_origin_then_cache(cache, bio, block, cblock);
1856 accounted_begin(cache, bio);
1857 } else
1858 remap_to_cache_dirty(cache, bio, block, cblock);
1859 }
1860 }
1861
1862
1863
1864
1865 if (bio->bi_opf & REQ_FUA) {
1866
1867
1868
1869
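 /*
  * issue_after_commit() will call accounted_begin() a second time.
  * So we call accounted_complete() to avoid double accounting.
  */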
1870 accounted_complete(cache, bio);
1871 issue_after_commit(&cache->committer, bio);
1872 *commit_needed = true;
1873 return DM_MAPIO_SUBMITTED;
1874 }
1875
1876 return DM_MAPIO_REMAPPED;
1877}
1878
1879static bool process_bio(struct cache *cache, struct bio *bio)
1880{
1881 bool commit_needed;
1882
1883 if (map_bio(cache, bio, get_bio_block(cache, bio), &commit_needed) == DM_MAPIO_REMAPPED)
1884 generic_make_request(bio);
1885
1886 return commit_needed;
1887}
1888
1889
1890
1891
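/*
 * A non-zero return indicates read_only or fail_io mode.
 */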
1892static int commit(struct cache *cache, bool clean_shutdown)
1893{
1894 int r;
1895
1896 if (get_cache_mode(cache) >= CM_READ_ONLY)
1897 return -EINVAL;
1898
1899 atomic_inc(&cache->stats.commit_count);
1900 r = dm_cache_commit(cache->cmd, clean_shutdown);
1901 if (r)
1902 metadata_operation_failed(cache, "dm_cache_commit", r);
1903
1904 return r;
1905}
1906
1907
1908
1909
1910static int commit_op(void *context)
1911{
1912 struct cache *cache = context;
1913
1914 if (dm_cache_changed_this_transaction(cache->cmd))
1915 return commit(cache, false);
1916
1917 return 0;
1918}
1919
1920
1921
1922static bool process_flush_bio(struct cache *cache, struct bio *bio)
1923{
1924 size_t pb_data_size = get_per_bio_data_size(cache);
1925 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
1926
1927 if (!pb->req_nr)
1928 remap_to_origin(cache, bio);
1929 else
1930 remap_to_cache(cache, bio, 0);
1931
1932 issue_after_commit(&cache->committer, bio);
1933 return true;
1934}
1935
1936static bool process_discard_bio(struct cache *cache, struct bio *bio)
1937{
1938 dm_dblock_t b, e;
1939
1940
1941
1942
1943 calc_discard_block_range(cache, bio, &b, &e);
1944 while (b != e) {
1945 set_discard(cache, b);
1946 b = to_dblock(from_dblock(b) + 1);
1947 }
1948
1949 bio_endio(bio);
1950
1951 return false;
1952}
1953
1954static void process_deferred_bios(struct work_struct *ws)
1955{
1956 struct cache *cache = container_of(ws, struct cache, deferred_bio_worker);
1957
1958 unsigned long flags;
1959 bool commit_needed = false;
1960 struct bio_list bios;
1961 struct bio *bio;
1962
1963 bio_list_init(&bios);
1964
1965 spin_lock_irqsave(&cache->lock, flags);
1966 bio_list_merge(&bios, &cache->deferred_bios);
1967 bio_list_init(&cache->deferred_bios);
1968 spin_unlock_irqrestore(&cache->lock, flags);
1969
1970 while ((bio = bio_list_pop(&bios))) {
1971 if (bio->bi_opf & REQ_PREFLUSH)
1972 commit_needed = process_flush_bio(cache, bio) || commit_needed;
1973
1974 else if (bio_op(bio) == REQ_OP_DISCARD)
1975 commit_needed = process_discard_bio(cache, bio) || commit_needed;
1976
1977 else
1978 commit_needed = process_bio(cache, bio) || commit_needed;
1979 }
1980
1981 if (commit_needed)
1982 schedule_commit(&cache->committer);
1983}
1984
1985static void process_deferred_writethrough_bios(struct work_struct *ws)
1986{
1987 struct cache *cache = container_of(ws, struct cache, deferred_writethrough_worker);
1988
1989 unsigned long flags;
1990 struct bio_list bios;
1991 struct bio *bio;
1992
1993 bio_list_init(&bios);
1994
1995 spin_lock_irqsave(&cache->lock, flags);
1996 bio_list_merge(&bios, &cache->deferred_writethrough_bios);
1997 bio_list_init(&cache->deferred_writethrough_bios);
1998 spin_unlock_irqrestore(&cache->lock, flags);
1999
2000
2001
2002
2003 while ((bio = bio_list_pop(&bios)))
2004 generic_make_request(bio);
2005}
2006
2007
2008
2009
2010
2011static void requeue_deferred_bios(struct cache *cache)
2012{
2013 struct bio *bio;
2014 struct bio_list bios;
2015
2016 bio_list_init(&bios);
2017 bio_list_merge(&bios, &cache->deferred_bios);
2018 bio_list_init(&cache->deferred_bios);
2019
2020 while ((bio = bio_list_pop(&bios))) {
2021 bio->bi_error = DM_ENDIO_REQUEUE;
2022 bio_endio(bio);
2023 }
2024}
2025
2026
2027
2028
2029
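/*
 * We want to commit periodically so that not too much
 * unwritten metadata builds up.
 */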
2030static void do_waker(struct work_struct *ws)
2031{
2032 struct cache *cache = container_of(to_delayed_work(ws), struct cache, waker);
2033
2034 policy_tick(cache->policy, true);
2035 wake_migration_worker(cache);
2036 schedule_commit(&cache->committer);
2037 queue_delayed_work(cache->wq, &cache->waker, COMMIT_PERIOD);
2038}
2039
2040static void check_migrations(struct work_struct *ws)
2041{
2042 int r;
2043 struct policy_work *op;
2044 struct cache *cache = container_of(ws, struct cache, migration_worker);
2045 enum busy b;
2046
2047 for (;;) {
2048 b = spare_migration_bandwidth(cache);
2049
2050 r = policy_get_background_work(cache->policy, b == IDLE, &op);
2051 if (r == -ENODATA)
2052 break;
2053
2054 if (r) {
2055 DMERR_LIMIT("%s: policy_background_work failed",
2056 cache_device_name(cache));
2057 break;
2058 }
2059
2060 r = mg_start(cache, op, NULL);
2061 if (r)
2062 break;
2063 }
2064}
2065
2066
2067
2068
2069
2070
2071
2072
2073
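/*----------------------------------------------------------------
 * Target methods
 *--------------------------------------------------------------*/

/*
 * This function gets called on the error paths of the constructor, so we
 * have to cope with a partially initialised struct.
 */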
2074static void destroy(struct cache *cache)
2075{
2076 unsigned i;
2077
2078 mempool_destroy(cache->migration_pool);
2079
2080 if (cache->prison)
2081 dm_bio_prison_destroy_v2(cache->prison);
2082
2083 if (cache->wq)
2084 destroy_workqueue(cache->wq);
2085
2086 if (cache->dirty_bitset)
2087 free_bitset(cache->dirty_bitset);
2088
2089 if (cache->discard_bitset)
2090 free_bitset(cache->discard_bitset);
2091
2092 if (cache->copier)
2093 dm_kcopyd_client_destroy(cache->copier);
2094
2095 if (cache->cmd)
2096 dm_cache_metadata_close(cache->cmd);
2097
2098 if (cache->metadata_dev)
2099 dm_put_device(cache->ti, cache->metadata_dev);
2100
2101 if (cache->origin_dev)
2102 dm_put_device(cache->ti, cache->origin_dev);
2103
2104 if (cache->cache_dev)
2105 dm_put_device(cache->ti, cache->cache_dev);
2106
2107 if (cache->policy)
2108 dm_cache_policy_destroy(cache->policy);
2109
2110 for (i = 0; i < cache->nr_ctr_args ; i++)
2111 kfree(cache->ctr_args[i]);
2112 kfree(cache->ctr_args);
2113
2114 kfree(cache);
2115}
2116
2117static void cache_dtr(struct dm_target *ti)
2118{
2119 struct cache *cache = ti->private;
2120
2121 destroy(cache);
2122}
2123
2124static sector_t get_dev_size(struct dm_dev *dev)
2125{
2126 return i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT;
2127}
2128
/*----------------------------------------------------------------*/

/*
 * Construct a cache device mapping:
 *
 * cache <metadata dev> <cache dev> <origin dev> <block size>
 *       <#feature args> [<feature arg>]*
 *       <policy> <#policy args> [<policy arg>]*
 *
 * metadata dev    : fast device holding the persistent metadata
 * cache dev       : fast device holding cached data blocks
 * origin dev      : slow device holding original data blocks
 * block size      : cache unit size in sectors
 *
 * #feature args   : number of feature arguments passed
 * feature args    : writeback (the default), writethrough, passthrough
 *                   or metadata2
 *
 * policy          : the replacement policy to use
 * #policy args    : an even number of policy arguments corresponding
 *                   to key/value pairs passed to the policy
 * policy args     : key/value pairs passed to the policy,
 *                   e.g. 'sequential_threshold 1024'
 */

2160struct cache_args {
2161 struct dm_target *ti;
2162
2163 struct dm_dev *metadata_dev;
2164
2165 struct dm_dev *cache_dev;
2166 sector_t cache_sectors;
2167
2168 struct dm_dev *origin_dev;
2169 sector_t origin_sectors;
2170
2171 uint32_t block_size;
2172
2173 const char *policy_name;
2174 int policy_argc;
2175 const char **policy_argv;
2176
2177 struct cache_features features;
2178};
2179
2180static void destroy_cache_args(struct cache_args *ca)
2181{
2182 if (ca->metadata_dev)
2183 dm_put_device(ca->ti, ca->metadata_dev);
2184
2185 if (ca->cache_dev)
2186 dm_put_device(ca->ti, ca->cache_dev);
2187
2188 if (ca->origin_dev)
2189 dm_put_device(ca->ti, ca->origin_dev);
2190
2191 kfree(ca);
2192}
2193
2194static bool at_least_one_arg(struct dm_arg_set *as, char **error)
2195{
2196 if (!as->argc) {
2197 *error = "Insufficient args";
2198 return false;
2199 }
2200
2201 return true;
2202}
2203
2204static int parse_metadata_dev(struct cache_args *ca, struct dm_arg_set *as,
2205 char **error)
2206{
2207 int r;
2208 sector_t metadata_dev_size;
2209 char b[BDEVNAME_SIZE];
2210
2211 if (!at_least_one_arg(as, error))
2212 return -EINVAL;
2213
2214 r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
2215 &ca->metadata_dev);
2216 if (r) {
2217 *error = "Error opening metadata device";
2218 return r;
2219 }
2220
2221 metadata_dev_size = get_dev_size(ca->metadata_dev);
2222 if (metadata_dev_size > DM_CACHE_METADATA_MAX_SECTORS_WARNING)
2223 DMWARN("Metadata device %s is larger than %u sectors: excess space will not be used.",
 bdevname(ca->metadata_dev->bdev, b), DM_CACHE_METADATA_MAX_SECTORS_WARNING);
2225
2226 return 0;
2227}
2228
2229static int parse_cache_dev(struct cache_args *ca, struct dm_arg_set *as,
2230 char **error)
2231{
2232 int r;
2233
2234 if (!at_least_one_arg(as, error))
2235 return -EINVAL;
2236
2237 r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
2238 &ca->cache_dev);
2239 if (r) {
2240 *error = "Error opening cache device";
2241 return r;
2242 }
2243 ca->cache_sectors = get_dev_size(ca->cache_dev);
2244
2245 return 0;
2246}
2247
2248static int parse_origin_dev(struct cache_args *ca, struct dm_arg_set *as,
2249 char **error)
2250{
2251 int r;
2252
2253 if (!at_least_one_arg(as, error))
2254 return -EINVAL;
2255
2256 r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
2257 &ca->origin_dev);
2258 if (r) {
2259 *error = "Error opening origin device";
2260 return r;
2261 }
2262
2263 ca->origin_sectors = get_dev_size(ca->origin_dev);
2264 if (ca->ti->len > ca->origin_sectors) {
2265 *error = "Device size larger than cached device";
2266 return -EINVAL;
2267 }
2268
2269 return 0;
2270}
2271
2272static int parse_block_size(struct cache_args *ca, struct dm_arg_set *as,
2273 char **error)
2274{
2275 unsigned long block_size;
2276
2277 if (!at_least_one_arg(as, error))
2278 return -EINVAL;
2279
2280 if (kstrtoul(dm_shift_arg(as), 10, &block_size) || !block_size ||
2281 block_size < DATA_DEV_BLOCK_SIZE_MIN_SECTORS ||
2282 block_size > DATA_DEV_BLOCK_SIZE_MAX_SECTORS ||
2283 block_size & (DATA_DEV_BLOCK_SIZE_MIN_SECTORS - 1)) {
2284 *error = "Invalid data block size";
2285 return -EINVAL;
2286 }
2287
2288 if (block_size > ca->cache_sectors) {
2289 *error = "Data block size is larger than the cache device";
2290 return -EINVAL;
2291 }
2292
2293 ca->block_size = block_size;
2294
2295 return 0;
2296}
2297
2298static void init_features(struct cache_features *cf)
2299{
2300 cf->mode = CM_WRITE;
2301 cf->io_mode = CM_IO_WRITEBACK;
2302 cf->metadata_version = 1;
2303}
2304
2305static int parse_features(struct cache_args *ca, struct dm_arg_set *as,
2306 char **error)
2307{
2308 static struct dm_arg _args[] = {
2309 {0, 2, "Invalid number of cache feature arguments"},
2310 };
2311
2312 int r;
2313 unsigned argc;
2314 const char *arg;
2315 struct cache_features *cf = &ca->features;
2316
2317 init_features(cf);
2318
2319 r = dm_read_arg_group(_args, as, &argc, error);
2320 if (r)
2321 return -EINVAL;
2322
2323 while (argc--) {
2324 arg = dm_shift_arg(as);
2325
2326 if (!strcasecmp(arg, "writeback"))
2327 cf->io_mode = CM_IO_WRITEBACK;
2328
2329 else if (!strcasecmp(arg, "writethrough"))
2330 cf->io_mode = CM_IO_WRITETHROUGH;
2331
2332 else if (!strcasecmp(arg, "passthrough"))
2333 cf->io_mode = CM_IO_PASSTHROUGH;
2334
2335 else if (!strcasecmp(arg, "metadata2"))
2336 cf->metadata_version = 2;
2337
2338 else {
2339 *error = "Unrecognised cache feature requested";
2340 return -EINVAL;
2341 }
2342 }
2343
2344 return 0;
2345}
2346
2347static int parse_policy(struct cache_args *ca, struct dm_arg_set *as,
2348 char **error)
2349{
2350 static struct dm_arg _args[] = {
2351 {0, 1024, "Invalid number of policy arguments"},
2352 };
2353
2354 int r;
2355
2356 if (!at_least_one_arg(as, error))
2357 return -EINVAL;
2358
2359 ca->policy_name = dm_shift_arg(as);
2360
2361 r = dm_read_arg_group(_args, as, &ca->policy_argc, error);
2362 if (r)
2363 return -EINVAL;
2364
2365 ca->policy_argv = (const char **)as->argv;
2366 dm_consume_args(as, ca->policy_argc);
2367
2368 return 0;
2369}
2370
2371static int parse_cache_args(struct cache_args *ca, int argc, char **argv,
2372 char **error)
2373{
2374 int r;
2375 struct dm_arg_set as;
2376
2377 as.argc = argc;
2378 as.argv = argv;
2379
2380 r = parse_metadata_dev(ca, &as, error);
2381 if (r)
2382 return r;
2383
2384 r = parse_cache_dev(ca, &as, error);
2385 if (r)
2386 return r;
2387
2388 r = parse_origin_dev(ca, &as, error);
2389 if (r)
2390 return r;
2391
2392 r = parse_block_size(ca, &as, error);
2393 if (r)
2394 return r;
2395
2396 r = parse_features(ca, &as, error);
2397 if (r)
2398 return r;
2399
2400 r = parse_policy(ca, &as, error);
2401 if (r)
2402 return r;
2403
2404 return 0;
2405}
2406
2407
2408
2409static struct kmem_cache *migration_cache;
2410
2411#define NOT_CORE_OPTION 1
2412
2413static int process_config_option(struct cache *cache, const char *key, const char *value)
2414{
2415 unsigned long tmp;
2416
2417 if (!strcasecmp(key, "migration_threshold")) {
2418 if (kstrtoul(value, 10, &tmp))
2419 return -EINVAL;
2420
2421 cache->migration_threshold = tmp;
2422 return 0;
2423 }
2424
2425 return NOT_CORE_OPTION;
2426}
2427
2428static int set_config_value(struct cache *cache, const char *key, const char *value)
2429{
2430 int r = process_config_option(cache, key, value);
2431
2432 if (r == NOT_CORE_OPTION)
2433 r = policy_set_config_value(cache->policy, key, value);
2434
2435 if (r)
2436 DMWARN("bad config value for %s: %s", key, value);
2437
2438 return r;
2439}
2440
2441static int set_config_values(struct cache *cache, int argc, const char **argv)
2442{
2443 int r = 0;
2444
2445 if (argc & 1) {
2446 DMWARN("Odd number of policy arguments given but they should be <key> <value> pairs.");
2447 return -EINVAL;
2448 }
2449
2450 while (argc) {
2451 r = set_config_value(cache, argv[0], argv[1]);
2452 if (r)
2453 break;
2454
2455 argc -= 2;
2456 argv += 2;
2457 }
2458
2459 return r;
2460}
2461
2462static int create_cache_policy(struct cache *cache, struct cache_args *ca,
2463 char **error)
2464{
2465 struct dm_cache_policy *p = dm_cache_policy_create(ca->policy_name,
2466 cache->cache_size,
2467 cache->origin_sectors,
2468 cache->sectors_per_block);
2469 if (IS_ERR(p)) {
2470 *error = "Error creating cache's policy";
2471 return PTR_ERR(p);
2472 }
2473 cache->policy = p;
2474 BUG_ON(!cache->policy);
2475
2476 return 0;
2477}
2478
/*
 * We want the discard block size to be at least the size of the cache
 * block size, and have no more than 2^14 discard blocks across the origin.
 */
2483#define MAX_DISCARD_BLOCKS (1 << 14)
2484
2485static bool too_many_discard_blocks(sector_t discard_block_size,
2486 sector_t origin_size)
2487{
2488 (void) sector_div(origin_size, discard_block_size);
2489
2490 return origin_size > MAX_DISCARD_BLOCKS;
2491}
2492
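/*
 * Start with the cache block size and keep doubling until the origin is
 * covered by no more than MAX_DISCARD_BLOCKS discard blocks.
 */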
2493static sector_t calculate_discard_block_size(sector_t cache_block_size,
2494 sector_t origin_size)
2495{
2496 sector_t discard_block_size = cache_block_size;
2497
2498 if (origin_size)
2499 while (too_many_discard_blocks(discard_block_size, origin_size))
2500 discard_block_size *= 2;
2501
2502 return discard_block_size;
2503}
2504
2505static void set_cache_size(struct cache *cache, dm_cblock_t size)
2506{
2507 dm_block_t nr_blocks = from_cblock(size);
2508
2509 if (nr_blocks > (1 << 20) && cache->cache_size != size)
2510 DMWARN_LIMIT("You have created a cache device with a lot of individual cache blocks (%llu)\n"
2511 "All these mappings can consume a lot of kernel memory, and take some time to read/write.\n"
2512 "Please consider increasing the cache block size to reduce the overall cache block count.",
2513 (unsigned long long) nr_blocks);
2514
2515 cache->cache_size = size;
2516}
2517
2518static int is_congested(struct dm_dev *dev, int bdi_bits)
2519{
2520 struct request_queue *q = bdev_get_queue(dev->bdev);
2521 return bdi_congested(q->backing_dev_info, bdi_bits);
2522}
2523
2524static int cache_is_congested(struct dm_target_callbacks *cb, int bdi_bits)
2525{
2526 struct cache *cache = container_of(cb, struct cache, callbacks);
2527
2528 return is_congested(cache->origin_dev, bdi_bits) ||
2529 is_congested(cache->cache_dev, bdi_bits);
2530}
2531
2532#define DEFAULT_MIGRATION_THRESHOLD 2048
2533
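/*
 * Builds the cache from the parsed constructor args: takes ownership of
 * the three devices, sizes the cache, creates the policy and metadata
 * object, then allocates the bitsets, kcopyd client, workqueue, bio
 * prison and migration mempool.  Any failure jumps to 'bad', which
 * destroys whatever has been set up so far.
 */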
2534static int cache_create(struct cache_args *ca, struct cache **result)
2535{
2536 int r = 0;
2537 char **error = &ca->ti->error;
2538 struct cache *cache;
2539 struct dm_target *ti = ca->ti;
2540 dm_block_t origin_blocks;
2541 struct dm_cache_metadata *cmd;
2542 bool may_format = ca->features.mode == CM_WRITE;
2543
2544 cache = kzalloc(sizeof(*cache), GFP_KERNEL);
2545 if (!cache)
2546 return -ENOMEM;
2547
2548 cache->ti = ca->ti;
2549 ti->private = cache;
2550 ti->num_flush_bios = 2;
2551 ti->flush_supported = true;
2552
2553 ti->num_discard_bios = 1;
2554 ti->discards_supported = true;
2555 ti->split_discard_bios = false;
2556
2557 cache->features = ca->features;
2558 ti->per_io_data_size = get_per_bio_data_size(cache);
2559
2560 cache->callbacks.congested_fn = cache_is_congested;
2561 dm_table_add_target_callbacks(ti->table, &cache->callbacks);
2562
2563 cache->metadata_dev = ca->metadata_dev;
2564 cache->origin_dev = ca->origin_dev;
2565 cache->cache_dev = ca->cache_dev;
2566
2567 ca->metadata_dev = ca->origin_dev = ca->cache_dev = NULL;
2568
2569 origin_blocks = cache->origin_sectors = ca->origin_sectors;
2570 origin_blocks = block_div(origin_blocks, ca->block_size);
2571 cache->origin_blocks = to_oblock(origin_blocks);
2572
2573 cache->sectors_per_block = ca->block_size;
2574 if (dm_set_target_max_io_len(ti, cache->sectors_per_block)) {
2575 r = -EINVAL;
2576 goto bad;
2577 }
2578
2579 if (ca->block_size & (ca->block_size - 1)) {
2580 dm_block_t cache_size = ca->cache_sectors;
2581
2582 cache->sectors_per_block_shift = -1;
2583 cache_size = block_div(cache_size, ca->block_size);
2584 set_cache_size(cache, to_cblock(cache_size));
2585 } else {
2586 cache->sectors_per_block_shift = __ffs(ca->block_size);
2587 set_cache_size(cache, to_cblock(ca->cache_sectors >> cache->sectors_per_block_shift));
2588 }
2589
2590 r = create_cache_policy(cache, ca, error);
2591 if (r)
2592 goto bad;
2593
2594 cache->policy_nr_args = ca->policy_argc;
2595 cache->migration_threshold = DEFAULT_MIGRATION_THRESHOLD;
2596
2597 r = set_config_values(cache, ca->policy_argc, ca->policy_argv);
2598 if (r) {
2599 *error = "Error setting cache policy's config values";
2600 goto bad;
2601 }
2602
2603 cmd = dm_cache_metadata_open(cache->metadata_dev->bdev,
2604 ca->block_size, may_format,
2605 dm_cache_policy_get_hint_size(cache->policy),
2606 ca->features.metadata_version);
2607 if (IS_ERR(cmd)) {
2608 *error = "Error creating metadata object";
2609 r = PTR_ERR(cmd);
2610 goto bad;
2611 }
2612 cache->cmd = cmd;
2613 set_cache_mode(cache, CM_WRITE);
2614 if (get_cache_mode(cache) != CM_WRITE) {
2615 *error = "Unable to get write access to metadata, please check/repair metadata.";
2616 r = -EINVAL;
2617 goto bad;
2618 }
2619
2620 if (passthrough_mode(&cache->features)) {
2621 bool all_clean;
2622
2623 r = dm_cache_metadata_all_clean(cache->cmd, &all_clean);
2624 if (r) {
2625 *error = "dm_cache_metadata_all_clean() failed";
2626 goto bad;
2627 }
2628
2629 if (!all_clean) {
2630 *error = "Cannot enter passthrough mode unless all blocks are clean";
2631 r = -EINVAL;
2632 goto bad;
2633 }
2634
2635 policy_allow_migrations(cache->policy, false);
2636 }
2637
2638 spin_lock_init(&cache->lock);
2639 INIT_LIST_HEAD(&cache->deferred_cells);
2640 bio_list_init(&cache->deferred_bios);
2641 bio_list_init(&cache->deferred_writethrough_bios);
2642 atomic_set(&cache->nr_allocated_migrations, 0);
2643 atomic_set(&cache->nr_io_migrations, 0);
2644 init_waitqueue_head(&cache->migration_wait);
2645
2646 r = -ENOMEM;
2647 atomic_set(&cache->nr_dirty, 0);
2648 cache->dirty_bitset = alloc_bitset(from_cblock(cache->cache_size));
2649 if (!cache->dirty_bitset) {
2650 *error = "could not allocate dirty bitset";
2651 goto bad;
2652 }
2653 clear_bitset(cache->dirty_bitset, from_cblock(cache->cache_size));
2654
2655 cache->discard_block_size =
2656 calculate_discard_block_size(cache->sectors_per_block,
2657 cache->origin_sectors);
2658 cache->discard_nr_blocks = to_dblock(dm_sector_div_up(cache->origin_sectors,
2659 cache->discard_block_size));
2660 cache->discard_bitset = alloc_bitset(from_dblock(cache->discard_nr_blocks));
2661 if (!cache->discard_bitset) {
2662 *error = "could not allocate discard bitset";
2663 goto bad;
2664 }
2665 clear_bitset(cache->discard_bitset, from_dblock(cache->discard_nr_blocks));
2666
2667 cache->copier = dm_kcopyd_client_create(&dm_kcopyd_throttle);
2668 if (IS_ERR(cache->copier)) {
2669 *error = "could not create kcopyd client";
2670 r = PTR_ERR(cache->copier);
2671 goto bad;
2672 }
2673
2674 cache->wq = alloc_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM, 0);
2675 if (!cache->wq) {
2676 *error = "could not create workqueue for metadata object";
2677 goto bad;
2678 }
2679 INIT_WORK(&cache->deferred_bio_worker, process_deferred_bios);
2680 INIT_WORK(&cache->deferred_writethrough_worker,
2681 process_deferred_writethrough_bios);
2682 INIT_WORK(&cache->migration_worker, check_migrations);
2683 INIT_DELAYED_WORK(&cache->waker, do_waker);
2684
2685 cache->prison = dm_bio_prison_create_v2(cache->wq);
2686 if (!cache->prison) {
2687 *error = "could not create bio prison";
2688 goto bad;
2689 }
2690
2691 cache->migration_pool = mempool_create_slab_pool(MIGRATION_POOL_SIZE,
2692 migration_cache);
2693 if (!cache->migration_pool) {
2694 *error = "Error creating cache's migration mempool";
2695 goto bad;
2696 }
2697
2698 cache->need_tick_bio = true;
2699 cache->sized = false;
2700 cache->invalidate = false;
2701 cache->commit_requested = false;
2702 cache->loaded_mappings = false;
2703 cache->loaded_discards = false;
2704
2705 load_stats(cache);
2706
2707 atomic_set(&cache->stats.demotion, 0);
2708 atomic_set(&cache->stats.promotion, 0);
2709 atomic_set(&cache->stats.copies_avoided, 0);
2710 atomic_set(&cache->stats.cache_cell_clash, 0);
2711 atomic_set(&cache->stats.commit_count, 0);
2712 atomic_set(&cache->stats.discard_count, 0);
2713
2714 spin_lock_init(&cache->invalidation_lock);
2715 INIT_LIST_HEAD(&cache->invalidation_requests);
2716
2717 batcher_init(&cache->committer, commit_op, cache,
2718 issue_op, cache, cache->wq);
2719 iot_init(&cache->tracker);
2720
2721 init_rwsem(&cache->background_work_lock);
2722 prevent_background_work(cache);
2723
2724 *result = cache;
2725 return 0;
2726bad:
2727 destroy(cache);
2728 return r;
2729}
2730
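/*
 * Keep a copy of the constructor arguments so cache_status() can echo
 * them back in its table output.
 */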
2731static int copy_ctr_args(struct cache *cache, int argc, const char **argv)
2732{
2733 unsigned i;
2734 const char **copy;
2735
2736 copy = kcalloc(argc, sizeof(*copy), GFP_KERNEL);
2737 if (!copy)
2738 return -ENOMEM;
2739 for (i = 0; i < argc; i++) {
2740 copy[i] = kstrdup(argv[i], GFP_KERNEL);
2741 if (!copy[i]) {
2742 while (i--)
2743 kfree(copy[i]);
2744 kfree(copy);
2745 return -ENOMEM;
2746 }
2747 }
2748
2749 cache->nr_ctr_args = argc;
2750 cache->ctr_args = copy;
2751
2752 return 0;
2753}
2754
2755static int cache_ctr(struct dm_target *ti, unsigned argc, char **argv)
2756{
2757 int r = -EINVAL;
2758 struct cache_args *ca;
2759 struct cache *cache = NULL;
2760
2761 ca = kzalloc(sizeof(*ca), GFP_KERNEL);
2762 if (!ca) {
2763 ti->error = "Error allocating memory for cache";
2764 return -ENOMEM;
2765 }
2766 ca->ti = ti;
2767
2768 r = parse_cache_args(ca, argc, argv, &ti->error);
2769 if (r)
2770 goto out;
2771
2772 r = cache_create(ca, &cache);
2773 if (r)
2774 goto out;
2775
2776 r = copy_ctr_args(cache, argc - 3, (const char **)argv + 3);
2777 if (r) {
2778 destroy(cache);
2779 goto out;
2780 }
2781
2782 ti->private = cache;
2783out:
2784 destroy_cache_args(ca);
2785 return r;
2786}
2787
2788
2789
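/*
 * Fast path mapping function.  Bios beyond the last complete origin
 * block are remapped straight to the origin; discards and flushes are
 * deferred to the worker; everything else goes through map_bio(), with
 * a metadata commit scheduled if the mapping requires one.
 */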
2790static int cache_map(struct dm_target *ti, struct bio *bio)
2791{
2792 struct cache *cache = ti->private;
2793
2794 int r;
2795 bool commit_needed;
2796 dm_oblock_t block = get_bio_block(cache, bio);
2797 size_t pb_data_size = get_per_bio_data_size(cache);
2798
2799 init_per_bio_data(bio, pb_data_size);
2800 if (unlikely(from_oblock(block) >= from_oblock(cache->origin_blocks))) {
		/*
		 * This can only happen if the bio is aimed at a partial
		 * block at the end of the origin device.  We don't cache
		 * these, just remap to the origin and carry on.
		 */
2806 remap_to_origin(cache, bio);
2807 accounted_begin(cache, bio);
2808 return DM_MAPIO_REMAPPED;
2809 }
2810
2811 if (discard_or_flush(bio)) {
2812 defer_bio(cache, bio);
2813 return DM_MAPIO_SUBMITTED;
2814 }
2815
2816 r = map_bio(cache, bio, block, &commit_needed);
2817 if (commit_needed)
2818 schedule_commit(&cache->committer);
2819
2820 return r;
2821}
2822
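/*
 * Completion hook: re-arm the tick if this bio carried it, release any
 * shared bio-prison lock and account the io as finished.
 */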
2823static int cache_end_io(struct dm_target *ti, struct bio *bio, int error)
2824{
2825 struct cache *cache = ti->private;
2826 unsigned long flags;
2827 size_t pb_data_size = get_per_bio_data_size(cache);
2828 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
2829
2830 if (pb->tick) {
2831 policy_tick(cache->policy, false);
2832
2833 spin_lock_irqsave(&cache->lock, flags);
2834 cache->need_tick_bio = true;
2835 spin_unlock_irqrestore(&cache->lock, flags);
2836 }
2837
2838 bio_drop_shared_lock(cache, bio);
2839 accounted_complete(cache, bio);
2840
2841 return 0;
2842}
2843
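/*
 * The following helpers persist in-core state (dirty bits, discard bits
 * and policy hints) to the metadata device.  They refuse to run once the
 * metadata has degraded to read-only or fail mode.
 */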
2844static int write_dirty_bitset(struct cache *cache)
2845{
2846 int r;
2847
2848 if (get_cache_mode(cache) >= CM_READ_ONLY)
2849 return -EINVAL;
2850
2851 r = dm_cache_set_dirty_bits(cache->cmd, from_cblock(cache->cache_size), cache->dirty_bitset);
2852 if (r)
2853 metadata_operation_failed(cache, "dm_cache_set_dirty_bits", r);
2854
2855 return r;
2856}
2857
2858static int write_discard_bitset(struct cache *cache)
2859{
	int r;
	unsigned i;
2861
2862 if (get_cache_mode(cache) >= CM_READ_ONLY)
2863 return -EINVAL;
2864
2865 r = dm_cache_discard_bitset_resize(cache->cmd, cache->discard_block_size,
2866 cache->discard_nr_blocks);
2867 if (r) {
2868 DMERR("%s: could not resize on-disk discard bitset", cache_device_name(cache));
2869 metadata_operation_failed(cache, "dm_cache_discard_bitset_resize", r);
2870 return r;
2871 }
2872
2873 for (i = 0; i < from_dblock(cache->discard_nr_blocks); i++) {
2874 r = dm_cache_set_discard(cache->cmd, to_dblock(i),
2875 is_discarded(cache, to_dblock(i)));
2876 if (r) {
2877 metadata_operation_failed(cache, "dm_cache_set_discard", r);
2878 return r;
2879 }
2880 }
2881
2882 return 0;
2883}
2884
2885static int write_hints(struct cache *cache)
2886{
2887 int r;
2888
2889 if (get_cache_mode(cache) >= CM_READ_ONLY)
2890 return -EINVAL;
2891
2892 r = dm_cache_write_hints(cache->cmd, cache->policy);
2893 if (r) {
2894 metadata_operation_failed(cache, "dm_cache_write_hints", r);
2895 return r;
2896 }
2897
2898 return 0;
2899}
2900
/*
 * Flush all in-core state (dirty bits, discards, hints) and commit.
 */
2904static bool sync_metadata(struct cache *cache)
2905{
2906 int r1, r2, r3, r4;
2907
2908 r1 = write_dirty_bitset(cache);
2909 if (r1)
2910 DMERR("%s: could not write dirty bitset", cache_device_name(cache));
2911
2912 r2 = write_discard_bitset(cache);
2913 if (r2)
2914 DMERR("%s: could not write discard bitset", cache_device_name(cache));
2915
2916 save_stats(cache);
2917
2918 r3 = write_hints(cache);
2919 if (r3)
2920 DMERR("%s: could not write hints", cache_device_name(cache));
2921
	/*
	 * Commit, telling the metadata layer whether all of the above
	 * writes succeeded, i.e. whether this counts as a clean
	 * shutdown.
	 */
2927 r4 = commit(cache, !r1 && !r2 && !r3);
2928 if (r4)
2929 DMERR("%s: could not write cache metadata", cache_device_name(cache));
2930
2931 return !r1 && !r2 && !r3 && !r4;
2932}
2933
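/*
 * Quiesce the target: stop background work, drain the workqueue, requeue
 * anything still deferred and, if the metadata is still writable, sync it.
 */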
2934static void cache_postsuspend(struct dm_target *ti)
2935{
2936 struct cache *cache = ti->private;
2937
2938 prevent_background_work(cache);
2939 BUG_ON(atomic_read(&cache->nr_io_migrations));
2940
2941 cancel_delayed_work(&cache->waker);
2942 flush_workqueue(cache->wq);
2943 WARN_ON(cache->tracker.in_flight);
2944
	/*
	 * If this is a flush-only suspend there won't be any deferred
	 * bios, so this call is harmless.
	 */
2949 requeue_deferred_bios(cache);
2950
2951 if (get_cache_mode(cache) == CM_WRITE)
2952 (void) sync_metadata(cache);
2953}
2954
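/*
 * Callback for dm_cache_load_mappings(), called once per mapped cblock
 * during preresume.  Mirrors the on-disk dirty flag into the dirty
 * bitset/counter and hands the mapping to the policy.
 */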
2955static int load_mapping(void *context, dm_oblock_t oblock, dm_cblock_t cblock,
2956 bool dirty, uint32_t hint, bool hint_valid)
2957{
2958 int r;
2959 struct cache *cache = context;
2960
2961 if (dirty) {
2962 set_bit(from_cblock(cblock), cache->dirty_bitset);
2963 atomic_inc(&cache->nr_dirty);
2964 } else
2965 clear_bit(from_cblock(cblock), cache->dirty_bitset);
2966
2967 r = policy_load_mapping(cache->policy, oblock, cblock, dirty, hint, hint_valid);
2968 if (r)
2969 return r;
2970
2971 return 0;
2972}
2973
/*
 * The discard block size recorded in the on-disk metadata is not
 * necessarily the same as the one currently in use, so we have to be
 * careful only to set the discarded attribute when we know it covers a
 * complete block of the new size.
 */
2980struct discard_load_info {
2981 struct cache *cache;

	/*
	 * These blocks are sized using the on-disk dblock size, rather
	 * than the current one.
	 */
2987 dm_block_t block_size;
2988 dm_block_t discard_begin, discard_end;
2989};
2990
2991static void discard_load_info_init(struct cache *cache,
2992 struct discard_load_info *li)
2993{
2994 li->cache = cache;
2995 li->discard_begin = li->discard_end = 0;
2996}
2997
2998static void set_discard_range(struct discard_load_info *li)
2999{
3000 sector_t b, e;
3001
3002 if (li->discard_begin == li->discard_end)
3003 return;
3004
	/*
	 * Convert the range to sectors.
	 */
3008 b = li->discard_begin * li->block_size;
3009 e = li->discard_end * li->block_size;
3010
	/*
	 * Then convert back to the current discard block size.
	 */
3014 b = dm_sector_div_up(b, li->cache->discard_block_size);
3015 sector_div(e, li->cache->discard_block_size);
3016
	/*
	 * The origin may have shrunk, so we need to check we're still
	 * within bounds.
	 */
3021 if (e > from_dblock(li->cache->discard_nr_blocks))
3022 e = from_dblock(li->cache->discard_nr_blocks);
3023
3024 for (; b < e; b++)
3025 set_discard(li->cache, to_dblock(b));
3026}
3027
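/*
 * Callback for dm_cache_load_discards().  Consecutive discarded blocks
 * (in the on-disk dblock size) are accumulated into
 * [discard_begin, discard_end) and flushed via set_discard_range()
 * whenever the run is broken.
 */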
3028static int load_discard(void *context, sector_t discard_block_size,
3029 dm_dblock_t dblock, bool discard)
3030{
3031 struct discard_load_info *li = context;
3032
3033 li->block_size = discard_block_size;
3034
3035 if (discard) {
3036 if (from_dblock(dblock) == li->discard_end)
			/*
			 * We're already in a discard range, just extend it.
			 */
3040 li->discard_end = li->discard_end + 1ULL;
3041
3042 else {
			/*
			 * Emit the old range and start a new one.
			 */
3046 set_discard_range(li);
3047 li->discard_begin = from_dblock(dblock);
3048 li->discard_end = li->discard_begin + 1ULL;
3049 }
3050 } else {
3051 set_discard_range(li);
3052 li->discard_begin = li->discard_end = 0;
3053 }
3054
3055 return 0;
3056}
3057
3058static dm_cblock_t get_cache_dev_size(struct cache *cache)
3059{
3060 sector_t size = get_dev_size(cache->cache_dev);
3061 (void) sector_div(size, cache->sectors_per_block);
3062 return to_cblock(size);
3063}
3064
3065static bool can_resize(struct cache *cache, dm_cblock_t new_size)
3066{
3067 if (from_cblock(new_size) > from_cblock(cache->cache_size))
3068 return true;
3069
	/*
	 * We can't drop a dirty block when shrinking the cache.
	 */
3073 while (from_cblock(new_size) < from_cblock(cache->cache_size)) {
3074 new_size = to_cblock(from_cblock(new_size) + 1);
3075 if (is_dirty(cache, new_size)) {
3076 DMERR("%s: unable to shrink cache; cache block %llu is dirty",
3077 cache_device_name(cache),
3078 (unsigned long long) from_cblock(new_size));
3079 return false;
3080 }
3081 }
3082
3083 return true;
3084}
3085
3086static int resize_cache_dev(struct cache *cache, dm_cblock_t new_size)
3087{
3088 int r;
3089
3090 r = dm_cache_resize(cache->cmd, new_size);
3091 if (r) {
3092 DMERR("%s: could not resize cache metadata", cache_device_name(cache));
3093 metadata_operation_failed(cache, "dm_cache_resize", r);
3094 return r;
3095 }
3096
3097 set_cache_size(cache, new_size);
3098
3099 return 0;
3100}
3101
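/*
 * Resize the metadata to match the current cache device (growing is
 * always allowed, shrinking only if no dirty blocks would be dropped),
 * then load the mappings and discards the first time through.
 */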
3102static int cache_preresume(struct dm_target *ti)
3103{
3104 int r = 0;
3105 struct cache *cache = ti->private;
3106 dm_cblock_t csize = get_cache_dev_size(cache);
3107
	/*
	 * Check to see if the cache has resized.
	 */
3111 if (!cache->sized) {
3112 r = resize_cache_dev(cache, csize);
3113 if (r)
3114 return r;
3115
3116 cache->sized = true;
3117
3118 } else if (csize != cache->cache_size) {
3119 if (!can_resize(cache, csize))
3120 return -EINVAL;
3121
3122 r = resize_cache_dev(cache, csize);
3123 if (r)
3124 return r;
3125 }
3126
3127 if (!cache->loaded_mappings) {
3128 r = dm_cache_load_mappings(cache->cmd, cache->policy,
3129 load_mapping, cache);
3130 if (r) {
3131 DMERR("%s: could not load cache mappings", cache_device_name(cache));
3132 metadata_operation_failed(cache, "dm_cache_load_mappings", r);
3133 return r;
3134 }
3135
3136 cache->loaded_mappings = true;
3137 }
3138
3139 if (!cache->loaded_discards) {
3140 struct discard_load_info li;
3141
		/*
		 * The discard bitset could have been resized, or the
		 * discard block size changed.  To be safe we start by
		 * setting every dblock to not discarded.
		 */
3147 clear_bitset(cache->discard_bitset, from_dblock(cache->discard_nr_blocks));
3148
3149 discard_load_info_init(cache, &li);
3150 r = dm_cache_load_discards(cache->cmd, load_discard, &li);
3151 if (r) {
3152 DMERR("%s: could not load origin discards", cache_device_name(cache));
3153 metadata_operation_failed(cache, "dm_cache_load_discards", r);
3154 return r;
3155 }
3156 set_discard_range(&li);
3157
3158 cache->loaded_discards = true;
3159 }
3160
3161 return r;
3162}
3163
3164static void cache_resume(struct dm_target *ti)
3165{
3166 struct cache *cache = ti->private;
3167
3168 cache->need_tick_bio = true;
3169 allow_background_work(cache);
3170 do_waker(&cache->waker.work);
3171}
3172
/*
 * Status format:
 *
 * <metadata block size> <#used metadata blocks>/<#total metadata blocks>
 * <cache block size> <#used cache blocks>/<#total cache blocks>
 * <#read hits> <#read misses> <#write hits> <#write misses>
 * <#demotions> <#promotions> <#dirty>
 * <#features> <features>*
 * <#core args> <core args>
 * <policy name> <#policy args> <policy args>* <cache metadata mode> <needs_check>
 */
3184static void cache_status(struct dm_target *ti, status_type_t type,
3185 unsigned status_flags, char *result, unsigned maxlen)
3186{
3187 int r = 0;
3188 unsigned i;
3189 ssize_t sz = 0;
3190 dm_block_t nr_free_blocks_metadata = 0;
3191 dm_block_t nr_blocks_metadata = 0;
3192 char buf[BDEVNAME_SIZE];
3193 struct cache *cache = ti->private;
3194 dm_cblock_t residency;
3195 bool needs_check;
3196
3197 switch (type) {
3198 case STATUSTYPE_INFO:
3199 if (get_cache_mode(cache) == CM_FAIL) {
3200 DMEMIT("Fail");
3201 break;
3202 }
3203
		/* Commit to ensure statistics aren't out-of-date */
3205 if (!(status_flags & DM_STATUS_NOFLUSH_FLAG) && !dm_suspended(ti))
3206 (void) commit(cache, false);
3207
3208 r = dm_cache_get_free_metadata_block_count(cache->cmd, &nr_free_blocks_metadata);
3209 if (r) {
3210 DMERR("%s: dm_cache_get_free_metadata_block_count returned %d",
3211 cache_device_name(cache), r);
3212 goto err;
3213 }
3214
3215 r = dm_cache_get_metadata_dev_size(cache->cmd, &nr_blocks_metadata);
3216 if (r) {
3217 DMERR("%s: dm_cache_get_metadata_dev_size returned %d",
3218 cache_device_name(cache), r);
3219 goto err;
3220 }
3221
3222 residency = policy_residency(cache->policy);
3223
3224 DMEMIT("%u %llu/%llu %llu %llu/%llu %u %u %u %u %u %u %lu ",
3225 (unsigned)DM_CACHE_METADATA_BLOCK_SIZE,
3226 (unsigned long long)(nr_blocks_metadata - nr_free_blocks_metadata),
3227 (unsigned long long)nr_blocks_metadata,
3228 (unsigned long long)cache->sectors_per_block,
3229 (unsigned long long) from_cblock(residency),
3230 (unsigned long long) from_cblock(cache->cache_size),
3231 (unsigned) atomic_read(&cache->stats.read_hit),
3232 (unsigned) atomic_read(&cache->stats.read_miss),
3233 (unsigned) atomic_read(&cache->stats.write_hit),
3234 (unsigned) atomic_read(&cache->stats.write_miss),
3235 (unsigned) atomic_read(&cache->stats.demotion),
3236 (unsigned) atomic_read(&cache->stats.promotion),
3237 (unsigned long) atomic_read(&cache->nr_dirty));
3238
3239 if (cache->features.metadata_version == 2)
3240 DMEMIT("2 metadata2 ");
3241 else
3242 DMEMIT("1 ");
3243
3244 if (writethrough_mode(&cache->features))
3245 DMEMIT("writethrough ");
3246
3247 else if (passthrough_mode(&cache->features))
3248 DMEMIT("passthrough ");
3249
3250 else if (writeback_mode(&cache->features))
3251 DMEMIT("writeback ");
3252
3253 else {
3254 DMERR("%s: internal error: unknown io mode: %d",
3255 cache_device_name(cache), (int) cache->features.io_mode);
3256 goto err;
3257 }
3258
3259 DMEMIT("2 migration_threshold %llu ", (unsigned long long) cache->migration_threshold);
3260
3261 DMEMIT("%s ", dm_cache_policy_get_name(cache->policy));
3262 if (sz < maxlen) {
3263 r = policy_emit_config_values(cache->policy, result, maxlen, &sz);
3264 if (r)
3265 DMERR("%s: policy_emit_config_values returned %d",
3266 cache_device_name(cache), r);
3267 }
3268
3269 if (get_cache_mode(cache) == CM_READ_ONLY)
3270 DMEMIT("ro ");
3271 else
3272 DMEMIT("rw ");
3273
3274 r = dm_cache_metadata_needs_check(cache->cmd, &needs_check);
3275
3276 if (r || needs_check)
3277 DMEMIT("needs_check ");
3278 else
3279 DMEMIT("- ");
3280
3281 break;
3282
3283 case STATUSTYPE_TABLE:
3284 format_dev_t(buf, cache->metadata_dev->bdev->bd_dev);
3285 DMEMIT("%s ", buf);
3286 format_dev_t(buf, cache->cache_dev->bdev->bd_dev);
3287 DMEMIT("%s ", buf);
3288 format_dev_t(buf, cache->origin_dev->bdev->bd_dev);
3289 DMEMIT("%s", buf);
3290
3291 for (i = 0; i < cache->nr_ctr_args - 1; i++)
3292 DMEMIT(" %s", cache->ctr_args[i]);
3293 if (cache->nr_ctr_args)
3294 DMEMIT(" %s", cache->ctr_args[cache->nr_ctr_args - 1]);
3295 }
3296
3297 return;
3298
3299err:
3300 DMEMIT("Error");
3301}
3302
/*
 * Cache block ranges are used by the "invalidate_cblocks" message to
 * describe which cblocks should be invalidated.
 */
3307struct cblock_range {
3308 dm_cblock_t begin;
3309 dm_cblock_t end;
3310};
3311
/*
 * A cache block range can take two forms:
 *
 * i) A single cblock, eg. '3456'
 * ii) A begin and end cblock with a dash between, eg. 123-234
 */
3318static int parse_cblock_range(struct cache *cache, const char *str,
3319 struct cblock_range *result)
3320{
3321 char dummy;
3322 uint64_t b, e;
3323 int r;
3324
	/*
	 * Try and parse form (ii) first.
	 */
3328 r = sscanf(str, "%llu-%llu%c", &b, &e, &dummy);
3329 if (r < 0)
3330 return r;
3331
3332 if (r == 2) {
3333 result->begin = to_cblock(b);
3334 result->end = to_cblock(e);
3335 return 0;
3336 }
3337
	/*
	 * That didn't work, try form (i).
	 */
3341 r = sscanf(str, "%llu%c", &b, &dummy);
3342 if (r < 0)
3343 return r;
3344
3345 if (r == 1) {
3346 result->begin = to_cblock(b);
3347 result->end = to_cblock(from_cblock(result->begin) + 1u);
3348 return 0;
3349 }
3350
3351 DMERR("%s: invalid cblock range '%s'", cache_device_name(cache), str);
3352 return -EINVAL;
3353}
3354
3355static int validate_cblock_range(struct cache *cache, struct cblock_range *range)
3356{
3357 uint64_t b = from_cblock(range->begin);
3358 uint64_t e = from_cblock(range->end);
3359 uint64_t n = from_cblock(cache->cache_size);
3360
3361 if (b >= n) {
3362 DMERR("%s: begin cblock out of range: %llu >= %llu",
3363 cache_device_name(cache), b, n);
3364 return -EINVAL;
3365 }
3366
3367 if (e > n) {
3368 DMERR("%s: end cblock out of range: %llu > %llu",
3369 cache_device_name(cache), e, n);
3370 return -EINVAL;
3371 }
3372
3373 if (b >= e) {
3374 DMERR("%s: invalid cblock range: %llu >= %llu",
3375 cache_device_name(cache), b, e);
3376 return -EINVAL;
3377 }
3378
3379 return 0;
3380}
3381
3382static inline dm_cblock_t cblock_succ(dm_cblock_t b)
3383{
3384 return to_cblock(from_cblock(b) + 1);
3385}
3386
3387static int request_invalidation(struct cache *cache, struct cblock_range *range)
3388{
3389 int r = 0;
3390
	/*
	 * The caller has already checked that we're in passthrough mode,
	 * so there is no dirty data to write back; just invalidate each
	 * cblock in the range in turn, then request a commit.
	 */
3397 while (range->begin != range->end) {
3398 r = invalidate_cblock(cache, range->begin);
3399 if (r)
3400 return r;
3401
3402 range->begin = cblock_succ(range->begin);
3403 }
3404
3405 cache->commit_requested = true;
3406 return r;
3407}
3408
3409static int process_invalidate_cblocks_message(struct cache *cache, unsigned count,
3410 const char **cblock_ranges)
3411{
3412 int r = 0;
3413 unsigned i;
3414 struct cblock_range range;
3415
3416 if (!passthrough_mode(&cache->features)) {
3417 DMERR("%s: cache has to be in passthrough mode for invalidation",
3418 cache_device_name(cache));
3419 return -EPERM;
3420 }
3421
3422 for (i = 0; i < count; i++) {
3423 r = parse_cblock_range(cache, cblock_ranges[i], &range);
3424 if (r)
3425 break;
3426
3427 r = validate_cblock_range(cache, &range);
3428 if (r)
3429 break;
3430
		/*
		 * Invalidate every cblock in this validated range.
		 */
3434 r = request_invalidation(cache, &range);
3435 if (r)
3436 break;
3437 }
3438
3439 return r;
3440}
3441
/*
 * Supports
 *	"<key> <value>"
 * and
 *	"invalidate_cblocks [(<begin>)|(<begin>-<end>)]*"
 *
 * e.g. dmsetup message <dev> 0 invalidate_cblocks 3456 7890-8899
 */
3450static int cache_message(struct dm_target *ti, unsigned argc, char **argv)
3451{
3452 struct cache *cache = ti->private;
3453
3454 if (!argc)
3455 return -EINVAL;
3456
3457 if (get_cache_mode(cache) >= CM_READ_ONLY) {
3458 DMERR("%s: unable to service cache target messages in READ_ONLY or FAIL mode",
3459 cache_device_name(cache));
3460 return -EOPNOTSUPP;
3461 }
3462
3463 if (!strcasecmp(argv[0], "invalidate_cblocks"))
3464 return process_invalidate_cblocks_message(cache, argc - 1, (const char **) argv + 1);
3465
3466 if (argc != 2)
3467 return -EINVAL;
3468
3469 return set_config_value(cache, argv[0], argv[1]);
3470}
3471
3472static int cache_iterate_devices(struct dm_target *ti,
3473 iterate_devices_callout_fn fn, void *data)
3474{
3475 int r = 0;
3476 struct cache *cache = ti->private;
3477
3478 r = fn(ti, cache->cache_dev, 0, get_dev_size(cache->cache_dev), data);
3479 if (!r)
3480 r = fn(ti, cache->origin_dev, 0, ti->len, data);
3481
3482 return r;
3483}
3484
3485static void set_discard_limits(struct cache *cache, struct queue_limits *limits)
3486{
	/*
	 * Cap discards at 1024 discard blocks and never beyond the origin.
	 */
3490 limits->max_discard_sectors = min_t(sector_t, cache->discard_block_size * 1024,
3491 cache->origin_sectors);
3492 limits->discard_granularity = cache->discard_block_size << SECTOR_SHIFT;
3493}
3494
3495static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
3496{
3497 struct cache *cache = ti->private;
3498 uint64_t io_opt_sectors = limits->io_opt >> SECTOR_SHIFT;
3499
	/*
	 * If the system-determined stacked limits are compatible with
	 * the cache's block size (io_opt is a factor), don't override
	 * them.
	 */
3504 if (io_opt_sectors < cache->sectors_per_block ||
3505 do_div(io_opt_sectors, cache->sectors_per_block)) {
3506 blk_limits_io_min(limits, cache->sectors_per_block << SECTOR_SHIFT);
3507 blk_limits_io_opt(limits, cache->sectors_per_block << SECTOR_SHIFT);
3508 }
3509 set_discard_limits(cache, limits);
3510}
3511
3512
3513
3514static struct target_type cache_target = {
3515 .name = "cache",
3516 .version = {2, 0, 0},
3517 .module = THIS_MODULE,
3518 .ctr = cache_ctr,
3519 .dtr = cache_dtr,
3520 .map = cache_map,
3521 .end_io = cache_end_io,
3522 .postsuspend = cache_postsuspend,
3523 .preresume = cache_preresume,
3524 .resume = cache_resume,
3525 .status = cache_status,
3526 .message = cache_message,
3527 .iterate_devices = cache_iterate_devices,
3528 .io_hints = cache_io_hints,
3529};
3530
3531static int __init dm_cache_init(void)
3532{
3533 int r;
3534
3535 r = dm_register_target(&cache_target);
3536 if (r) {
3537 DMERR("cache target registration failed: %d", r);
3538 return r;
3539 }
3540
3541 migration_cache = KMEM_CACHE(dm_cache_migration, 0);
3542 if (!migration_cache) {
3543 dm_unregister_target(&cache_target);
3544 return -ENOMEM;
3545 }
3546
3547 return 0;
3548}
3549
3550static void __exit dm_cache_exit(void)
3551{
3552 dm_unregister_target(&cache_target);
3553 kmem_cache_destroy(migration_cache);
3554}
3555
3556module_init(dm_cache_init);
3557module_exit(dm_cache_exit);
3558
3559MODULE_DESCRIPTION(DM_NAME " cache target");
3560MODULE_AUTHOR("Joe Thornber <ejt@redhat.com>");
3561MODULE_LICENSE("GPL");
3562