1
2
3
4
5
6
7#include "dm.h"
8#include "dm-bio-prison.h"
9#include "dm-bio-record.h"
10#include "dm-cache-metadata.h"
11
12#include <linux/dm-io.h>
13#include <linux/dm-kcopyd.h>
14#include <linux/jiffies.h>
15#include <linux/init.h>
16#include <linux/mempool.h>
17#include <linux/module.h>
18#include <linux/slab.h>
19#include <linux/vmalloc.h>
20
21#define DM_MSG_PREFIX "cache"
22
23DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(cache_copy_throttle,
24 "A percentage of time allocated for copying to and/or from cache");
25
26
27
28#define IOT_RESOLUTION 4
29
30struct io_tracker {
31 spinlock_t lock;
32
33
34
35
36 sector_t in_flight;
37
38
39
40
41
42 unsigned long idle_time;
43 unsigned long last_update_time;
44};
45
46static void iot_init(struct io_tracker *iot)
47{
48 spin_lock_init(&iot->lock);
49 iot->in_flight = 0ul;
50 iot->idle_time = 0ul;
51 iot->last_update_time = jiffies;
52}
53
54static bool __iot_idle_for(struct io_tracker *iot, unsigned long jifs)
55{
56 if (iot->in_flight)
57 return false;
58
59 return time_after(jiffies, iot->idle_time + jifs);
60}
61
62static bool iot_idle_for(struct io_tracker *iot, unsigned long jifs)
63{
64 bool r;
65 unsigned long flags;
66
67 spin_lock_irqsave(&iot->lock, flags);
68 r = __iot_idle_for(iot, jifs);
69 spin_unlock_irqrestore(&iot->lock, flags);
70
71 return r;
72}
73
74static void iot_io_begin(struct io_tracker *iot, sector_t len)
75{
76 unsigned long flags;
77
78 spin_lock_irqsave(&iot->lock, flags);
79 iot->in_flight += len;
80 spin_unlock_irqrestore(&iot->lock, flags);
81}
82
83static void __iot_io_end(struct io_tracker *iot, sector_t len)
84{
85 iot->in_flight -= len;
86 if (!iot->in_flight)
87 iot->idle_time = jiffies;
88}
89
90static void iot_io_end(struct io_tracker *iot, sector_t len)
91{
92 unsigned long flags;
93
94 spin_lock_irqsave(&iot->lock, flags);
95 __iot_io_end(iot, len);
96 spin_unlock_irqrestore(&iot->lock, flags);
97}
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119struct dm_hook_info {
120 bio_end_io_t *bi_end_io;
121};
122
123static void dm_hook_bio(struct dm_hook_info *h, struct bio *bio,
124 bio_end_io_t *bi_end_io, void *bi_private)
125{
126 h->bi_end_io = bio->bi_end_io;
127
128 bio->bi_end_io = bi_end_io;
129 bio->bi_private = bi_private;
130}
131
132static void dm_unhook_bio(struct dm_hook_info *h, struct bio *bio)
133{
134 bio->bi_end_io = h->bi_end_io;
135}
136
137
138
139#define MIGRATION_POOL_SIZE 128
140#define COMMIT_PERIOD HZ
141#define MIGRATION_COUNT_WINDOW 10
142
143
144
145
146
147#define DATA_DEV_BLOCK_SIZE_MIN_SECTORS (32 * 1024 >> SECTOR_SHIFT)
148#define DATA_DEV_BLOCK_SIZE_MAX_SECTORS (1024 * 1024 * 1024 >> SECTOR_SHIFT)
149
150enum cache_metadata_mode {
151 CM_WRITE,
152 CM_READ_ONLY,
153 CM_FAIL
154};
155
156enum cache_io_mode {
157
158
159
160
161
162 CM_IO_WRITEBACK,
163
164
165
166
167
168 CM_IO_WRITETHROUGH,
169
170
171
172
173
174
175
176 CM_IO_PASSTHROUGH
177};
178
179struct cache_features {
180 enum cache_metadata_mode mode;
181 enum cache_io_mode io_mode;
182 unsigned metadata_version;
183};
184
185struct cache_stats {
186 atomic_t read_hit;
187 atomic_t read_miss;
188 atomic_t write_hit;
189 atomic_t write_miss;
190 atomic_t demotion;
191 atomic_t promotion;
192 atomic_t copies_avoided;
193 atomic_t cache_cell_clash;
194 atomic_t commit_count;
195 atomic_t discard_count;
196};
197
198
199
200
201
202struct cblock_range {
203 dm_cblock_t begin;
204 dm_cblock_t end;
205};
206
207struct invalidation_request {
208 struct list_head list;
209 struct cblock_range *cblocks;
210
211 atomic_t complete;
212 int err;
213
214 wait_queue_head_t result_wait;
215};
216
217struct cache {
218 struct dm_target *ti;
219 struct dm_target_callbacks callbacks;
220
221 struct dm_cache_metadata *cmd;
222
223
224
225
226 struct dm_dev *metadata_dev;
227
228
229
230
231 struct dm_dev *origin_dev;
232
233
234
235
236 struct dm_dev *cache_dev;
237
238
239
240
241 dm_oblock_t origin_blocks;
242 sector_t origin_sectors;
243
244
245
246
247 dm_cblock_t cache_size;
248
249
250
251
252 sector_t sectors_per_block;
253 int sectors_per_block_shift;
254
255 spinlock_t lock;
256 struct list_head deferred_cells;
257 struct bio_list deferred_bios;
258 struct bio_list deferred_flush_bios;
259 struct bio_list deferred_writethrough_bios;
260 struct list_head quiesced_migrations;
261 struct list_head completed_migrations;
262 struct list_head need_commit_migrations;
263 sector_t migration_threshold;
264 wait_queue_head_t migration_wait;
265 atomic_t nr_allocated_migrations;
266
267
268
269
270
271 atomic_t nr_io_migrations;
272
273 wait_queue_head_t quiescing_wait;
274 atomic_t quiescing;
275 atomic_t quiescing_ack;
276
277
278
279
280 atomic_t nr_dirty;
281 unsigned long *dirty_bitset;
282
283
284
285
286 dm_dblock_t discard_nr_blocks;
287 unsigned long *discard_bitset;
288 uint32_t discard_block_size;
289
290
291
292
293
294 unsigned nr_ctr_args;
295 const char **ctr_args;
296
297 struct dm_kcopyd_client *copier;
298 struct workqueue_struct *wq;
299 struct work_struct worker;
300
301 struct delayed_work waker;
302 unsigned long last_commit_jiffies;
303
304 struct dm_bio_prison *prison;
305 struct dm_deferred_set *all_io_ds;
306
307 mempool_t *migration_pool;
308
309 struct dm_cache_policy *policy;
310 unsigned policy_nr_args;
311
312 bool need_tick_bio:1;
313 bool sized:1;
314 bool invalidate:1;
315 bool commit_requested:1;
316 bool loaded_mappings:1;
317 bool loaded_discards:1;
318
319
320
321
322 struct cache_features features;
323
324 struct cache_stats stats;
325
326
327
328
329 spinlock_t invalidation_lock;
330 struct list_head invalidation_requests;
331
332 struct io_tracker origin_tracker;
333};
334
335struct per_bio_data {
336 bool tick:1;
337 unsigned req_nr:2;
338 struct dm_deferred_entry *all_io_entry;
339 struct dm_hook_info hook_info;
340 sector_t len;
341
342
343
344
345
346
347 struct cache *cache;
348 dm_cblock_t cblock;
349 struct dm_bio_details bio_details;
350};
351
352struct dm_cache_migration {
353 struct list_head list;
354 struct cache *cache;
355
356 unsigned long start_jiffies;
357 dm_oblock_t old_oblock;
358 dm_oblock_t new_oblock;
359 dm_cblock_t cblock;
360
361 bool err:1;
362 bool discard:1;
363 bool writeback:1;
364 bool demote:1;
365 bool promote:1;
366 bool requeue_holder:1;
367 bool invalidate:1;
368
369 struct dm_bio_prison_cell *old_ocell;
370 struct dm_bio_prison_cell *new_ocell;
371};
372
373
374
375
376
377
378struct prealloc {
379 struct dm_cache_migration *mg;
380 struct dm_bio_prison_cell *cell1;
381 struct dm_bio_prison_cell *cell2;
382};
383
384static enum cache_metadata_mode get_cache_mode(struct cache *cache);
385
386static void wake_worker(struct cache *cache)
387{
388 queue_work(cache->wq, &cache->worker);
389}
390
391
392
393static struct dm_bio_prison_cell *alloc_prison_cell(struct cache *cache)
394{
395
396 return dm_bio_prison_alloc_cell(cache->prison, GFP_NOWAIT);
397}
398
399static void free_prison_cell(struct cache *cache, struct dm_bio_prison_cell *cell)
400{
401 dm_bio_prison_free_cell(cache->prison, cell);
402}
403
404static struct dm_cache_migration *alloc_migration(struct cache *cache)
405{
406 struct dm_cache_migration *mg;
407
408 mg = mempool_alloc(cache->migration_pool, GFP_NOWAIT);
409 if (mg) {
410 mg->cache = cache;
411 atomic_inc(&mg->cache->nr_allocated_migrations);
412 }
413
414 return mg;
415}
416
417static void free_migration(struct dm_cache_migration *mg)
418{
419 struct cache *cache = mg->cache;
420
421 if (atomic_dec_and_test(&cache->nr_allocated_migrations))
422 wake_up(&cache->migration_wait);
423
424 mempool_free(mg, cache->migration_pool);
425}
426
427static int prealloc_data_structs(struct cache *cache, struct prealloc *p)
428{
429 if (!p->mg) {
430 p->mg = alloc_migration(cache);
431 if (!p->mg)
432 return -ENOMEM;
433 }
434
435 if (!p->cell1) {
436 p->cell1 = alloc_prison_cell(cache);
437 if (!p->cell1)
438 return -ENOMEM;
439 }
440
441 if (!p->cell2) {
442 p->cell2 = alloc_prison_cell(cache);
443 if (!p->cell2)
444 return -ENOMEM;
445 }
446
447 return 0;
448}
449
450static void prealloc_free_structs(struct cache *cache, struct prealloc *p)
451{
452 if (p->cell2)
453 free_prison_cell(cache, p->cell2);
454
455 if (p->cell1)
456 free_prison_cell(cache, p->cell1);
457
458 if (p->mg)
459 free_migration(p->mg);
460}
461
462static struct dm_cache_migration *prealloc_get_migration(struct prealloc *p)
463{
464 struct dm_cache_migration *mg = p->mg;
465
466 BUG_ON(!mg);
467 p->mg = NULL;
468
469 return mg;
470}
471
472
473
474
475
476static struct dm_bio_prison_cell *prealloc_get_cell(struct prealloc *p)
477{
478 struct dm_bio_prison_cell *r = NULL;
479
480 if (p->cell1) {
481 r = p->cell1;
482 p->cell1 = NULL;
483
484 } else if (p->cell2) {
485 r = p->cell2;
486 p->cell2 = NULL;
487 } else
488 BUG();
489
490 return r;
491}
492
493
494
495
496
497static void prealloc_put_cell(struct prealloc *p, struct dm_bio_prison_cell *cell)
498{
499 if (!p->cell2)
500 p->cell2 = cell;
501
502 else if (!p->cell1)
503 p->cell1 = cell;
504
505 else
506 BUG();
507}
508
509
510
511static void build_key(dm_oblock_t begin, dm_oblock_t end, struct dm_cell_key *key)
512{
513 key->virtual = 0;
514 key->dev = 0;
515 key->block_begin = from_oblock(begin);
516 key->block_end = from_oblock(end);
517}
518
519
520
521
522
523
524typedef void (*cell_free_fn)(void *context, struct dm_bio_prison_cell *cell);
525
526static int bio_detain_range(struct cache *cache, dm_oblock_t oblock_begin, dm_oblock_t oblock_end,
527 struct bio *bio, struct dm_bio_prison_cell *cell_prealloc,
528 cell_free_fn free_fn, void *free_context,
529 struct dm_bio_prison_cell **cell_result)
530{
531 int r;
532 struct dm_cell_key key;
533
534 build_key(oblock_begin, oblock_end, &key);
535 r = dm_bio_detain(cache->prison, &key, bio, cell_prealloc, cell_result);
536 if (r)
537 free_fn(free_context, cell_prealloc);
538
539 return r;
540}
541
542static int bio_detain(struct cache *cache, dm_oblock_t oblock,
543 struct bio *bio, struct dm_bio_prison_cell *cell_prealloc,
544 cell_free_fn free_fn, void *free_context,
545 struct dm_bio_prison_cell **cell_result)
546{
547 dm_oblock_t end = to_oblock(from_oblock(oblock) + 1ULL);
548 return bio_detain_range(cache, oblock, end, bio,
549 cell_prealloc, free_fn, free_context, cell_result);
550}
551
552static int get_cell(struct cache *cache,
553 dm_oblock_t oblock,
554 struct prealloc *structs,
555 struct dm_bio_prison_cell **cell_result)
556{
557 int r;
558 struct dm_cell_key key;
559 struct dm_bio_prison_cell *cell_prealloc;
560
561 cell_prealloc = prealloc_get_cell(structs);
562
563 build_key(oblock, to_oblock(from_oblock(oblock) + 1ULL), &key);
564 r = dm_get_cell(cache->prison, &key, cell_prealloc, cell_result);
565 if (r)
566 prealloc_put_cell(structs, cell_prealloc);
567
568 return r;
569}
570
571
572
573static bool is_dirty(struct cache *cache, dm_cblock_t b)
574{
575 return test_bit(from_cblock(b), cache->dirty_bitset);
576}
577
578static void set_dirty(struct cache *cache, dm_oblock_t oblock, dm_cblock_t cblock)
579{
580 if (!test_and_set_bit(from_cblock(cblock), cache->dirty_bitset)) {
581 atomic_inc(&cache->nr_dirty);
582 policy_set_dirty(cache->policy, oblock);
583 }
584}
585
586static void clear_dirty(struct cache *cache, dm_oblock_t oblock, dm_cblock_t cblock)
587{
588 if (test_and_clear_bit(from_cblock(cblock), cache->dirty_bitset)) {
589 policy_clear_dirty(cache->policy, oblock);
590 if (atomic_dec_return(&cache->nr_dirty) == 0)
591 dm_table_event(cache->ti->table);
592 }
593}
594
595
596
597static bool block_size_is_power_of_two(struct cache *cache)
598{
599 return cache->sectors_per_block_shift >= 0;
600}
601
602
603#if defined(CONFIG_ARM) && __GNUC__ == 4 && __GNUC_MINOR__ <= 6
604__always_inline
605#endif
606static dm_block_t block_div(dm_block_t b, uint32_t n)
607{
608 do_div(b, n);
609
610 return b;
611}
612
613static dm_block_t oblocks_per_dblock(struct cache *cache)
614{
615 dm_block_t oblocks = cache->discard_block_size;
616
617 if (block_size_is_power_of_two(cache))
618 oblocks >>= cache->sectors_per_block_shift;
619 else
620 oblocks = block_div(oblocks, cache->sectors_per_block);
621
622 return oblocks;
623}
624
625static dm_dblock_t oblock_to_dblock(struct cache *cache, dm_oblock_t oblock)
626{
627 return to_dblock(block_div(from_oblock(oblock),
628 oblocks_per_dblock(cache)));
629}
630
631static dm_oblock_t dblock_to_oblock(struct cache *cache, dm_dblock_t dblock)
632{
633 return to_oblock(from_dblock(dblock) * oblocks_per_dblock(cache));
634}
635
636static void set_discard(struct cache *cache, dm_dblock_t b)
637{
638 unsigned long flags;
639
640 BUG_ON(from_dblock(b) >= from_dblock(cache->discard_nr_blocks));
641 atomic_inc(&cache->stats.discard_count);
642
643 spin_lock_irqsave(&cache->lock, flags);
644 set_bit(from_dblock(b), cache->discard_bitset);
645 spin_unlock_irqrestore(&cache->lock, flags);
646}
647
648static void clear_discard(struct cache *cache, dm_dblock_t b)
649{
650 unsigned long flags;
651
652 spin_lock_irqsave(&cache->lock, flags);
653 clear_bit(from_dblock(b), cache->discard_bitset);
654 spin_unlock_irqrestore(&cache->lock, flags);
655}
656
657static bool is_discarded(struct cache *cache, dm_dblock_t b)
658{
659 int r;
660 unsigned long flags;
661
662 spin_lock_irqsave(&cache->lock, flags);
663 r = test_bit(from_dblock(b), cache->discard_bitset);
664 spin_unlock_irqrestore(&cache->lock, flags);
665
666 return r;
667}
668
669static bool is_discarded_oblock(struct cache *cache, dm_oblock_t b)
670{
671 int r;
672 unsigned long flags;
673
674 spin_lock_irqsave(&cache->lock, flags);
675 r = test_bit(from_dblock(oblock_to_dblock(cache, b)),
676 cache->discard_bitset);
677 spin_unlock_irqrestore(&cache->lock, flags);
678
679 return r;
680}
681
682
683
684static void load_stats(struct cache *cache)
685{
686 struct dm_cache_statistics stats;
687
688 dm_cache_metadata_get_stats(cache->cmd, &stats);
689 atomic_set(&cache->stats.read_hit, stats.read_hits);
690 atomic_set(&cache->stats.read_miss, stats.read_misses);
691 atomic_set(&cache->stats.write_hit, stats.write_hits);
692 atomic_set(&cache->stats.write_miss, stats.write_misses);
693}
694
695static void save_stats(struct cache *cache)
696{
697 struct dm_cache_statistics stats;
698
699 if (get_cache_mode(cache) >= CM_READ_ONLY)
700 return;
701
702 stats.read_hits = atomic_read(&cache->stats.read_hit);
703 stats.read_misses = atomic_read(&cache->stats.read_miss);
704 stats.write_hits = atomic_read(&cache->stats.write_hit);
705 stats.write_misses = atomic_read(&cache->stats.write_miss);
706
707 dm_cache_metadata_set_stats(cache->cmd, &stats);
708}
709
710
711
712
713
714
715
716
717#define PB_DATA_SIZE_WB (offsetof(struct per_bio_data, cache))
718#define PB_DATA_SIZE_WT (sizeof(struct per_bio_data))
719
720static bool writethrough_mode(struct cache_features *f)
721{
722 return f->io_mode == CM_IO_WRITETHROUGH;
723}
724
725static bool writeback_mode(struct cache_features *f)
726{
727 return f->io_mode == CM_IO_WRITEBACK;
728}
729
730static bool passthrough_mode(struct cache_features *f)
731{
732 return f->io_mode == CM_IO_PASSTHROUGH;
733}
734
735static size_t get_per_bio_data_size(struct cache *cache)
736{
737 return writethrough_mode(&cache->features) ? PB_DATA_SIZE_WT : PB_DATA_SIZE_WB;
738}
739
740static struct per_bio_data *get_per_bio_data(struct bio *bio, size_t data_size)
741{
742 struct per_bio_data *pb = dm_per_bio_data(bio, data_size);
743 BUG_ON(!pb);
744 return pb;
745}
746
747static struct per_bio_data *init_per_bio_data(struct bio *bio, size_t data_size)
748{
749 struct per_bio_data *pb = get_per_bio_data(bio, data_size);
750
751 pb->tick = false;
752 pb->req_nr = dm_bio_get_target_bio_nr(bio);
753 pb->all_io_entry = NULL;
754 pb->len = 0;
755
756 return pb;
757}
758
759
760
761
762static void remap_to_origin(struct cache *cache, struct bio *bio)
763{
764 bio->bi_bdev = cache->origin_dev->bdev;
765}
766
767static void remap_to_cache(struct cache *cache, struct bio *bio,
768 dm_cblock_t cblock)
769{
770 sector_t bi_sector = bio->bi_iter.bi_sector;
771 sector_t block = from_cblock(cblock);
772
773 bio->bi_bdev = cache->cache_dev->bdev;
774 if (!block_size_is_power_of_two(cache))
775 bio->bi_iter.bi_sector =
776 (block * cache->sectors_per_block) +
777 sector_div(bi_sector, cache->sectors_per_block);
778 else
779 bio->bi_iter.bi_sector =
780 (block << cache->sectors_per_block_shift) |
781 (bi_sector & (cache->sectors_per_block - 1));
782}
783
784static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio)
785{
786 unsigned long flags;
787 size_t pb_data_size = get_per_bio_data_size(cache);
788 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
789
790 spin_lock_irqsave(&cache->lock, flags);
791 if (cache->need_tick_bio && !op_is_flush(bio->bi_opf) &&
792 bio_op(bio) != REQ_OP_DISCARD) {
793 pb->tick = true;
794 cache->need_tick_bio = false;
795 }
796 spin_unlock_irqrestore(&cache->lock, flags);
797}
798
799static void remap_to_origin_clear_discard(struct cache *cache, struct bio *bio,
800 dm_oblock_t oblock)
801{
802 check_if_tick_bio_needed(cache, bio);
803 remap_to_origin(cache, bio);
804 if (bio_data_dir(bio) == WRITE)
805 clear_discard(cache, oblock_to_dblock(cache, oblock));
806}
807
808static void remap_to_cache_dirty(struct cache *cache, struct bio *bio,
809 dm_oblock_t oblock, dm_cblock_t cblock)
810{
811 check_if_tick_bio_needed(cache, bio);
812 remap_to_cache(cache, bio, cblock);
813 if (bio_data_dir(bio) == WRITE) {
814 set_dirty(cache, oblock, cblock);
815 clear_discard(cache, oblock_to_dblock(cache, oblock));
816 }
817}
818
819static dm_oblock_t get_bio_block(struct cache *cache, struct bio *bio)
820{
821 sector_t block_nr = bio->bi_iter.bi_sector;
822
823 if (!block_size_is_power_of_two(cache))
824 (void) sector_div(block_nr, cache->sectors_per_block);
825 else
826 block_nr >>= cache->sectors_per_block_shift;
827
828 return to_oblock(block_nr);
829}
830
831
832
833
834
835static void inc_ds(struct cache *cache, struct bio *bio,
836 struct dm_bio_prison_cell *cell)
837{
838 size_t pb_data_size = get_per_bio_data_size(cache);
839 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
840
841 BUG_ON(!cell);
842 BUG_ON(pb->all_io_entry);
843
844 pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
845}
846
847static bool accountable_bio(struct cache *cache, struct bio *bio)
848{
849 return ((bio->bi_bdev == cache->origin_dev->bdev) &&
850 bio_op(bio) != REQ_OP_DISCARD);
851}
852
853static void accounted_begin(struct cache *cache, struct bio *bio)
854{
855 size_t pb_data_size = get_per_bio_data_size(cache);
856 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
857
858 if (accountable_bio(cache, bio)) {
859 pb->len = bio_sectors(bio);
860 iot_io_begin(&cache->origin_tracker, pb->len);
861 }
862}
863
864static void accounted_complete(struct cache *cache, struct bio *bio)
865{
866 size_t pb_data_size = get_per_bio_data_size(cache);
867 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
868
869 iot_io_end(&cache->origin_tracker, pb->len);
870}
871
872static void accounted_request(struct cache *cache, struct bio *bio)
873{
874 accounted_begin(cache, bio);
875 generic_make_request(bio);
876}
877
878static void issue(struct cache *cache, struct bio *bio)
879{
880 unsigned long flags;
881
882 if (!op_is_flush(bio->bi_opf)) {
883 accounted_request(cache, bio);
884 return;
885 }
886
887
888
889
890
891 spin_lock_irqsave(&cache->lock, flags);
892 cache->commit_requested = true;
893 bio_list_add(&cache->deferred_flush_bios, bio);
894 spin_unlock_irqrestore(&cache->lock, flags);
895}
896
897static void inc_and_issue(struct cache *cache, struct bio *bio, struct dm_bio_prison_cell *cell)
898{
899 inc_ds(cache, bio, cell);
900 issue(cache, bio);
901}
902
903static void defer_writethrough_bio(struct cache *cache, struct bio *bio)
904{
905 unsigned long flags;
906
907 spin_lock_irqsave(&cache->lock, flags);
908 bio_list_add(&cache->deferred_writethrough_bios, bio);
909 spin_unlock_irqrestore(&cache->lock, flags);
910
911 wake_worker(cache);
912}
913
914static void writethrough_endio(struct bio *bio)
915{
916 struct per_bio_data *pb = get_per_bio_data(bio, PB_DATA_SIZE_WT);
917
918 dm_unhook_bio(&pb->hook_info, bio);
919
920 if (bio->bi_error) {
921 bio_endio(bio);
922 return;
923 }
924
925 dm_bio_restore(&pb->bio_details, bio);
926 remap_to_cache(pb->cache, bio, pb->cblock);
927
928
929
930
931
932
933 defer_writethrough_bio(pb->cache, bio);
934}
935
936
937
938
939
940
941
942static void remap_to_origin_then_cache(struct cache *cache, struct bio *bio,
943 dm_oblock_t oblock, dm_cblock_t cblock)
944{
945 struct per_bio_data *pb = get_per_bio_data(bio, PB_DATA_SIZE_WT);
946
947 pb->cache = cache;
948 pb->cblock = cblock;
949 dm_hook_bio(&pb->hook_info, bio, writethrough_endio, NULL);
950 dm_bio_record(&pb->bio_details, bio);
951
952 remap_to_origin_clear_discard(pb->cache, bio, oblock);
953}
954
955
956
957
958static enum cache_metadata_mode get_cache_mode(struct cache *cache)
959{
960 return cache->features.mode;
961}
962
963static const char *cache_device_name(struct cache *cache)
964{
965 return dm_device_name(dm_table_get_md(cache->ti->table));
966}
967
968static void notify_mode_switch(struct cache *cache, enum cache_metadata_mode mode)
969{
970 const char *descs[] = {
971 "write",
972 "read-only",
973 "fail"
974 };
975
976 dm_table_event(cache->ti->table);
977 DMINFO("%s: switching cache to %s mode",
978 cache_device_name(cache), descs[(int)mode]);
979}
980
981static void set_cache_mode(struct cache *cache, enum cache_metadata_mode new_mode)
982{
983 bool needs_check;
984 enum cache_metadata_mode old_mode = get_cache_mode(cache);
985
986 if (dm_cache_metadata_needs_check(cache->cmd, &needs_check)) {
987 DMERR("%s: unable to read needs_check flag, setting failure mode.",
988 cache_device_name(cache));
989 new_mode = CM_FAIL;
990 }
991
992 if (new_mode == CM_WRITE && needs_check) {
993 DMERR("%s: unable to switch cache to write mode until repaired.",
994 cache_device_name(cache));
995 if (old_mode != new_mode)
996 new_mode = old_mode;
997 else
998 new_mode = CM_READ_ONLY;
999 }
1000
1001
1002 if (old_mode == CM_FAIL)
1003 new_mode = CM_FAIL;
1004
1005 switch (new_mode) {
1006 case CM_FAIL:
1007 case CM_READ_ONLY:
1008 dm_cache_metadata_set_read_only(cache->cmd);
1009 break;
1010
1011 case CM_WRITE:
1012 dm_cache_metadata_set_read_write(cache->cmd);
1013 break;
1014 }
1015
1016 cache->features.mode = new_mode;
1017
1018 if (new_mode != old_mode)
1019 notify_mode_switch(cache, new_mode);
1020}
1021
1022static void abort_transaction(struct cache *cache)
1023{
1024 const char *dev_name = cache_device_name(cache);
1025
1026 if (get_cache_mode(cache) >= CM_READ_ONLY)
1027 return;
1028
1029 if (dm_cache_metadata_set_needs_check(cache->cmd)) {
1030 DMERR("%s: failed to set 'needs_check' flag in metadata", dev_name);
1031 set_cache_mode(cache, CM_FAIL);
1032 }
1033
1034 DMERR_LIMIT("%s: aborting current metadata transaction", dev_name);
1035 if (dm_cache_metadata_abort(cache->cmd)) {
1036 DMERR("%s: failed to abort metadata transaction", dev_name);
1037 set_cache_mode(cache, CM_FAIL);
1038 }
1039}
1040
1041static void metadata_operation_failed(struct cache *cache, const char *op, int r)
1042{
1043 DMERR_LIMIT("%s: metadata operation '%s' failed: error = %d",
1044 cache_device_name(cache), op, r);
1045 abort_transaction(cache);
1046 set_cache_mode(cache, CM_READ_ONLY);
1047}
1048
1049
1050
1051
1052
1053
1054
1055static void inc_io_migrations(struct cache *cache)
1056{
1057 atomic_inc(&cache->nr_io_migrations);
1058}
1059
1060static void dec_io_migrations(struct cache *cache)
1061{
1062 atomic_dec(&cache->nr_io_migrations);
1063}
1064
1065static bool discard_or_flush(struct bio *bio)
1066{
1067 return bio_op(bio) == REQ_OP_DISCARD || op_is_flush(bio->bi_opf);
1068}
1069
1070static void __cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell)
1071{
1072 if (discard_or_flush(cell->holder)) {
1073
1074
1075
1076 dm_cell_release(cache->prison, cell, &cache->deferred_bios);
1077 free_prison_cell(cache, cell);
1078 } else
1079 list_add_tail(&cell->user_list, &cache->deferred_cells);
1080}
1081
1082static void cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell, bool holder)
1083{
1084 unsigned long flags;
1085
1086 if (!holder && dm_cell_promote_or_release(cache->prison, cell)) {
1087
1088
1089
1090
1091 free_prison_cell(cache, cell);
1092 return;
1093 }
1094
1095 spin_lock_irqsave(&cache->lock, flags);
1096 __cell_defer(cache, cell);
1097 spin_unlock_irqrestore(&cache->lock, flags);
1098
1099 wake_worker(cache);
1100}
1101
1102static void cell_error_with_code(struct cache *cache, struct dm_bio_prison_cell *cell, int err)
1103{
1104 dm_cell_error(cache->prison, cell, err);
1105 free_prison_cell(cache, cell);
1106}
1107
1108static void cell_requeue(struct cache *cache, struct dm_bio_prison_cell *cell)
1109{
1110 cell_error_with_code(cache, cell, DM_ENDIO_REQUEUE);
1111}
1112
1113static void free_io_migration(struct dm_cache_migration *mg)
1114{
1115 struct cache *cache = mg->cache;
1116
1117 dec_io_migrations(cache);
1118 free_migration(mg);
1119 wake_worker(cache);
1120}
1121
1122static void migration_failure(struct dm_cache_migration *mg)
1123{
1124 struct cache *cache = mg->cache;
1125 const char *dev_name = cache_device_name(cache);
1126
1127 if (mg->writeback) {
1128 DMERR_LIMIT("%s: writeback failed; couldn't copy block", dev_name);
1129 set_dirty(cache, mg->old_oblock, mg->cblock);
1130 cell_defer(cache, mg->old_ocell, false);
1131
1132 } else if (mg->demote) {
1133 DMERR_LIMIT("%s: demotion failed; couldn't copy block", dev_name);
1134 policy_force_mapping(cache->policy, mg->new_oblock, mg->old_oblock);
1135
1136 cell_defer(cache, mg->old_ocell, mg->promote ? false : true);
1137 if (mg->promote)
1138 cell_defer(cache, mg->new_ocell, true);
1139 } else {
1140 DMERR_LIMIT("%s: promotion failed; couldn't copy block", dev_name);
1141 policy_remove_mapping(cache->policy, mg->new_oblock);
1142 cell_defer(cache, mg->new_ocell, true);
1143 }
1144
1145 free_io_migration(mg);
1146}
1147
1148static void migration_success_pre_commit(struct dm_cache_migration *mg)
1149{
1150 int r;
1151 unsigned long flags;
1152 struct cache *cache = mg->cache;
1153
1154 if (mg->writeback) {
1155 clear_dirty(cache, mg->old_oblock, mg->cblock);
1156 cell_defer(cache, mg->old_ocell, false);
1157 free_io_migration(mg);
1158 return;
1159
1160 } else if (mg->demote) {
1161 r = dm_cache_remove_mapping(cache->cmd, mg->cblock);
1162 if (r) {
1163 DMERR_LIMIT("%s: demotion failed; couldn't update on disk metadata",
1164 cache_device_name(cache));
1165 metadata_operation_failed(cache, "dm_cache_remove_mapping", r);
1166 policy_force_mapping(cache->policy, mg->new_oblock,
1167 mg->old_oblock);
1168 if (mg->promote)
1169 cell_defer(cache, mg->new_ocell, true);
1170 free_io_migration(mg);
1171 return;
1172 }
1173 } else {
1174 r = dm_cache_insert_mapping(cache->cmd, mg->cblock, mg->new_oblock);
1175 if (r) {
1176 DMERR_LIMIT("%s: promotion failed; couldn't update on disk metadata",
1177 cache_device_name(cache));
1178 metadata_operation_failed(cache, "dm_cache_insert_mapping", r);
1179 policy_remove_mapping(cache->policy, mg->new_oblock);
1180 free_io_migration(mg);
1181 return;
1182 }
1183 }
1184
1185 spin_lock_irqsave(&cache->lock, flags);
1186 list_add_tail(&mg->list, &cache->need_commit_migrations);
1187 cache->commit_requested = true;
1188 spin_unlock_irqrestore(&cache->lock, flags);
1189}
1190
1191static void migration_success_post_commit(struct dm_cache_migration *mg)
1192{
1193 unsigned long flags;
1194 struct cache *cache = mg->cache;
1195
1196 if (mg->writeback) {
1197 DMWARN_LIMIT("%s: writeback unexpectedly triggered commit",
1198 cache_device_name(cache));
1199 return;
1200
1201 } else if (mg->demote) {
1202 cell_defer(cache, mg->old_ocell, mg->promote ? false : true);
1203
1204 if (mg->promote) {
1205 mg->demote = false;
1206
1207 spin_lock_irqsave(&cache->lock, flags);
1208 list_add_tail(&mg->list, &cache->quiesced_migrations);
1209 spin_unlock_irqrestore(&cache->lock, flags);
1210
1211 } else {
1212 if (mg->invalidate)
1213 policy_remove_mapping(cache->policy, mg->old_oblock);
1214 free_io_migration(mg);
1215 }
1216
1217 } else {
1218 if (mg->requeue_holder) {
1219 clear_dirty(cache, mg->new_oblock, mg->cblock);
1220 cell_defer(cache, mg->new_ocell, true);
1221 } else {
1222
1223
1224
1225 set_dirty(cache, mg->new_oblock, mg->cblock);
1226 bio_endio(mg->new_ocell->holder);
1227 cell_defer(cache, mg->new_ocell, false);
1228 }
1229 free_io_migration(mg);
1230 }
1231}
1232
1233static void copy_complete(int read_err, unsigned long write_err, void *context)
1234{
1235 unsigned long flags;
1236 struct dm_cache_migration *mg = (struct dm_cache_migration *) context;
1237 struct cache *cache = mg->cache;
1238
1239 if (read_err || write_err)
1240 mg->err = true;
1241
1242 spin_lock_irqsave(&cache->lock, flags);
1243 list_add_tail(&mg->list, &cache->completed_migrations);
1244 spin_unlock_irqrestore(&cache->lock, flags);
1245
1246 wake_worker(cache);
1247}
1248
1249static void issue_copy(struct dm_cache_migration *mg)
1250{
1251 int r;
1252 struct dm_io_region o_region, c_region;
1253 struct cache *cache = mg->cache;
1254 sector_t cblock = from_cblock(mg->cblock);
1255
1256 o_region.bdev = cache->origin_dev->bdev;
1257 o_region.count = cache->sectors_per_block;
1258
1259 c_region.bdev = cache->cache_dev->bdev;
1260 c_region.sector = cblock * cache->sectors_per_block;
1261 c_region.count = cache->sectors_per_block;
1262
1263 if (mg->writeback || mg->demote) {
1264
1265 o_region.sector = from_oblock(mg->old_oblock) * cache->sectors_per_block;
1266 r = dm_kcopyd_copy(cache->copier, &c_region, 1, &o_region, 0, copy_complete, mg);
1267 } else {
1268
1269 o_region.sector = from_oblock(mg->new_oblock) * cache->sectors_per_block;
1270 r = dm_kcopyd_copy(cache->copier, &o_region, 1, &c_region, 0, copy_complete, mg);
1271 }
1272
1273 if (r < 0) {
1274 DMERR_LIMIT("%s: issuing migration failed", cache_device_name(cache));
1275 migration_failure(mg);
1276 }
1277}
1278
1279static void overwrite_endio(struct bio *bio)
1280{
1281 struct dm_cache_migration *mg = bio->bi_private;
1282 struct cache *cache = mg->cache;
1283 size_t pb_data_size = get_per_bio_data_size(cache);
1284 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
1285 unsigned long flags;
1286
1287 dm_unhook_bio(&pb->hook_info, bio);
1288
1289 if (bio->bi_error)
1290 mg->err = true;
1291
1292 mg->requeue_holder = false;
1293
1294 spin_lock_irqsave(&cache->lock, flags);
1295 list_add_tail(&mg->list, &cache->completed_migrations);
1296 spin_unlock_irqrestore(&cache->lock, flags);
1297
1298 wake_worker(cache);
1299}
1300
1301static void issue_overwrite(struct dm_cache_migration *mg, struct bio *bio)
1302{
1303 size_t pb_data_size = get_per_bio_data_size(mg->cache);
1304 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
1305
1306 dm_hook_bio(&pb->hook_info, bio, overwrite_endio, mg);
1307 remap_to_cache_dirty(mg->cache, bio, mg->new_oblock, mg->cblock);
1308
1309
1310
1311
1312
1313 accounted_request(mg->cache, bio);
1314}
1315
1316static bool bio_writes_complete_block(struct cache *cache, struct bio *bio)
1317{
1318 return (bio_data_dir(bio) == WRITE) &&
1319 (bio->bi_iter.bi_size == (cache->sectors_per_block << SECTOR_SHIFT));
1320}
1321
1322static void avoid_copy(struct dm_cache_migration *mg)
1323{
1324 atomic_inc(&mg->cache->stats.copies_avoided);
1325 migration_success_pre_commit(mg);
1326}
1327
1328static void calc_discard_block_range(struct cache *cache, struct bio *bio,
1329 dm_dblock_t *b, dm_dblock_t *e)
1330{
1331 sector_t sb = bio->bi_iter.bi_sector;
1332 sector_t se = bio_end_sector(bio);
1333
1334 *b = to_dblock(dm_sector_div_up(sb, cache->discard_block_size));
1335
1336 if (se - sb < cache->discard_block_size)
1337 *e = *b;
1338 else
1339 *e = to_dblock(block_div(se, cache->discard_block_size));
1340}
1341
1342static void issue_discard(struct dm_cache_migration *mg)
1343{
1344 dm_dblock_t b, e;
1345 struct bio *bio = mg->new_ocell->holder;
1346 struct cache *cache = mg->cache;
1347
1348 calc_discard_block_range(cache, bio, &b, &e);
1349 while (b != e) {
1350 set_discard(cache, b);
1351 b = to_dblock(from_dblock(b) + 1);
1352 }
1353
1354 bio_endio(bio);
1355 cell_defer(cache, mg->new_ocell, false);
1356 free_migration(mg);
1357 wake_worker(cache);
1358}
1359
1360static void issue_copy_or_discard(struct dm_cache_migration *mg)
1361{
1362 bool avoid;
1363 struct cache *cache = mg->cache;
1364
1365 if (mg->discard) {
1366 issue_discard(mg);
1367 return;
1368 }
1369
1370 if (mg->writeback || mg->demote)
1371 avoid = !is_dirty(cache, mg->cblock) ||
1372 is_discarded_oblock(cache, mg->old_oblock);
1373 else {
1374 struct bio *bio = mg->new_ocell->holder;
1375
1376 avoid = is_discarded_oblock(cache, mg->new_oblock);
1377
1378 if (writeback_mode(&cache->features) &&
1379 !avoid && bio_writes_complete_block(cache, bio)) {
1380 issue_overwrite(mg, bio);
1381 return;
1382 }
1383 }
1384
1385 avoid ? avoid_copy(mg) : issue_copy(mg);
1386}
1387
1388static void complete_migration(struct dm_cache_migration *mg)
1389{
1390 if (mg->err)
1391 migration_failure(mg);
1392 else
1393 migration_success_pre_commit(mg);
1394}
1395
1396static void process_migrations(struct cache *cache, struct list_head *head,
1397 void (*fn)(struct dm_cache_migration *))
1398{
1399 unsigned long flags;
1400 struct list_head list;
1401 struct dm_cache_migration *mg, *tmp;
1402
1403 INIT_LIST_HEAD(&list);
1404 spin_lock_irqsave(&cache->lock, flags);
1405 list_splice_init(head, &list);
1406 spin_unlock_irqrestore(&cache->lock, flags);
1407
1408 list_for_each_entry_safe(mg, tmp, &list, list)
1409 fn(mg);
1410}
1411
1412static void __queue_quiesced_migration(struct dm_cache_migration *mg)
1413{
1414 list_add_tail(&mg->list, &mg->cache->quiesced_migrations);
1415}
1416
1417static void queue_quiesced_migration(struct dm_cache_migration *mg)
1418{
1419 unsigned long flags;
1420 struct cache *cache = mg->cache;
1421
1422 spin_lock_irqsave(&cache->lock, flags);
1423 __queue_quiesced_migration(mg);
1424 spin_unlock_irqrestore(&cache->lock, flags);
1425
1426 wake_worker(cache);
1427}
1428
1429static void queue_quiesced_migrations(struct cache *cache, struct list_head *work)
1430{
1431 unsigned long flags;
1432 struct dm_cache_migration *mg, *tmp;
1433
1434 spin_lock_irqsave(&cache->lock, flags);
1435 list_for_each_entry_safe(mg, tmp, work, list)
1436 __queue_quiesced_migration(mg);
1437 spin_unlock_irqrestore(&cache->lock, flags);
1438
1439 wake_worker(cache);
1440}
1441
1442static void check_for_quiesced_migrations(struct cache *cache,
1443 struct per_bio_data *pb)
1444{
1445 struct list_head work;
1446
1447 if (!pb->all_io_entry)
1448 return;
1449
1450 INIT_LIST_HEAD(&work);
1451 dm_deferred_entry_dec(pb->all_io_entry, &work);
1452
1453 if (!list_empty(&work))
1454 queue_quiesced_migrations(cache, &work);
1455}
1456
1457static void quiesce_migration(struct dm_cache_migration *mg)
1458{
1459 if (!dm_deferred_set_add_work(mg->cache->all_io_ds, &mg->list))
1460 queue_quiesced_migration(mg);
1461}
1462
1463static void promote(struct cache *cache, struct prealloc *structs,
1464 dm_oblock_t oblock, dm_cblock_t cblock,
1465 struct dm_bio_prison_cell *cell)
1466{
1467 struct dm_cache_migration *mg = prealloc_get_migration(structs);
1468
1469 mg->err = false;
1470 mg->discard = false;
1471 mg->writeback = false;
1472 mg->demote = false;
1473 mg->promote = true;
1474 mg->requeue_holder = true;
1475 mg->invalidate = false;
1476 mg->cache = cache;
1477 mg->new_oblock = oblock;
1478 mg->cblock = cblock;
1479 mg->old_ocell = NULL;
1480 mg->new_ocell = cell;
1481 mg->start_jiffies = jiffies;
1482
1483 inc_io_migrations(cache);
1484 quiesce_migration(mg);
1485}
1486
1487static void writeback(struct cache *cache, struct prealloc *structs,
1488 dm_oblock_t oblock, dm_cblock_t cblock,
1489 struct dm_bio_prison_cell *cell)
1490{
1491 struct dm_cache_migration *mg = prealloc_get_migration(structs);
1492
1493 mg->err = false;
1494 mg->discard = false;
1495 mg->writeback = true;
1496 mg->demote = false;
1497 mg->promote = false;
1498 mg->requeue_holder = true;
1499 mg->invalidate = false;
1500 mg->cache = cache;
1501 mg->old_oblock = oblock;
1502 mg->cblock = cblock;
1503 mg->old_ocell = cell;
1504 mg->new_ocell = NULL;
1505 mg->start_jiffies = jiffies;
1506
1507 inc_io_migrations(cache);
1508 quiesce_migration(mg);
1509}
1510
1511static void demote_then_promote(struct cache *cache, struct prealloc *structs,
1512 dm_oblock_t old_oblock, dm_oblock_t new_oblock,
1513 dm_cblock_t cblock,
1514 struct dm_bio_prison_cell *old_ocell,
1515 struct dm_bio_prison_cell *new_ocell)
1516{
1517 struct dm_cache_migration *mg = prealloc_get_migration(structs);
1518
1519 mg->err = false;
1520 mg->discard = false;
1521 mg->writeback = false;
1522 mg->demote = true;
1523 mg->promote = true;
1524 mg->requeue_holder = true;
1525 mg->invalidate = false;
1526 mg->cache = cache;
1527 mg->old_oblock = old_oblock;
1528 mg->new_oblock = new_oblock;
1529 mg->cblock = cblock;
1530 mg->old_ocell = old_ocell;
1531 mg->new_ocell = new_ocell;
1532 mg->start_jiffies = jiffies;
1533
1534 inc_io_migrations(cache);
1535 quiesce_migration(mg);
1536}
1537
1538
1539
1540
1541
1542static void invalidate(struct cache *cache, struct prealloc *structs,
1543 dm_oblock_t oblock, dm_cblock_t cblock,
1544 struct dm_bio_prison_cell *cell)
1545{
1546 struct dm_cache_migration *mg = prealloc_get_migration(structs);
1547
1548 mg->err = false;
1549 mg->discard = false;
1550 mg->writeback = false;
1551 mg->demote = true;
1552 mg->promote = false;
1553 mg->requeue_holder = true;
1554 mg->invalidate = true;
1555 mg->cache = cache;
1556 mg->old_oblock = oblock;
1557 mg->cblock = cblock;
1558 mg->old_ocell = cell;
1559 mg->new_ocell = NULL;
1560 mg->start_jiffies = jiffies;
1561
1562 inc_io_migrations(cache);
1563 quiesce_migration(mg);
1564}
1565
1566static void discard(struct cache *cache, struct prealloc *structs,
1567 struct dm_bio_prison_cell *cell)
1568{
1569 struct dm_cache_migration *mg = prealloc_get_migration(structs);
1570
1571 mg->err = false;
1572 mg->discard = true;
1573 mg->writeback = false;
1574 mg->demote = false;
1575 mg->promote = false;
1576 mg->requeue_holder = false;
1577 mg->invalidate = false;
1578 mg->cache = cache;
1579 mg->old_ocell = NULL;
1580 mg->new_ocell = cell;
1581 mg->start_jiffies = jiffies;
1582
1583 quiesce_migration(mg);
1584}
1585
1586
1587
1588
1589static void defer_bio(struct cache *cache, struct bio *bio)
1590{
1591 unsigned long flags;
1592
1593 spin_lock_irqsave(&cache->lock, flags);
1594 bio_list_add(&cache->deferred_bios, bio);
1595 spin_unlock_irqrestore(&cache->lock, flags);
1596
1597 wake_worker(cache);
1598}
1599
1600static void process_flush_bio(struct cache *cache, struct bio *bio)
1601{
1602 size_t pb_data_size = get_per_bio_data_size(cache);
1603 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
1604
1605 BUG_ON(bio->bi_iter.bi_size);
1606 if (!pb->req_nr)
1607 remap_to_origin(cache, bio);
1608 else
1609 remap_to_cache(cache, bio, 0);
1610
1611
1612
1613
1614
1615
1616 issue(cache, bio);
1617}
1618
1619static void process_discard_bio(struct cache *cache, struct prealloc *structs,
1620 struct bio *bio)
1621{
1622 int r;
1623 dm_dblock_t b, e;
1624 struct dm_bio_prison_cell *cell_prealloc, *new_ocell;
1625
1626 calc_discard_block_range(cache, bio, &b, &e);
1627 if (b == e) {
1628 bio_endio(bio);
1629 return;
1630 }
1631
1632 cell_prealloc = prealloc_get_cell(structs);
1633 r = bio_detain_range(cache, dblock_to_oblock(cache, b), dblock_to_oblock(cache, e), bio, cell_prealloc,
1634 (cell_free_fn) prealloc_put_cell,
1635 structs, &new_ocell);
1636 if (r > 0)
1637 return;
1638
1639 discard(cache, structs, new_ocell);
1640}
1641
1642static bool spare_migration_bandwidth(struct cache *cache)
1643{
1644 sector_t current_volume = (atomic_read(&cache->nr_io_migrations) + 1) *
1645 cache->sectors_per_block;
1646 return current_volume < cache->migration_threshold;
1647}
1648
1649static void inc_hit_counter(struct cache *cache, struct bio *bio)
1650{
1651 atomic_inc(bio_data_dir(bio) == READ ?
1652 &cache->stats.read_hit : &cache->stats.write_hit);
1653}
1654
1655static void inc_miss_counter(struct cache *cache, struct bio *bio)
1656{
1657 atomic_inc(bio_data_dir(bio) == READ ?
1658 &cache->stats.read_miss : &cache->stats.write_miss);
1659}
1660
1661
1662
1663struct inc_detail {
1664 struct cache *cache;
1665 struct bio_list bios_for_issue;
1666 struct bio_list unhandled_bios;
1667 bool any_writes;
1668};
1669
1670static void inc_fn(void *context, struct dm_bio_prison_cell *cell)
1671{
1672 struct bio *bio;
1673 struct inc_detail *detail = context;
1674 struct cache *cache = detail->cache;
1675
1676 inc_ds(cache, cell->holder, cell);
1677 if (bio_data_dir(cell->holder) == WRITE)
1678 detail->any_writes = true;
1679
1680 while ((bio = bio_list_pop(&cell->bios))) {
1681 if (discard_or_flush(bio)) {
1682 bio_list_add(&detail->unhandled_bios, bio);
1683 continue;
1684 }
1685
1686 if (bio_data_dir(bio) == WRITE)
1687 detail->any_writes = true;
1688
1689 bio_list_add(&detail->bios_for_issue, bio);
1690 inc_ds(cache, bio, cell);
1691 }
1692}
1693
1694
1695static void remap_cell_to_origin_clear_discard(struct cache *cache,
1696 struct dm_bio_prison_cell *cell,
1697 dm_oblock_t oblock, bool issue_holder)
1698{
1699 struct bio *bio;
1700 unsigned long flags;
1701 struct inc_detail detail;
1702
1703 detail.cache = cache;
1704 bio_list_init(&detail.bios_for_issue);
1705 bio_list_init(&detail.unhandled_bios);
1706 detail.any_writes = false;
1707
1708 spin_lock_irqsave(&cache->lock, flags);
1709 dm_cell_visit_release(cache->prison, inc_fn, &detail, cell);
1710 bio_list_merge(&cache->deferred_bios, &detail.unhandled_bios);
1711 spin_unlock_irqrestore(&cache->lock, flags);
1712
1713 remap_to_origin(cache, cell->holder);
1714 if (issue_holder)
1715 issue(cache, cell->holder);
1716 else
1717 accounted_begin(cache, cell->holder);
1718
1719 if (detail.any_writes)
1720 clear_discard(cache, oblock_to_dblock(cache, oblock));
1721
1722 while ((bio = bio_list_pop(&detail.bios_for_issue))) {
1723 remap_to_origin(cache, bio);
1724 issue(cache, bio);
1725 }
1726
1727 free_prison_cell(cache, cell);
1728}
1729
1730static void remap_cell_to_cache_dirty(struct cache *cache, struct dm_bio_prison_cell *cell,
1731 dm_oblock_t oblock, dm_cblock_t cblock, bool issue_holder)
1732{
1733 struct bio *bio;
1734 unsigned long flags;
1735 struct inc_detail detail;
1736
1737 detail.cache = cache;
1738 bio_list_init(&detail.bios_for_issue);
1739 bio_list_init(&detail.unhandled_bios);
1740 detail.any_writes = false;
1741
1742 spin_lock_irqsave(&cache->lock, flags);
1743 dm_cell_visit_release(cache->prison, inc_fn, &detail, cell);
1744 bio_list_merge(&cache->deferred_bios, &detail.unhandled_bios);
1745 spin_unlock_irqrestore(&cache->lock, flags);
1746
1747 remap_to_cache(cache, cell->holder, cblock);
1748 if (issue_holder)
1749 issue(cache, cell->holder);
1750 else
1751 accounted_begin(cache, cell->holder);
1752
1753 if (detail.any_writes) {
1754 set_dirty(cache, oblock, cblock);
1755 clear_discard(cache, oblock_to_dblock(cache, oblock));
1756 }
1757
1758 while ((bio = bio_list_pop(&detail.bios_for_issue))) {
1759 remap_to_cache(cache, bio, cblock);
1760 issue(cache, bio);
1761 }
1762
1763 free_prison_cell(cache, cell);
1764}
1765
1766
1767
1768struct old_oblock_lock {
1769 struct policy_locker locker;
1770 struct cache *cache;
1771 struct prealloc *structs;
1772 struct dm_bio_prison_cell *cell;
1773};
1774
1775static int null_locker(struct policy_locker *locker, dm_oblock_t b)
1776{
1777
1778 BUG();
1779 return 0;
1780}
1781
1782static int cell_locker(struct policy_locker *locker, dm_oblock_t b)
1783{
1784 struct old_oblock_lock *l = container_of(locker, struct old_oblock_lock, locker);
1785 struct dm_bio_prison_cell *cell_prealloc = prealloc_get_cell(l->structs);
1786
1787 return bio_detain(l->cache, b, NULL, cell_prealloc,
1788 (cell_free_fn) prealloc_put_cell,
1789 l->structs, &l->cell);
1790}
1791
1792static void process_cell(struct cache *cache, struct prealloc *structs,
1793 struct dm_bio_prison_cell *new_ocell)
1794{
1795 int r;
1796 bool release_cell = true;
1797 struct bio *bio = new_ocell->holder;
1798 dm_oblock_t block = get_bio_block(cache, bio);
1799 struct policy_result lookup_result;
1800 bool passthrough = passthrough_mode(&cache->features);
1801 bool fast_promotion, can_migrate;
1802 struct old_oblock_lock ool;
1803
1804 fast_promotion = is_discarded_oblock(cache, block) || bio_writes_complete_block(cache, bio);
1805 can_migrate = !passthrough && (fast_promotion || spare_migration_bandwidth(cache));
1806
1807 ool.locker.fn = cell_locker;
1808 ool.cache = cache;
1809 ool.structs = structs;
1810 ool.cell = NULL;
1811 r = policy_map(cache->policy, block, true, can_migrate, fast_promotion,
1812 bio, &ool.locker, &lookup_result);
1813
1814 if (r == -EWOULDBLOCK)
1815
1816 lookup_result.op = POLICY_MISS;
1817
1818 switch (lookup_result.op) {
1819 case POLICY_HIT:
1820 if (passthrough) {
1821 inc_miss_counter(cache, bio);
1822
1823
1824
1825
1826
1827
1828
1829 if (bio_data_dir(bio) == WRITE) {
1830 atomic_inc(&cache->stats.demotion);
1831 invalidate(cache, structs, block, lookup_result.cblock, new_ocell);
1832 release_cell = false;
1833
1834 } else {
1835
1836 remap_to_origin_clear_discard(cache, bio, block);
1837 inc_and_issue(cache, bio, new_ocell);
1838 }
1839 } else {
1840 inc_hit_counter(cache, bio);
1841
1842 if (bio_data_dir(bio) == WRITE &&
1843 writethrough_mode(&cache->features) &&
1844 !is_dirty(cache, lookup_result.cblock)) {
1845 remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
1846 inc_and_issue(cache, bio, new_ocell);
1847
1848 } else {
1849 remap_cell_to_cache_dirty(cache, new_ocell, block, lookup_result.cblock, true);
1850 release_cell = false;
1851 }
1852 }
1853
1854 break;
1855
1856 case POLICY_MISS:
1857 inc_miss_counter(cache, bio);
1858 remap_cell_to_origin_clear_discard(cache, new_ocell, block, true);
1859 release_cell = false;
1860 break;
1861
1862 case POLICY_NEW:
1863 atomic_inc(&cache->stats.promotion);
1864 promote(cache, structs, block, lookup_result.cblock, new_ocell);
1865 release_cell = false;
1866 break;
1867
1868 case POLICY_REPLACE:
1869 atomic_inc(&cache->stats.demotion);
1870 atomic_inc(&cache->stats.promotion);
1871 demote_then_promote(cache, structs, lookup_result.old_oblock,
1872 block, lookup_result.cblock,
1873 ool.cell, new_ocell);
1874 release_cell = false;
1875 break;
1876
1877 default:
1878 DMERR_LIMIT("%s: %s: erroring bio, unknown policy op: %u",
1879 cache_device_name(cache), __func__,
1880 (unsigned) lookup_result.op);
1881 bio_io_error(bio);
1882 }
1883
1884 if (release_cell)
1885 cell_defer(cache, new_ocell, false);
1886}
1887
1888static void process_bio(struct cache *cache, struct prealloc *structs,
1889 struct bio *bio)
1890{
1891 int r;
1892 dm_oblock_t block = get_bio_block(cache, bio);
1893 struct dm_bio_prison_cell *cell_prealloc, *new_ocell;
1894
1895
1896
1897
1898 cell_prealloc = prealloc_get_cell(structs);
1899 r = bio_detain(cache, block, bio, cell_prealloc,
1900 (cell_free_fn) prealloc_put_cell,
1901 structs, &new_ocell);
1902 if (r > 0)
1903 return;
1904
1905 process_cell(cache, structs, new_ocell);
1906}
1907
1908static int need_commit_due_to_time(struct cache *cache)
1909{
1910 return jiffies < cache->last_commit_jiffies ||
1911 jiffies > cache->last_commit_jiffies + COMMIT_PERIOD;
1912}
1913
1914
1915
1916
1917static int commit(struct cache *cache, bool clean_shutdown)
1918{
1919 int r;
1920
1921 if (get_cache_mode(cache) >= CM_READ_ONLY)
1922 return -EINVAL;
1923
1924 atomic_inc(&cache->stats.commit_count);
1925 r = dm_cache_commit(cache->cmd, clean_shutdown);
1926 if (r)
1927 metadata_operation_failed(cache, "dm_cache_commit", r);
1928
1929 return r;
1930}
1931
1932static int commit_if_needed(struct cache *cache)
1933{
1934 int r = 0;
1935
1936 if ((cache->commit_requested || need_commit_due_to_time(cache)) &&
1937 dm_cache_changed_this_transaction(cache->cmd)) {
1938 r = commit(cache, false);
1939 cache->commit_requested = false;
1940 cache->last_commit_jiffies = jiffies;
1941 }
1942
1943 return r;
1944}
1945
1946static void process_deferred_bios(struct cache *cache)
1947{
1948 bool prealloc_used = false;
1949 unsigned long flags;
1950 struct bio_list bios;
1951 struct bio *bio;
1952 struct prealloc structs;
1953
1954 memset(&structs, 0, sizeof(structs));
1955 bio_list_init(&bios);
1956
1957 spin_lock_irqsave(&cache->lock, flags);
1958 bio_list_merge(&bios, &cache->deferred_bios);
1959 bio_list_init(&cache->deferred_bios);
1960 spin_unlock_irqrestore(&cache->lock, flags);
1961
1962 while (!bio_list_empty(&bios)) {
1963
1964
1965
1966
1967
1968 prealloc_used = true;
1969 if (prealloc_data_structs(cache, &structs)) {
1970 spin_lock_irqsave(&cache->lock, flags);
1971 bio_list_merge(&cache->deferred_bios, &bios);
1972 spin_unlock_irqrestore(&cache->lock, flags);
1973 break;
1974 }
1975
1976 bio = bio_list_pop(&bios);
1977
1978 if (bio->bi_opf & REQ_PREFLUSH)
1979 process_flush_bio(cache, bio);
1980 else if (bio_op(bio) == REQ_OP_DISCARD)
1981 process_discard_bio(cache, &structs, bio);
1982 else
1983 process_bio(cache, &structs, bio);
1984 }
1985
1986 if (prealloc_used)
1987 prealloc_free_structs(cache, &structs);
1988}
1989
1990static void process_deferred_cells(struct cache *cache)
1991{
1992 bool prealloc_used = false;
1993 unsigned long flags;
1994 struct dm_bio_prison_cell *cell, *tmp;
1995 struct list_head cells;
1996 struct prealloc structs;
1997
1998 memset(&structs, 0, sizeof(structs));
1999
2000 INIT_LIST_HEAD(&cells);
2001
2002 spin_lock_irqsave(&cache->lock, flags);
2003 list_splice_init(&cache->deferred_cells, &cells);
2004 spin_unlock_irqrestore(&cache->lock, flags);
2005
2006 list_for_each_entry_safe(cell, tmp, &cells, user_list) {
2007
2008
2009
2010
2011
2012 prealloc_used = true;
2013 if (prealloc_data_structs(cache, &structs)) {
2014 spin_lock_irqsave(&cache->lock, flags);
2015 list_splice(&cells, &cache->deferred_cells);
2016 spin_unlock_irqrestore(&cache->lock, flags);
2017 break;
2018 }
2019
2020 process_cell(cache, &structs, cell);
2021 }
2022
2023 if (prealloc_used)
2024 prealloc_free_structs(cache, &structs);
2025}
2026
2027static void process_deferred_flush_bios(struct cache *cache, bool submit_bios)
2028{
2029 unsigned long flags;
2030 struct bio_list bios;
2031 struct bio *bio;
2032
2033 bio_list_init(&bios);
2034
2035 spin_lock_irqsave(&cache->lock, flags);
2036 bio_list_merge(&bios, &cache->deferred_flush_bios);
2037 bio_list_init(&cache->deferred_flush_bios);
2038 spin_unlock_irqrestore(&cache->lock, flags);
2039
2040
2041
2042
2043 while ((bio = bio_list_pop(&bios)))
2044 submit_bios ? accounted_request(cache, bio) : bio_io_error(bio);
2045}
2046
2047static void process_deferred_writethrough_bios(struct cache *cache)
2048{
2049 unsigned long flags;
2050 struct bio_list bios;
2051 struct bio *bio;
2052
2053 bio_list_init(&bios);
2054
2055 spin_lock_irqsave(&cache->lock, flags);
2056 bio_list_merge(&bios, &cache->deferred_writethrough_bios);
2057 bio_list_init(&cache->deferred_writethrough_bios);
2058 spin_unlock_irqrestore(&cache->lock, flags);
2059
2060
2061
2062
2063 while ((bio = bio_list_pop(&bios)))
2064 accounted_request(cache, bio);
2065}
2066
2067static void writeback_some_dirty_blocks(struct cache *cache)
2068{
2069 bool prealloc_used = false;
2070 dm_oblock_t oblock;
2071 dm_cblock_t cblock;
2072 struct prealloc structs;
2073 struct dm_bio_prison_cell *old_ocell;
2074 bool busy = !iot_idle_for(&cache->origin_tracker, HZ);
2075
2076 memset(&structs, 0, sizeof(structs));
2077
2078 while (spare_migration_bandwidth(cache)) {
2079 if (policy_writeback_work(cache->policy, &oblock, &cblock, busy))
2080 break;
2081
2082 prealloc_used = true;
2083 if (prealloc_data_structs(cache, &structs) ||
2084 get_cell(cache, oblock, &structs, &old_ocell)) {
2085 policy_set_dirty(cache->policy, oblock);
2086 break;
2087 }
2088
2089 writeback(cache, &structs, oblock, cblock, old_ocell);
2090 }
2091
2092 if (prealloc_used)
2093 prealloc_free_structs(cache, &structs);
2094}
2095
2096
2097
2098
2099
2100
2101static void process_invalidation_request(struct cache *cache, struct invalidation_request *req)
2102{
2103 int r = 0;
2104 uint64_t begin = from_cblock(req->cblocks->begin);
2105 uint64_t end = from_cblock(req->cblocks->end);
2106
2107 while (begin != end) {
2108 r = policy_remove_cblock(cache->policy, to_cblock(begin));
2109 if (!r) {
2110 r = dm_cache_remove_mapping(cache->cmd, to_cblock(begin));
2111 if (r) {
2112 metadata_operation_failed(cache, "dm_cache_remove_mapping", r);
2113 break;
2114 }
2115
2116 } else if (r == -ENODATA) {
2117
2118 r = 0;
2119
2120 } else {
2121 DMERR("%s: policy_remove_cblock failed", cache_device_name(cache));
2122 break;
2123 }
2124
2125 begin++;
2126 }
2127
2128 cache->commit_requested = true;
2129
2130 req->err = r;
2131 atomic_set(&req->complete, 1);
2132
2133 wake_up(&req->result_wait);
2134}
2135
2136static void process_invalidation_requests(struct cache *cache)
2137{
2138 struct list_head list;
2139 struct invalidation_request *req, *tmp;
2140
2141 INIT_LIST_HEAD(&list);
2142 spin_lock(&cache->invalidation_lock);
2143 list_splice_init(&cache->invalidation_requests, &list);
2144 spin_unlock(&cache->invalidation_lock);
2145
2146 list_for_each_entry_safe (req, tmp, &list, list)
2147 process_invalidation_request(cache, req);
2148}
2149
2150
2151
2152
2153static bool is_quiescing(struct cache *cache)
2154{
2155 return atomic_read(&cache->quiescing);
2156}
2157
2158static void ack_quiescing(struct cache *cache)
2159{
2160 if (is_quiescing(cache)) {
2161 atomic_inc(&cache->quiescing_ack);
2162 wake_up(&cache->quiescing_wait);
2163 }
2164}
2165
2166static void wait_for_quiescing_ack(struct cache *cache)
2167{
2168 wait_event(cache->quiescing_wait, atomic_read(&cache->quiescing_ack));
2169}
2170
2171static void start_quiescing(struct cache *cache)
2172{
2173 atomic_inc(&cache->quiescing);
2174 wait_for_quiescing_ack(cache);
2175}
2176
2177static void stop_quiescing(struct cache *cache)
2178{
2179 atomic_set(&cache->quiescing, 0);
2180 atomic_set(&cache->quiescing_ack, 0);
2181}
2182
2183static void wait_for_migrations(struct cache *cache)
2184{
2185 wait_event(cache->migration_wait, !atomic_read(&cache->nr_allocated_migrations));
2186}
2187
2188static void stop_worker(struct cache *cache)
2189{
2190 cancel_delayed_work(&cache->waker);
2191 flush_workqueue(cache->wq);
2192}
2193
2194static void requeue_deferred_cells(struct cache *cache)
2195{
2196 unsigned long flags;
2197 struct list_head cells;
2198 struct dm_bio_prison_cell *cell, *tmp;
2199
2200 INIT_LIST_HEAD(&cells);
2201 spin_lock_irqsave(&cache->lock, flags);
2202 list_splice_init(&cache->deferred_cells, &cells);
2203 spin_unlock_irqrestore(&cache->lock, flags);
2204
2205 list_for_each_entry_safe(cell, tmp, &cells, user_list)
2206 cell_requeue(cache, cell);
2207}
2208
2209static void requeue_deferred_bios(struct cache *cache)
2210{
2211 struct bio *bio;
2212 struct bio_list bios;
2213
2214 bio_list_init(&bios);
2215 bio_list_merge(&bios, &cache->deferred_bios);
2216 bio_list_init(&cache->deferred_bios);
2217
2218 while ((bio = bio_list_pop(&bios))) {
2219 bio->bi_error = DM_ENDIO_REQUEUE;
2220 bio_endio(bio);
2221 }
2222}
2223
2224static int more_work(struct cache *cache)
2225{
2226 if (is_quiescing(cache))
2227 return !list_empty(&cache->quiesced_migrations) ||
2228 !list_empty(&cache->completed_migrations) ||
2229 !list_empty(&cache->need_commit_migrations);
2230 else
2231 return !bio_list_empty(&cache->deferred_bios) ||
2232 !list_empty(&cache->deferred_cells) ||
2233 !bio_list_empty(&cache->deferred_flush_bios) ||
2234 !bio_list_empty(&cache->deferred_writethrough_bios) ||
2235 !list_empty(&cache->quiesced_migrations) ||
2236 !list_empty(&cache->completed_migrations) ||
2237 !list_empty(&cache->need_commit_migrations) ||
2238 cache->invalidate;
2239}
2240
2241static void do_worker(struct work_struct *ws)
2242{
2243 struct cache *cache = container_of(ws, struct cache, worker);
2244
2245 do {
2246 if (!is_quiescing(cache)) {
2247 writeback_some_dirty_blocks(cache);
2248 process_deferred_writethrough_bios(cache);
2249 process_deferred_bios(cache);
2250 process_deferred_cells(cache);
2251 process_invalidation_requests(cache);
2252 }
2253
2254 process_migrations(cache, &cache->quiesced_migrations, issue_copy_or_discard);
2255 process_migrations(cache, &cache->completed_migrations, complete_migration);
2256
2257 if (commit_if_needed(cache)) {
2258 process_deferred_flush_bios(cache, false);
2259 process_migrations(cache, &cache->need_commit_migrations, migration_failure);
2260 } else {
2261 process_deferred_flush_bios(cache, true);
2262 process_migrations(cache, &cache->need_commit_migrations,
2263 migration_success_post_commit);
2264 }
2265
2266 ack_quiescing(cache);
2267
2268 } while (more_work(cache));
2269}
2270
2271
2272
2273
2274
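/*
 * We want to commit metadata periodically so that not too much uncommitted
 * work builds up.  do_waker() fires every COMMIT_PERIOD (HZ jiffies, i.e.
 * once a second), gives the policy a tick and kicks the worker, which will
 * call commit_if_needed().
 */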
2275static void do_waker(struct work_struct *ws)
2276{
2277 struct cache *cache = container_of(to_delayed_work(ws), struct cache, waker);
2278 policy_tick(cache->policy, true);
2279 wake_worker(cache);
2280 queue_delayed_work(cache->wq, &cache->waker, COMMIT_PERIOD);
2281}
2282
2283
2284
2285static int is_congested(struct dm_dev *dev, int bdi_bits)
2286{
2287 struct request_queue *q = bdev_get_queue(dev->bdev);
2288 return bdi_congested(q->backing_dev_info, bdi_bits);
2289}
2290
2291static int cache_is_congested(struct dm_target_callbacks *cb, int bdi_bits)
2292{
2293 struct cache *cache = container_of(cb, struct cache, callbacks);
2294
2295 return is_congested(cache->origin_dev, bdi_bits) ||
2296 is_congested(cache->cache_dev, bdi_bits);
2297}
2298
2299
2300
2301
2302
2303
2304
2305
2306
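/*
 * destroy() is also called on the error paths of the constructor, so it has
 * to cope with a partially initialised struct cache: every resource is
 * checked before being torn down.
 */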
2307static void destroy(struct cache *cache)
2308{
2309 unsigned i;
2310
2311 mempool_destroy(cache->migration_pool);
2312
2313 if (cache->all_io_ds)
2314 dm_deferred_set_destroy(cache->all_io_ds);
2315
2316 if (cache->prison)
2317 dm_bio_prison_destroy(cache->prison);
2318
2319 if (cache->wq)
2320 destroy_workqueue(cache->wq);
2321
2322 if (cache->dirty_bitset)
2323 free_bitset(cache->dirty_bitset);
2324
2325 if (cache->discard_bitset)
2326 free_bitset(cache->discard_bitset);
2327
2328 if (cache->copier)
2329 dm_kcopyd_client_destroy(cache->copier);
2330
2331 if (cache->cmd)
2332 dm_cache_metadata_close(cache->cmd);
2333
2334 if (cache->metadata_dev)
2335 dm_put_device(cache->ti, cache->metadata_dev);
2336
2337 if (cache->origin_dev)
2338 dm_put_device(cache->ti, cache->origin_dev);
2339
2340 if (cache->cache_dev)
2341 dm_put_device(cache->ti, cache->cache_dev);
2342
2343 if (cache->policy)
2344 dm_cache_policy_destroy(cache->policy);
2345
2346 for (i = 0; i < cache->nr_ctr_args ; i++)
2347 kfree(cache->ctr_args[i]);
2348 kfree(cache->ctr_args);
2349
2350 kfree(cache);
2351}
2352
2353static void cache_dtr(struct dm_target *ti)
2354{
2355 struct cache *cache = ti->private;
2356
2357 destroy(cache);
2358}
2359
2360static sector_t get_dev_size(struct dm_dev *dev)
2361{
2362 return i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT;
2363}
2364
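
/*
 * Constructor argument layout, as parsed by parse_cache_args() below:
 *
 *   cache <metadata dev> <cache dev> <origin dev> <block size>
 *         <#feature args> [<feature arg>]*
 *         <policy> <#policy args> [<policy arg>]*
 *
 * Feature args recognised here: writeback (the default), writethrough,
 * passthrough and metadata2.  Policy args are <key> <value> pairs that are
 * handed to the policy (or consumed by the core, e.g. migration_threshold).
 *
 * A purely illustrative table line (device names and sizes made up):
 *
 *   0 8388608 cache /dev/fast-meta /dev/fast /dev/slow 512 1 writethrough smq 0
 */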
2396struct cache_args {
2397 struct dm_target *ti;
2398
2399 struct dm_dev *metadata_dev;
2400
2401 struct dm_dev *cache_dev;
2402 sector_t cache_sectors;
2403
2404 struct dm_dev *origin_dev;
2405 sector_t origin_sectors;
2406
2407 uint32_t block_size;
2408
2409 const char *policy_name;
2410 int policy_argc;
2411 const char **policy_argv;
2412
2413 struct cache_features features;
2414};
2415
2416static void destroy_cache_args(struct cache_args *ca)
2417{
2418 if (ca->metadata_dev)
2419 dm_put_device(ca->ti, ca->metadata_dev);
2420
2421 if (ca->cache_dev)
2422 dm_put_device(ca->ti, ca->cache_dev);
2423
2424 if (ca->origin_dev)
2425 dm_put_device(ca->ti, ca->origin_dev);
2426
2427 kfree(ca);
2428}
2429
2430static bool at_least_one_arg(struct dm_arg_set *as, char **error)
2431{
2432 if (!as->argc) {
2433 *error = "Insufficient args";
2434 return false;
2435 }
2436
2437 return true;
2438}
2439
2440static int parse_metadata_dev(struct cache_args *ca, struct dm_arg_set *as,
2441 char **error)
2442{
2443 int r;
2444 sector_t metadata_dev_size;
2445 char b[BDEVNAME_SIZE];
2446
2447 if (!at_least_one_arg(as, error))
2448 return -EINVAL;
2449
2450 r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
2451 &ca->metadata_dev);
2452 if (r) {
2453 *error = "Error opening metadata device";
2454 return r;
2455 }
2456
2457 metadata_dev_size = get_dev_size(ca->metadata_dev);
2458 if (metadata_dev_size > DM_CACHE_METADATA_MAX_SECTORS_WARNING)
2459 DMWARN("Metadata device %s is larger than %u sectors: excess space will not be used.",
		       bdevname(ca->metadata_dev->bdev, b), DM_CACHE_METADATA_MAX_SECTORS);
2461
2462 return 0;
2463}
2464
2465static int parse_cache_dev(struct cache_args *ca, struct dm_arg_set *as,
2466 char **error)
2467{
2468 int r;
2469
2470 if (!at_least_one_arg(as, error))
2471 return -EINVAL;
2472
2473 r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
2474 &ca->cache_dev);
2475 if (r) {
2476 *error = "Error opening cache device";
2477 return r;
2478 }
2479 ca->cache_sectors = get_dev_size(ca->cache_dev);
2480
2481 return 0;
2482}
2483
2484static int parse_origin_dev(struct cache_args *ca, struct dm_arg_set *as,
2485 char **error)
2486{
2487 int r;
2488
2489 if (!at_least_one_arg(as, error))
2490 return -EINVAL;
2491
2492 r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
2493 &ca->origin_dev);
2494 if (r) {
2495 *error = "Error opening origin device";
2496 return r;
2497 }
2498
2499 ca->origin_sectors = get_dev_size(ca->origin_dev);
2500 if (ca->ti->len > ca->origin_sectors) {
2501 *error = "Device size larger than cached device";
2502 return -EINVAL;
2503 }
2504
2505 return 0;
2506}
2507
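/*
 * The data block size is given in 512-byte sectors.  It must lie within
 * [DATA_DEV_BLOCK_SIZE_MIN_SECTORS, DATA_DEV_BLOCK_SIZE_MAX_SECTORS], be a
 * multiple of the minimum size and be no bigger than the cache device
 * itself.  E.g. a value of 512 means 256KiB cache blocks.
 */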
2508static int parse_block_size(struct cache_args *ca, struct dm_arg_set *as,
2509 char **error)
2510{
2511 unsigned long block_size;
2512
2513 if (!at_least_one_arg(as, error))
2514 return -EINVAL;
2515
2516 if (kstrtoul(dm_shift_arg(as), 10, &block_size) || !block_size ||
2517 block_size < DATA_DEV_BLOCK_SIZE_MIN_SECTORS ||
2518 block_size > DATA_DEV_BLOCK_SIZE_MAX_SECTORS ||
2519 block_size & (DATA_DEV_BLOCK_SIZE_MIN_SECTORS - 1)) {
2520 *error = "Invalid data block size";
2521 return -EINVAL;
2522 }
2523
2524 if (block_size > ca->cache_sectors) {
2525 *error = "Data block size is larger than the cache device";
2526 return -EINVAL;
2527 }
2528
2529 ca->block_size = block_size;
2530
2531 return 0;
2532}
2533
2534static void init_features(struct cache_features *cf)
2535{
2536 cf->mode = CM_WRITE;
2537 cf->io_mode = CM_IO_WRITEBACK;
2538 cf->metadata_version = 1;
2539}
2540
2541static int parse_features(struct cache_args *ca, struct dm_arg_set *as,
2542 char **error)
2543{
2544 static struct dm_arg _args[] = {
2545 {0, 2, "Invalid number of cache feature arguments"},
2546 };
2547
2548 int r;
2549 unsigned argc;
2550 const char *arg;
2551 struct cache_features *cf = &ca->features;
2552
2553 init_features(cf);
2554
2555 r = dm_read_arg_group(_args, as, &argc, error);
2556 if (r)
2557 return -EINVAL;
2558
2559 while (argc--) {
2560 arg = dm_shift_arg(as);
2561
2562 if (!strcasecmp(arg, "writeback"))
2563 cf->io_mode = CM_IO_WRITEBACK;
2564
2565 else if (!strcasecmp(arg, "writethrough"))
2566 cf->io_mode = CM_IO_WRITETHROUGH;
2567
2568 else if (!strcasecmp(arg, "passthrough"))
2569 cf->io_mode = CM_IO_PASSTHROUGH;
2570
2571 else if (!strcasecmp(arg, "metadata2"))
2572 cf->metadata_version = 2;
2573
2574 else {
2575 *error = "Unrecognised cache feature requested";
2576 return -EINVAL;
2577 }
2578 }
2579
2580 return 0;
2581}
2582
2583static int parse_policy(struct cache_args *ca, struct dm_arg_set *as,
2584 char **error)
2585{
2586 static struct dm_arg _args[] = {
2587 {0, 1024, "Invalid number of policy arguments"},
2588 };
2589
2590 int r;
2591
2592 if (!at_least_one_arg(as, error))
2593 return -EINVAL;
2594
2595 ca->policy_name = dm_shift_arg(as);
2596
2597 r = dm_read_arg_group(_args, as, &ca->policy_argc, error);
2598 if (r)
2599 return -EINVAL;
2600
2601 ca->policy_argv = (const char **)as->argv;
2602 dm_consume_args(as, ca->policy_argc);
2603
2604 return 0;
2605}
2606
2607static int parse_cache_args(struct cache_args *ca, int argc, char **argv,
2608 char **error)
2609{
2610 int r;
2611 struct dm_arg_set as;
2612
2613 as.argc = argc;
2614 as.argv = argv;
2615
2616 r = parse_metadata_dev(ca, &as, error);
2617 if (r)
2618 return r;
2619
2620 r = parse_cache_dev(ca, &as, error);
2621 if (r)
2622 return r;
2623
2624 r = parse_origin_dev(ca, &as, error);
2625 if (r)
2626 return r;
2627
2628 r = parse_block_size(ca, &as, error);
2629 if (r)
2630 return r;
2631
2632 r = parse_features(ca, &as, error);
2633 if (r)
2634 return r;
2635
2636 r = parse_policy(ca, &as, error);
2637 if (r)
2638 return r;
2639
2640 return 0;
2641}
2642
2643
2644
2645static struct kmem_cache *migration_cache;
2646
2647#define NOT_CORE_OPTION 1
2648
2649static int process_config_option(struct cache *cache, const char *key, const char *value)
2650{
2651 unsigned long tmp;
2652
2653 if (!strcasecmp(key, "migration_threshold")) {
2654 if (kstrtoul(value, 10, &tmp))
2655 return -EINVAL;
2656
2657 cache->migration_threshold = tmp;
2658 return 0;
2659 }
2660
2661 return NOT_CORE_OPTION;
2662}
2663
2664static int set_config_value(struct cache *cache, const char *key, const char *value)
2665{
2666 int r = process_config_option(cache, key, value);
2667
2668 if (r == NOT_CORE_OPTION)
2669 r = policy_set_config_value(cache->policy, key, value);
2670
2671 if (r)
2672 DMWARN("bad config value for %s: %s", key, value);
2673
2674 return r;
2675}
2676
2677static int set_config_values(struct cache *cache, int argc, const char **argv)
2678{
2679 int r = 0;
2680
2681 if (argc & 1) {
2682 DMWARN("Odd number of policy arguments given but they should be <key> <value> pairs.");
2683 return -EINVAL;
2684 }
2685
2686 while (argc) {
2687 r = set_config_value(cache, argv[0], argv[1]);
2688 if (r)
2689 break;
2690
2691 argc -= 2;
2692 argv += 2;
2693 }
2694
2695 return r;
2696}
2697
2698static int create_cache_policy(struct cache *cache, struct cache_args *ca,
2699 char **error)
2700{
2701 struct dm_cache_policy *p = dm_cache_policy_create(ca->policy_name,
2702 cache->cache_size,
2703 cache->origin_sectors,
2704 cache->sectors_per_block);
2705 if (IS_ERR(p)) {
2706 *error = "Error creating cache's policy";
2707 return PTR_ERR(p);
2708 }
2709 cache->policy = p;
2710
2711 return 0;
2712}
2713
2714
2715
2716
2717
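/*
 * The discard bitset is kept to at most MAX_DISCARD_BLOCKS entries: the
 * discard block size starts at the cache block size and is doubled until the
 * origin fits.  E.g. (illustrative numbers) a 1TiB origin (2^31 sectors) with
 * 512-sector cache blocks ends up with a discard block size of 131072 sectors
 * (64MiB), giving 16384 discard blocks.
 */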
2718#define MAX_DISCARD_BLOCKS (1 << 14)
2719
2720static bool too_many_discard_blocks(sector_t discard_block_size,
2721 sector_t origin_size)
2722{
2723 (void) sector_div(origin_size, discard_block_size);
2724
2725 return origin_size > MAX_DISCARD_BLOCKS;
2726}
2727
2728static sector_t calculate_discard_block_size(sector_t cache_block_size,
2729 sector_t origin_size)
2730{
2731 sector_t discard_block_size = cache_block_size;
2732
2733 if (origin_size)
2734 while (too_many_discard_blocks(discard_block_size, origin_size))
2735 discard_block_size *= 2;
2736
2737 return discard_block_size;
2738}
2739
2740static void set_cache_size(struct cache *cache, dm_cblock_t size)
2741{
2742 dm_block_t nr_blocks = from_cblock(size);
2743
2744 if (nr_blocks > (1 << 20) && cache->cache_size != size)
2745 DMWARN_LIMIT("You have created a cache device with a lot of individual cache blocks (%llu)\n"
2746 "All these mappings can consume a lot of kernel memory, and take some time to read/write.\n"
2747 "Please consider increasing the cache block size to reduce the overall cache block count.",
2748 (unsigned long long) nr_blocks);
2749
2750 cache->cache_size = size;
2751}
2752
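/*
 * Cap, in sectors, on the amount of copy I/O used to migrate blocks between
 * the origin and the cache at any one time.  2048 sectors is 1MiB; the value
 * can be changed at runtime with a "migration_threshold <sectors>" message
 * (see process_config_option()).
 */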
2753#define DEFAULT_MIGRATION_THRESHOLD 2048
2754
2755static int cache_create(struct cache_args *ca, struct cache **result)
2756{
2757 int r = 0;
2758 char **error = &ca->ti->error;
2759 struct cache *cache;
2760 struct dm_target *ti = ca->ti;
2761 dm_block_t origin_blocks;
2762 struct dm_cache_metadata *cmd;
2763 bool may_format = ca->features.mode == CM_WRITE;
2764
2765 cache = kzalloc(sizeof(*cache), GFP_KERNEL);
2766 if (!cache)
2767 return -ENOMEM;
2768
2769 cache->ti = ca->ti;
2770 ti->private = cache;
2771 ti->num_flush_bios = 2;
2772 ti->flush_supported = true;
2773
2774 ti->num_discard_bios = 1;
2775 ti->discards_supported = true;
2776 ti->discard_zeroes_data_unsupported = true;
2777 ti->split_discard_bios = false;
2778
2779 cache->features = ca->features;
2780 ti->per_io_data_size = get_per_bio_data_size(cache);
2781
2782 cache->callbacks.congested_fn = cache_is_congested;
2783 dm_table_add_target_callbacks(ti->table, &cache->callbacks);
2784
2785 cache->metadata_dev = ca->metadata_dev;
2786 cache->origin_dev = ca->origin_dev;
2787 cache->cache_dev = ca->cache_dev;
2788
2789 ca->metadata_dev = ca->origin_dev = ca->cache_dev = NULL;
2790
2791
2792 origin_blocks = cache->origin_sectors = ca->origin_sectors;
2793 origin_blocks = block_div(origin_blocks, ca->block_size);
2794 cache->origin_blocks = to_oblock(origin_blocks);
2795
2796 cache->sectors_per_block = ca->block_size;
2797 if (dm_set_target_max_io_len(ti, cache->sectors_per_block)) {
2798 r = -EINVAL;
2799 goto bad;
2800 }
2801
2802 if (ca->block_size & (ca->block_size - 1)) {
2803 dm_block_t cache_size = ca->cache_sectors;
2804
2805 cache->sectors_per_block_shift = -1;
2806 cache_size = block_div(cache_size, ca->block_size);
2807 set_cache_size(cache, to_cblock(cache_size));
2808 } else {
2809 cache->sectors_per_block_shift = __ffs(ca->block_size);
2810 set_cache_size(cache, to_cblock(ca->cache_sectors >> cache->sectors_per_block_shift));
2811 }
2812
2813 r = create_cache_policy(cache, ca, error);
2814 if (r)
2815 goto bad;
2816
2817 cache->policy_nr_args = ca->policy_argc;
2818 cache->migration_threshold = DEFAULT_MIGRATION_THRESHOLD;
2819
2820 r = set_config_values(cache, ca->policy_argc, ca->policy_argv);
2821 if (r) {
2822 *error = "Error setting cache policy's config values";
2823 goto bad;
2824 }
2825
2826 cmd = dm_cache_metadata_open(cache->metadata_dev->bdev,
2827 ca->block_size, may_format,
2828 dm_cache_policy_get_hint_size(cache->policy),
2829 ca->features.metadata_version);
2830 if (IS_ERR(cmd)) {
2831 *error = "Error creating metadata object";
2832 r = PTR_ERR(cmd);
2833 goto bad;
2834 }
2835 cache->cmd = cmd;
2836 set_cache_mode(cache, CM_WRITE);
2837 if (get_cache_mode(cache) != CM_WRITE) {
2838 *error = "Unable to get write access to metadata, please check/repair metadata.";
2839 r = -EINVAL;
2840 goto bad;
2841 }
2842
2843 if (passthrough_mode(&cache->features)) {
2844 bool all_clean;
2845
2846 r = dm_cache_metadata_all_clean(cache->cmd, &all_clean);
2847 if (r) {
2848 *error = "dm_cache_metadata_all_clean() failed";
2849 goto bad;
2850 }
2851
2852 if (!all_clean) {
2853 *error = "Cannot enter passthrough mode unless all blocks are clean";
2854 r = -EINVAL;
2855 goto bad;
2856 }
2857 }
2858
2859 spin_lock_init(&cache->lock);
2860 INIT_LIST_HEAD(&cache->deferred_cells);
2861 bio_list_init(&cache->deferred_bios);
2862 bio_list_init(&cache->deferred_flush_bios);
2863 bio_list_init(&cache->deferred_writethrough_bios);
2864 INIT_LIST_HEAD(&cache->quiesced_migrations);
2865 INIT_LIST_HEAD(&cache->completed_migrations);
2866 INIT_LIST_HEAD(&cache->need_commit_migrations);
2867 atomic_set(&cache->nr_allocated_migrations, 0);
2868 atomic_set(&cache->nr_io_migrations, 0);
2869 init_waitqueue_head(&cache->migration_wait);
2870
2871 init_waitqueue_head(&cache->quiescing_wait);
2872 atomic_set(&cache->quiescing, 0);
2873 atomic_set(&cache->quiescing_ack, 0);
2874
2875 r = -ENOMEM;
2876 atomic_set(&cache->nr_dirty, 0);
2877 cache->dirty_bitset = alloc_bitset(from_cblock(cache->cache_size));
2878 if (!cache->dirty_bitset) {
2879 *error = "could not allocate dirty bitset";
2880 goto bad;
2881 }
2882 clear_bitset(cache->dirty_bitset, from_cblock(cache->cache_size));
2883
2884 cache->discard_block_size =
2885 calculate_discard_block_size(cache->sectors_per_block,
2886 cache->origin_sectors);
2887 cache->discard_nr_blocks = to_dblock(dm_sector_div_up(cache->origin_sectors,
2888 cache->discard_block_size));
2889 cache->discard_bitset = alloc_bitset(from_dblock(cache->discard_nr_blocks));
2890 if (!cache->discard_bitset) {
2891 *error = "could not allocate discard bitset";
2892 goto bad;
2893 }
2894 clear_bitset(cache->discard_bitset, from_dblock(cache->discard_nr_blocks));
2895
2896 cache->copier = dm_kcopyd_client_create(&dm_kcopyd_throttle);
2897 if (IS_ERR(cache->copier)) {
2898 *error = "could not create kcopyd client";
2899 r = PTR_ERR(cache->copier);
2900 goto bad;
2901 }
2902
2903 cache->wq = alloc_ordered_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM);
2904 if (!cache->wq) {
2905 *error = "could not create workqueue for metadata object";
2906 goto bad;
2907 }
2908 INIT_WORK(&cache->worker, do_worker);
2909 INIT_DELAYED_WORK(&cache->waker, do_waker);
2910 cache->last_commit_jiffies = jiffies;
2911
2912 cache->prison = dm_bio_prison_create();
2913 if (!cache->prison) {
2914 *error = "could not create bio prison";
2915 goto bad;
2916 }
2917
2918 cache->all_io_ds = dm_deferred_set_create();
2919 if (!cache->all_io_ds) {
2920 *error = "could not create all_io deferred set";
2921 goto bad;
2922 }
2923
2924 cache->migration_pool = mempool_create_slab_pool(MIGRATION_POOL_SIZE,
2925 migration_cache);
2926 if (!cache->migration_pool) {
2927 *error = "Error creating cache's migration mempool";
2928 goto bad;
2929 }
2930
2931 cache->need_tick_bio = true;
2932 cache->sized = false;
2933 cache->invalidate = false;
2934 cache->commit_requested = false;
2935 cache->loaded_mappings = false;
2936 cache->loaded_discards = false;
2937
2938 load_stats(cache);
2939
2940 atomic_set(&cache->stats.demotion, 0);
2941 atomic_set(&cache->stats.promotion, 0);
2942 atomic_set(&cache->stats.copies_avoided, 0);
2943 atomic_set(&cache->stats.cache_cell_clash, 0);
2944 atomic_set(&cache->stats.commit_count, 0);
2945 atomic_set(&cache->stats.discard_count, 0);
2946
2947 spin_lock_init(&cache->invalidation_lock);
2948 INIT_LIST_HEAD(&cache->invalidation_requests);
2949
2950 iot_init(&cache->origin_tracker);
2951
2952 *result = cache;
2953 return 0;
2954
2955bad:
2956 destroy(cache);
2957 return r;
2958}
2959
2960static int copy_ctr_args(struct cache *cache, int argc, const char **argv)
2961{
2962 unsigned i;
2963 const char **copy;
2964
2965 copy = kcalloc(argc, sizeof(*copy), GFP_KERNEL);
2966 if (!copy)
2967 return -ENOMEM;
2968 for (i = 0; i < argc; i++) {
2969 copy[i] = kstrdup(argv[i], GFP_KERNEL);
2970 if (!copy[i]) {
2971 while (i--)
2972 kfree(copy[i]);
2973 kfree(copy);
2974 return -ENOMEM;
2975 }
2976 }
2977
2978 cache->nr_ctr_args = argc;
2979 cache->ctr_args = copy;
2980
2981 return 0;
2982}
2983
2984static int cache_ctr(struct dm_target *ti, unsigned argc, char **argv)
2985{
2986 int r = -EINVAL;
2987 struct cache_args *ca;
2988 struct cache *cache = NULL;
2989
2990 ca = kzalloc(sizeof(*ca), GFP_KERNEL);
2991 if (!ca) {
2992 ti->error = "Error allocating memory for cache";
2993 return -ENOMEM;
2994 }
2995 ca->ti = ti;
2996
2997 r = parse_cache_args(ca, argc, argv, &ti->error);
2998 if (r)
2999 goto out;
3000
3001 r = cache_create(ca, &cache);
3002 if (r)
3003 goto out;
3004
3005 r = copy_ctr_args(cache, argc - 3, (const char **)argv + 3);
3006 if (r) {
3007 destroy(cache);
3008 goto out;
3009 }
3010
3011 ti->private = cache;
3012
3013out:
3014 destroy_cache_args(ca);
3015 return r;
3016}
3017
3018
3019
3020static int cache_map(struct dm_target *ti, struct bio *bio)
3021{
3022 struct cache *cache = ti->private;
3023
3024 int r;
3025 struct dm_bio_prison_cell *cell = NULL;
3026 dm_oblock_t block = get_bio_block(cache, bio);
3027 size_t pb_data_size = get_per_bio_data_size(cache);
3028 bool can_migrate = false;
3029 bool fast_promotion;
3030 struct policy_result lookup_result;
3031 struct per_bio_data *pb = init_per_bio_data(bio, pb_data_size);
3032 struct old_oblock_lock ool;
3033
3034 ool.locker.fn = null_locker;
3035
3036 if (unlikely(from_oblock(block) >= from_oblock(cache->origin_blocks))) {
3037
3038
3039
3040
3041
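		/*
		 * The bio lies beyond the last complete cache block, i.e. it
		 * hits the partial block at the end of the origin device.
		 * Such blocks are never cached, so just remap to the origin.
		 */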
3042 remap_to_origin(cache, bio);
3043 accounted_begin(cache, bio);
3044 return DM_MAPIO_REMAPPED;
3045 }
3046
3047 if (discard_or_flush(bio)) {
3048 defer_bio(cache, bio);
3049 return DM_MAPIO_SUBMITTED;
3050 }
3051
3052
3053
3054
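	/*
	 * Take out a lock (prison cell) on the block.  If it is already
	 * locked, e.g. because it is being migrated, the bio is either
	 * queued on the existing cell or deferred to the worker.
	 */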
3055 cell = alloc_prison_cell(cache);
3056 if (!cell) {
3057 defer_bio(cache, bio);
3058 return DM_MAPIO_SUBMITTED;
3059 }
3060
3061 r = bio_detain(cache, block, bio, cell,
3062 (cell_free_fn) free_prison_cell,
3063 cache, &cell);
3064 if (r) {
3065 if (r < 0)
3066 defer_bio(cache, bio);
3067
3068 return DM_MAPIO_SUBMITTED;
3069 }
3070
3071 fast_promotion = is_discarded_oblock(cache, block) || bio_writes_complete_block(cache, bio);
3072
3073 r = policy_map(cache->policy, block, false, can_migrate, fast_promotion,
3074 bio, &ool.locker, &lookup_result);
3075 if (r == -EWOULDBLOCK) {
3076 cell_defer(cache, cell, true);
3077 return DM_MAPIO_SUBMITTED;
3078
3079 } else if (r) {
3080 DMERR_LIMIT("%s: Unexpected return from cache replacement policy: %d",
3081 cache_device_name(cache), r);
3082 cell_defer(cache, cell, false);
3083 bio_io_error(bio);
3084 return DM_MAPIO_SUBMITTED;
3085 }
3086
3087 r = DM_MAPIO_REMAPPED;
3088 switch (lookup_result.op) {
3089 case POLICY_HIT:
3090 if (passthrough_mode(&cache->features)) {
3091 if (bio_data_dir(bio) == WRITE) {
3092
3093
3094
3095
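				/*
				 * In passthrough mode a write to a block that
				 * is resident in the cache must invalidate
				 * that block first, so hand the bio over to
				 * the worker thread.
				 */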
3096 cell_defer(cache, cell, true);
3097 r = DM_MAPIO_SUBMITTED;
3098
3099 } else {
3100 inc_miss_counter(cache, bio);
3101 remap_to_origin_clear_discard(cache, bio, block);
3102 accounted_begin(cache, bio);
3103 inc_ds(cache, bio, cell);
3104
3105
3106 cell_defer(cache, cell, false);
3107 }
3108
3109 } else {
3110 inc_hit_counter(cache, bio);
3111 if (bio_data_dir(bio) == WRITE && writethrough_mode(&cache->features) &&
3112 !is_dirty(cache, lookup_result.cblock)) {
3113 remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
3114 accounted_begin(cache, bio);
3115 inc_ds(cache, bio, cell);
3116 cell_defer(cache, cell, false);
3117
3118 } else
3119 remap_cell_to_cache_dirty(cache, cell, block, lookup_result.cblock, false);
3120 }
3121 break;
3122
3123 case POLICY_MISS:
3124 inc_miss_counter(cache, bio);
3125 if (pb->req_nr != 0) {
3126
3127
3128
3129
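			/*
			 * Duplicate bios (req_nr != 0) are not remapped on a
			 * miss; they are completed immediately instead.
			 */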
3130 bio_endio(bio);
3131
3132 cell_defer(cache, cell, false);
3133 r = DM_MAPIO_SUBMITTED;
3134
3135 } else
3136 remap_cell_to_origin_clear_discard(cache, cell, block, false);
3137 break;
3138
3139 default:
3140 DMERR_LIMIT("%s: %s: erroring bio: unknown policy op: %u",
3141 cache_device_name(cache), __func__,
3142 (unsigned) lookup_result.op);
3143 cell_defer(cache, cell, false);
3144 bio_io_error(bio);
3145 r = DM_MAPIO_SUBMITTED;
3146 }
3147
3148 return r;
3149}
3150
3151static int cache_end_io(struct dm_target *ti, struct bio *bio, int error)
3152{
3153 struct cache *cache = ti->private;
3154 unsigned long flags;
3155 size_t pb_data_size = get_per_bio_data_size(cache);
3156 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
3157
3158 if (pb->tick) {
3159 policy_tick(cache->policy, false);
3160
3161 spin_lock_irqsave(&cache->lock, flags);
3162 cache->need_tick_bio = true;
3163 spin_unlock_irqrestore(&cache->lock, flags);
3164 }
3165
3166 check_for_quiesced_migrations(cache, pb);
3167 accounted_complete(cache, bio);
3168
3169 return 0;
3170}
3171
3172static int write_dirty_bitset(struct cache *cache)
3173{
3174 int r;
3175
3176 if (get_cache_mode(cache) >= CM_READ_ONLY)
3177 return -EINVAL;
3178
3179 r = dm_cache_set_dirty_bits(cache->cmd, from_cblock(cache->cache_size), cache->dirty_bitset);
3180 if (r)
3181 metadata_operation_failed(cache, "dm_cache_set_dirty_bits", r);
3182
3183 return r;
3184}
3185
3186static int write_discard_bitset(struct cache *cache)
3187{
3188 unsigned i, r;
3189
3190 if (get_cache_mode(cache) >= CM_READ_ONLY)
3191 return -EINVAL;
3192
3193 r = dm_cache_discard_bitset_resize(cache->cmd, cache->discard_block_size,
3194 cache->discard_nr_blocks);
3195 if (r) {
3196 DMERR("%s: could not resize on-disk discard bitset", cache_device_name(cache));
3197 metadata_operation_failed(cache, "dm_cache_discard_bitset_resize", r);
3198 return r;
3199 }
3200
3201 for (i = 0; i < from_dblock(cache->discard_nr_blocks); i++) {
3202 r = dm_cache_set_discard(cache->cmd, to_dblock(i),
3203 is_discarded(cache, to_dblock(i)));
3204 if (r) {
3205 metadata_operation_failed(cache, "dm_cache_set_discard", r);
3206 return r;
3207 }
3208 }
3209
3210 return 0;
3211}
3212
3213static int write_hints(struct cache *cache)
3214{
3215 int r;
3216
3217 if (get_cache_mode(cache) >= CM_READ_ONLY)
3218 return -EINVAL;
3219
3220 r = dm_cache_write_hints(cache->cmd, cache->policy);
3221 if (r) {
3222 metadata_operation_failed(cache, "dm_cache_write_hints", r);
3223 return r;
3224 }
3225
3226 return 0;
3227}
3228
3229
3230
3231
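/*
 * Flush everything the metadata commit depends on: the dirty and discard
 * bitsets, the stats and the policy hints.  The commit is only flagged as a
 * clean shutdown if all of those writes succeeded.
 */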
3232static bool sync_metadata(struct cache *cache)
3233{
3234 int r1, r2, r3, r4;
3235
3236 r1 = write_dirty_bitset(cache);
3237 if (r1)
3238 DMERR("%s: could not write dirty bitset", cache_device_name(cache));
3239
3240 r2 = write_discard_bitset(cache);
3241 if (r2)
3242 DMERR("%s: could not write discard bitset", cache_device_name(cache));
3243
3244 save_stats(cache);
3245
3246 r3 = write_hints(cache);
3247 if (r3)
3248 DMERR("%s: could not write hints", cache_device_name(cache));
3249
3250
3251
3252
3253
3254
3255 r4 = commit(cache, !r1 && !r2 && !r3);
3256 if (r4)
3257 DMERR("%s: could not write cache metadata", cache_device_name(cache));
3258
3259 return !r1 && !r2 && !r3 && !r4;
3260}
3261
3262static void cache_postsuspend(struct dm_target *ti)
3263{
3264 struct cache *cache = ti->private;
3265
3266 start_quiescing(cache);
3267 wait_for_migrations(cache);
3268 stop_worker(cache);
3269 requeue_deferred_bios(cache);
3270 requeue_deferred_cells(cache);
3271 stop_quiescing(cache);
3272
3273 if (get_cache_mode(cache) == CM_WRITE)
3274 (void) sync_metadata(cache);
3275}
3276
3277static int load_mapping(void *context, dm_oblock_t oblock, dm_cblock_t cblock,
3278 bool dirty, uint32_t hint, bool hint_valid)
3279{
3280 int r;
3281 struct cache *cache = context;
3282
3283 r = policy_load_mapping(cache->policy, oblock, cblock, hint, hint_valid);
3284 if (r)
3285 return r;
3286
3287 if (dirty)
3288 set_dirty(cache, oblock, cblock);
3289 else
3290 clear_dirty(cache, oblock, cblock);
3291
3292 return 0;
3293}
3294
3295
3296
3297
3298
3299
3300
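/*
 * The discard block size stored in the metadata is not necessarily the one
 * we are using now.  Runs of discarded blocks are therefore accumulated in
 * the metadata's units and only translated into the current discard block
 * size by set_discard_range(), which rounds the start up and the end down so
 * that only fully covered blocks are marked discarded.
 */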
3301struct discard_load_info {
3302 struct cache *cache;
3303
3304
3305
3306
3307
3308 dm_block_t block_size;
3309 dm_block_t discard_begin, discard_end;
3310};
3311
3312static void discard_load_info_init(struct cache *cache,
3313 struct discard_load_info *li)
3314{
3315 li->cache = cache;
3316 li->discard_begin = li->discard_end = 0;
3317}
3318
3319static void set_discard_range(struct discard_load_info *li)
3320{
3321 sector_t b, e;
3322
3323 if (li->discard_begin == li->discard_end)
3324 return;
3325
3326
3327
3328
3329 b = li->discard_begin * li->block_size;
3330 e = li->discard_end * li->block_size;
3331
3332
3333
3334
3335 b = dm_sector_div_up(b, li->cache->discard_block_size);
3336 sector_div(e, li->cache->discard_block_size);
3337
3338
3339
3340
3341
3342 if (e > from_dblock(li->cache->discard_nr_blocks))
3343 e = from_dblock(li->cache->discard_nr_blocks);
3344
3345 for (; b < e; b++)
3346 set_discard(li->cache, to_dblock(b));
3347}
3348
3349static int load_discard(void *context, sector_t discard_block_size,
3350 dm_dblock_t dblock, bool discard)
3351{
3352 struct discard_load_info *li = context;
3353
3354 li->block_size = discard_block_size;
3355
3356 if (discard) {
3357 if (from_dblock(dblock) == li->discard_end)
3358
3359
3360
3361 li->discard_end = li->discard_end + 1ULL;
3362
3363 else {
3364
3365
3366
3367 set_discard_range(li);
3368 li->discard_begin = from_dblock(dblock);
3369 li->discard_end = li->discard_begin + 1ULL;
3370 }
3371 } else {
3372 set_discard_range(li);
3373 li->discard_begin = li->discard_end = 0;
3374 }
3375
3376 return 0;
3377}
3378
3379static dm_cblock_t get_cache_dev_size(struct cache *cache)
3380{
3381 sector_t size = get_dev_size(cache->cache_dev);
3382 (void) sector_div(size, cache->sectors_per_block);
3383 return to_cblock(size);
3384}
3385
3386static bool can_resize(struct cache *cache, dm_cblock_t new_size)
3387{
3388 if (from_cblock(new_size) > from_cblock(cache->cache_size))
3389 return true;
3390
3391
3392
3393
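	/*
	 * We can only shrink the cache if none of the blocks that would be
	 * dropped off the end are dirty.
	 */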
3394 while (from_cblock(new_size) < from_cblock(cache->cache_size)) {
3395 new_size = to_cblock(from_cblock(new_size) + 1);
3396 if (is_dirty(cache, new_size)) {
3397 DMERR("%s: unable to shrink cache; cache block %llu is dirty",
3398 cache_device_name(cache),
3399 (unsigned long long) from_cblock(new_size));
3400 return false;
3401 }
3402 }
3403
3404 return true;
3405}
3406
3407static int resize_cache_dev(struct cache *cache, dm_cblock_t new_size)
3408{
3409 int r;
3410
3411 r = dm_cache_resize(cache->cmd, new_size);
3412 if (r) {
3413 DMERR("%s: could not resize cache metadata", cache_device_name(cache));
3414 metadata_operation_failed(cache, "dm_cache_resize", r);
3415 return r;
3416 }
3417
3418 set_cache_size(cache, new_size);
3419
3420 return 0;
3421}
3422
3423static int cache_preresume(struct dm_target *ti)
3424{
3425 int r = 0;
3426 struct cache *cache = ti->private;
3427 dm_cblock_t csize = get_cache_dev_size(cache);
3428
3429
3430
3431
3432 if (!cache->sized) {
3433 r = resize_cache_dev(cache, csize);
3434 if (r)
3435 return r;
3436
3437 cache->sized = true;
3438
3439 } else if (csize != cache->cache_size) {
3440 if (!can_resize(cache, csize))
3441 return -EINVAL;
3442
3443 r = resize_cache_dev(cache, csize);
3444 if (r)
3445 return r;
3446 }
3447
3448 if (!cache->loaded_mappings) {
3449 r = dm_cache_load_mappings(cache->cmd, cache->policy,
3450 load_mapping, cache);
3451 if (r) {
3452 DMERR("%s: could not load cache mappings", cache_device_name(cache));
3453 metadata_operation_failed(cache, "dm_cache_load_mappings", r);
3454 return r;
3455 }
3456
3457 cache->loaded_mappings = true;
3458 }
3459
3460 if (!cache->loaded_discards) {
3461 struct discard_load_info li;
3462
3463
3464
3465
3466
3467
3468 clear_bitset(cache->discard_bitset, from_dblock(cache->discard_nr_blocks));
3469
3470 discard_load_info_init(cache, &li);
3471 r = dm_cache_load_discards(cache->cmd, load_discard, &li);
3472 if (r) {
3473 DMERR("%s: could not load origin discards", cache_device_name(cache));
3474 metadata_operation_failed(cache, "dm_cache_load_discards", r);
3475 return r;
3476 }
3477 set_discard_range(&li);
3478
3479 cache->loaded_discards = true;
3480 }
3481
3482 return r;
3483}
3484
3485static void cache_resume(struct dm_target *ti)
3486{
3487 struct cache *cache = ti->private;
3488
3489 cache->need_tick_bio = true;
3490 do_waker(&cache->waker.work);
3491}
3492
3493
3494
3495
3496
3497
3498
3499
3500
3501
3502
3503
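/*
 * Status line emitted for STATUSTYPE_INFO:
 *
 * <metadata block size> <#used metadata blocks>/<#total metadata blocks>
 * <cache block size> <#used cache blocks>/<#total cache blocks>
 * <#read hits> <#read misses> <#write hits> <#write misses>
 * <#demotions> <#promotions> <#dirty>
 * <#features> <features>*
 * <#core args> <core args>
 * <policy name> <#policy args> <policy args>* <cache metadata mode> <needs_check>
 */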
3504static void cache_status(struct dm_target *ti, status_type_t type,
3505 unsigned status_flags, char *result, unsigned maxlen)
3506{
3507 int r = 0;
3508 unsigned i;
3509 ssize_t sz = 0;
3510 dm_block_t nr_free_blocks_metadata = 0;
3511 dm_block_t nr_blocks_metadata = 0;
3512 char buf[BDEVNAME_SIZE];
3513 struct cache *cache = ti->private;
3514 dm_cblock_t residency;
3515 bool needs_check;
3516
3517 switch (type) {
3518 case STATUSTYPE_INFO:
3519 if (get_cache_mode(cache) == CM_FAIL) {
3520 DMEMIT("Fail");
3521 break;
3522 }
3523
3524
3525 if (!(status_flags & DM_STATUS_NOFLUSH_FLAG) && !dm_suspended(ti))
3526 (void) commit(cache, false);
3527
3528 r = dm_cache_get_free_metadata_block_count(cache->cmd, &nr_free_blocks_metadata);
3529 if (r) {
3530 DMERR("%s: dm_cache_get_free_metadata_block_count returned %d",
3531 cache_device_name(cache), r);
3532 goto err;
3533 }
3534
3535 r = dm_cache_get_metadata_dev_size(cache->cmd, &nr_blocks_metadata);
3536 if (r) {
3537 DMERR("%s: dm_cache_get_metadata_dev_size returned %d",
3538 cache_device_name(cache), r);
3539 goto err;
3540 }
3541
3542 residency = policy_residency(cache->policy);
3543
3544 DMEMIT("%u %llu/%llu %llu %llu/%llu %u %u %u %u %u %u %lu ",
3545 (unsigned)DM_CACHE_METADATA_BLOCK_SIZE,
3546 (unsigned long long)(nr_blocks_metadata - nr_free_blocks_metadata),
3547 (unsigned long long)nr_blocks_metadata,
3548 (unsigned long long)cache->sectors_per_block,
3549 (unsigned long long) from_cblock(residency),
3550 (unsigned long long) from_cblock(cache->cache_size),
3551 (unsigned) atomic_read(&cache->stats.read_hit),
3552 (unsigned) atomic_read(&cache->stats.read_miss),
3553 (unsigned) atomic_read(&cache->stats.write_hit),
3554 (unsigned) atomic_read(&cache->stats.write_miss),
3555 (unsigned) atomic_read(&cache->stats.demotion),
3556 (unsigned) atomic_read(&cache->stats.promotion),
3557 (unsigned long) atomic_read(&cache->nr_dirty));
3558
3559 if (cache->features.metadata_version == 2)
3560 DMEMIT("2 metadata2 ");
3561 else
3562 DMEMIT("1 ");
3563
3564 if (writethrough_mode(&cache->features))
3565 DMEMIT("writethrough ");
3566
3567 else if (passthrough_mode(&cache->features))
3568 DMEMIT("passthrough ");
3569
3570 else if (writeback_mode(&cache->features))
3571 DMEMIT("writeback ");
3572
3573 else {
3574 DMERR("%s: internal error: unknown io mode: %d",
3575 cache_device_name(cache), (int) cache->features.io_mode);
3576 goto err;
3577 }
3578
3579 DMEMIT("2 migration_threshold %llu ", (unsigned long long) cache->migration_threshold);
3580
3581 DMEMIT("%s ", dm_cache_policy_get_name(cache->policy));
3582 if (sz < maxlen) {
3583 r = policy_emit_config_values(cache->policy, result, maxlen, &sz);
3584 if (r)
3585 DMERR("%s: policy_emit_config_values returned %d",
3586 cache_device_name(cache), r);
3587 }
3588
3589 if (get_cache_mode(cache) == CM_READ_ONLY)
3590 DMEMIT("ro ");
3591 else
3592 DMEMIT("rw ");
3593
3594 r = dm_cache_metadata_needs_check(cache->cmd, &needs_check);
3595
3596 if (r || needs_check)
3597 DMEMIT("needs_check ");
3598 else
3599 DMEMIT("- ");
3600
3601 break;
3602
3603 case STATUSTYPE_TABLE:
3604 format_dev_t(buf, cache->metadata_dev->bdev->bd_dev);
3605 DMEMIT("%s ", buf);
3606 format_dev_t(buf, cache->cache_dev->bdev->bd_dev);
3607 DMEMIT("%s ", buf);
3608 format_dev_t(buf, cache->origin_dev->bdev->bd_dev);
3609 DMEMIT("%s", buf);
3610
3611 for (i = 0; i < cache->nr_ctr_args - 1; i++)
3612 DMEMIT(" %s", cache->ctr_args[i]);
3613 if (cache->nr_ctr_args)
3614 DMEMIT(" %s", cache->ctr_args[cache->nr_ctr_args - 1]);
3615 }
3616
3617 return;
3618
3619err:
3620 DMEMIT("Error");
3621}
3622
3623
3624
3625
3626
3627
3628
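/*
 * A cblock range is either a single block "<cblock>" or a span
 * "<begin>-<end>".  The end of the range is exclusive: a single block "B"
 * is stored as "B-(B+1)".
 */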
3629static int parse_cblock_range(struct cache *cache, const char *str,
3630 struct cblock_range *result)
3631{
3632 char dummy;
3633 uint64_t b, e;
3634 int r;
3635
3636
3637
3638
3639 r = sscanf(str, "%llu-%llu%c", &b, &e, &dummy);
3640 if (r < 0)
3641 return r;
3642
3643 if (r == 2) {
3644 result->begin = to_cblock(b);
3645 result->end = to_cblock(e);
3646 return 0;
3647 }
3648
3649
3650
3651
3652 r = sscanf(str, "%llu%c", &b, &dummy);
3653 if (r < 0)
3654 return r;
3655
3656 if (r == 1) {
3657 result->begin = to_cblock(b);
3658 result->end = to_cblock(from_cblock(result->begin) + 1u);
3659 return 0;
3660 }
3661
3662 DMERR("%s: invalid cblock range '%s'", cache_device_name(cache), str);
3663 return -EINVAL;
3664}
3665
3666static int validate_cblock_range(struct cache *cache, struct cblock_range *range)
3667{
3668 uint64_t b = from_cblock(range->begin);
3669 uint64_t e = from_cblock(range->end);
3670 uint64_t n = from_cblock(cache->cache_size);
3671
3672 if (b >= n) {
3673 DMERR("%s: begin cblock out of range: %llu >= %llu",
3674 cache_device_name(cache), b, n);
3675 return -EINVAL;
3676 }
3677
3678 if (e > n) {
3679 DMERR("%s: end cblock out of range: %llu > %llu",
3680 cache_device_name(cache), e, n);
3681 return -EINVAL;
3682 }
3683
3684 if (b >= e) {
3685 DMERR("%s: invalid cblock range: %llu >= %llu",
3686 cache_device_name(cache), b, e);
3687 return -EINVAL;
3688 }
3689
3690 return 0;
3691}
3692
3693static int request_invalidation(struct cache *cache, struct cblock_range *range)
3694{
3695 struct invalidation_request req;
3696
3697 INIT_LIST_HEAD(&req.list);
3698 req.cblocks = range;
3699 atomic_set(&req.complete, 0);
3700 req.err = 0;
3701 init_waitqueue_head(&req.result_wait);
3702
3703 spin_lock(&cache->invalidation_lock);
3704 list_add(&req.list, &cache->invalidation_requests);
3705 spin_unlock(&cache->invalidation_lock);
3706 wake_worker(cache);
3707
3708 wait_event(req.result_wait, atomic_read(&req.complete));
3709 return req.err;
3710}
3711
3712static int process_invalidate_cblocks_message(struct cache *cache, unsigned count,
3713 const char **cblock_ranges)
3714{
3715 int r = 0;
3716 unsigned i;
3717 struct cblock_range range;
3718
3719 if (!passthrough_mode(&cache->features)) {
3720 DMERR("%s: cache has to be in passthrough mode for invalidation",
3721 cache_device_name(cache));
3722 return -EPERM;
3723 }
3724
3725 for (i = 0; i < count; i++) {
3726 r = parse_cblock_range(cache, cblock_ranges[i], &range);
3727 if (r)
3728 break;
3729
3730 r = validate_cblock_range(cache, &range);
3731 if (r)
3732 break;
3733
3734
3735
3736
3737 r = request_invalidation(cache, &range);
3738 if (r)
3739 break;
3740 }
3741
3742 return r;
3743}
3744
3745
3746
3747
3748
3749
3750
3751
3752
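/*
 * Messages understood by the target:
 *
 *   invalidate_cblocks [<cblock>|<cblock begin>-<cblock end>]*
 *	(only allowed while in passthrough mode)
 *
 *   <key> <value>
 *	e.g. "migration_threshold 4096"; keys the core does not recognise are
 *	passed on to the policy via policy_set_config_value().
 */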
3753static int cache_message(struct dm_target *ti, unsigned argc, char **argv)
3754{
3755 struct cache *cache = ti->private;
3756
3757 if (!argc)
3758 return -EINVAL;
3759
3760 if (get_cache_mode(cache) >= CM_READ_ONLY) {
3761 DMERR("%s: unable to service cache target messages in READ_ONLY or FAIL mode",
3762 cache_device_name(cache));
3763 return -EOPNOTSUPP;
3764 }
3765
3766 if (!strcasecmp(argv[0], "invalidate_cblocks"))
3767 return process_invalidate_cblocks_message(cache, argc - 1, (const char **) argv + 1);
3768
3769 if (argc != 2)
3770 return -EINVAL;
3771
3772 return set_config_value(cache, argv[0], argv[1]);
3773}
3774
3775static int cache_iterate_devices(struct dm_target *ti,
3776 iterate_devices_callout_fn fn, void *data)
3777{
3778 int r = 0;
3779 struct cache *cache = ti->private;
3780
3781 r = fn(ti, cache->cache_dev, 0, get_dev_size(cache->cache_dev), data);
3782 if (!r)
3783 r = fn(ti, cache->origin_dev, 0, ti->len, data);
3784
3785 return r;
3786}
3787
3788static void set_discard_limits(struct cache *cache, struct queue_limits *limits)
3789{
3790
3791
3792
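	/*
	 * Discards are expressed in units of the internal discard block size,
	 * and a single discard is capped at 1024 such blocks (or the whole
	 * origin, whichever is smaller).
	 */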
3793 limits->max_discard_sectors = min_t(sector_t, cache->discard_block_size * 1024,
3794 cache->origin_sectors);
3795 limits->discard_granularity = cache->discard_block_size << SECTOR_SHIFT;
3796}
3797
3798static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
3799{
3800 struct cache *cache = ti->private;
3801 uint64_t io_opt_sectors = limits->io_opt >> SECTOR_SHIFT;
3802
3803
3804
3805
3806
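	/*
	 * If the system-set limits are already a multiple of the cache's
	 * block size then leave them alone; otherwise override io_min and
	 * io_opt with the cache block size.
	 */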
3807 if (io_opt_sectors < cache->sectors_per_block ||
3808 do_div(io_opt_sectors, cache->sectors_per_block)) {
3809 blk_limits_io_min(limits, cache->sectors_per_block << SECTOR_SHIFT);
3810 blk_limits_io_opt(limits, cache->sectors_per_block << SECTOR_SHIFT);
3811 }
3812 set_discard_limits(cache, limits);
3813}
3814
3815
3816
3817static struct target_type cache_target = {
3818 .name = "cache",
3819 .version = {1, 10, 0},
3820 .module = THIS_MODULE,
3821 .ctr = cache_ctr,
3822 .dtr = cache_dtr,
3823 .map = cache_map,
3824 .end_io = cache_end_io,
3825 .postsuspend = cache_postsuspend,
3826 .preresume = cache_preresume,
3827 .resume = cache_resume,
3828 .status = cache_status,
3829 .message = cache_message,
3830 .iterate_devices = cache_iterate_devices,
3831 .io_hints = cache_io_hints,
3832};
3833
3834static int __init dm_cache_init(void)
3835{
3836 int r;
3837
3838 r = dm_register_target(&cache_target);
3839 if (r) {
3840 DMERR("cache target registration failed: %d", r);
3841 return r;
3842 }
3843
3844 migration_cache = KMEM_CACHE(dm_cache_migration, 0);
3845 if (!migration_cache) {
3846 dm_unregister_target(&cache_target);
3847 return -ENOMEM;
3848 }
3849
3850 return 0;
3851}
3852
3853static void __exit dm_cache_exit(void)
3854{
3855 dm_unregister_target(&cache_target);
3856 kmem_cache_destroy(migration_cache);
3857}
3858
3859module_init(dm_cache_init);
3860module_exit(dm_cache_exit);
3861
3862MODULE_DESCRIPTION(DM_NAME " cache target");
3863MODULE_AUTHOR("Joe Thornber <ejt@redhat.com>");
3864MODULE_LICENSE("GPL");
3865