/*
 * Copyright (C) 2012 Red Hat. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include "dm.h"
#include "dm-bio-prison.h"
#include "dm-bio-record.h"
#include "dm-cache-metadata.h"

#include <linux/dm-io.h>
#include <linux/dm-kcopyd.h>
#include <linux/jiffies.h>
#include <linux/init.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

#define DM_MSG_PREFIX "cache"

DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(cache_copy_throttle,
	"A percentage of time allocated for copying to and/or from cache");

/*----------------------------------------------------------------*/

#define IOT_RESOLUTION 4

struct io_tracker {
	spinlock_t lock;

	/*
	 * Sectors of in-flight IO.
	 */
	sector_t in_flight;

	/*
	 * The time, in jiffies, when this device became idle (if it is
	 * indeed idle).
	 */
	unsigned long idle_time;
	unsigned long last_update_time;
};
45
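/*
 * Helpers for the io_tracker above: callers account IO with
 * iot_io_begin()/iot_io_end(), and iot_idle_for() reports whether the
 * tracked device has been free of such IO for a given number of jiffies.
 */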
46static void iot_init(struct io_tracker *iot)
47{
48 spin_lock_init(&iot->lock);
49 iot->in_flight = 0ul;
50 iot->idle_time = 0ul;
51 iot->last_update_time = jiffies;
52}
53
54static bool __iot_idle_for(struct io_tracker *iot, unsigned long jifs)
55{
56 if (iot->in_flight)
57 return false;
58
59 return time_after(jiffies, iot->idle_time + jifs);
60}
61
62static bool iot_idle_for(struct io_tracker *iot, unsigned long jifs)
63{
64 bool r;
65 unsigned long flags;
66
67 spin_lock_irqsave(&iot->lock, flags);
68 r = __iot_idle_for(iot, jifs);
69 spin_unlock_irqrestore(&iot->lock, flags);
70
71 return r;
72}
73
74static void iot_io_begin(struct io_tracker *iot, sector_t len)
75{
76 unsigned long flags;
77
78 spin_lock_irqsave(&iot->lock, flags);
79 iot->in_flight += len;
80 spin_unlock_irqrestore(&iot->lock, flags);
81}
82
83static void __iot_io_end(struct io_tracker *iot, sector_t len)
84{
85 iot->in_flight -= len;
86 if (!iot->in_flight)
87 iot->idle_time = jiffies;
88}
89
90static void iot_io_end(struct io_tracker *iot, sector_t len)
91{
92 unsigned long flags;
93
94 spin_lock_irqsave(&iot->lock, flags);
95 __iot_io_end(iot, len);
96 spin_unlock_irqrestore(&iot->lock, flags);
97}
98
/*----------------------------------------------------------------*/

/*
 * Glossary:
 *
 * oblock: index of an origin block
 * cblock: index of a cache block
 * promotion: movement of a block from origin to cache
 * demotion: movement of a block from cache to origin
 * migration: movement of a block between the origin and cache device,
 *            either direction
 */

/*----------------------------------------------------------------*/

/*
 * There are a couple of places where we let a bio run, but want to do some
 * housekeeping work when it completes.  dm_hook_bio() and dm_unhook_bio()
 * below temporarily replace a bio's endio function and private data so we
 * regain control on completion, before restoring the original endio.
 */
119struct dm_hook_info {
120 bio_end_io_t *bi_end_io;
121};
122
123static void dm_hook_bio(struct dm_hook_info *h, struct bio *bio,
124 bio_end_io_t *bi_end_io, void *bi_private)
125{
126 h->bi_end_io = bio->bi_end_io;
127
128 bio->bi_end_io = bi_end_io;
129 bio->bi_private = bi_private;
130}
131
132static void dm_unhook_bio(struct dm_hook_info *h, struct bio *bio)
133{
134 bio->bi_end_io = h->bi_end_io;
135}
136
/*----------------------------------------------------------------*/

139#define MIGRATION_POOL_SIZE 128
140#define COMMIT_PERIOD HZ
141#define MIGRATION_COUNT_WINDOW 10
142
/*
 * The block size of the device holding cache data must be
 * between 32KB and 1GB.
 */
147#define DATA_DEV_BLOCK_SIZE_MIN_SECTORS (32 * 1024 >> SECTOR_SHIFT)
148#define DATA_DEV_BLOCK_SIZE_MAX_SECTORS (1024 * 1024 * 1024 >> SECTOR_SHIFT)
149
enum cache_metadata_mode {
	CM_WRITE,		/* metadata may be changed */
	CM_READ_ONLY,		/* metadata may not be changed */
	CM_FAIL
};

enum cache_io_mode {
	/*
	 * Data is written to cached blocks only.  These blocks are marked
	 * dirty.  If you lose the cache device you will lose data.
	 * Potential performance increase for both reads and writes.
	 */
	CM_IO_WRITEBACK,

	/*
	 * Data is written to both cache and origin.  Blocks are never
	 * dirty.  Potential performance benefit for reads only.
	 */
	CM_IO_WRITETHROUGH,

	/*
	 * A degraded mode useful for various cache coherency situations
	 * (eg, rolling back snapshots).  Reads and writes always go to the
	 * origin.  If a write goes to a cached oblock, then the cache
	 * block is invalidated.
	 */
	CM_IO_PASSTHROUGH
};
178
179struct cache_features {
180 enum cache_metadata_mode mode;
181 enum cache_io_mode io_mode;
182};
183
184struct cache_stats {
185 atomic_t read_hit;
186 atomic_t read_miss;
187 atomic_t write_hit;
188 atomic_t write_miss;
189 atomic_t demotion;
190 atomic_t promotion;
191 atomic_t copies_avoided;
192 atomic_t cache_cell_clash;
193 atomic_t commit_count;
194 atomic_t discard_count;
195};
196
/*
 * Defines a range of cblocks, begin to (end - 1) are in the range.  end is
 * not part of the range.
 */
201struct cblock_range {
202 dm_cblock_t begin;
203 dm_cblock_t end;
204};
205
206struct invalidation_request {
207 struct list_head list;
208 struct cblock_range *cblocks;
209
210 atomic_t complete;
211 int err;
212
213 wait_queue_head_t result_wait;
214};
215
216struct cache {
217 struct dm_target *ti;
218 struct dm_target_callbacks callbacks;
219
	struct dm_cache_metadata *cmd;

	/*
	 * Metadata is written to this device.
	 */
	struct dm_dev *metadata_dev;

	/*
	 * The slower of the two data devices.  Typically a spindle.
	 */
	struct dm_dev *origin_dev;

	/*
	 * The faster of the two data devices.  Typically an SSD.
	 */
	struct dm_dev *cache_dev;

	/*
	 * Size of the origin device in _complete_ blocks and native sectors.
	 */
	dm_oblock_t origin_blocks;
	sector_t origin_sectors;

	/*
	 * Size of the cache device in blocks.
	 */
	dm_cblock_t cache_size;

	/*
	 * Fields for converting from sectors to blocks.
	 */
	uint32_t sectors_per_block;
	int sectors_per_block_shift;
253
254 spinlock_t lock;
255 struct list_head deferred_cells;
256 struct bio_list deferred_bios;
257 struct bio_list deferred_flush_bios;
258 struct bio_list deferred_writethrough_bios;
259 struct list_head quiesced_migrations;
260 struct list_head completed_migrations;
261 struct list_head need_commit_migrations;
262 sector_t migration_threshold;
263 wait_queue_head_t migration_wait;
264 atomic_t nr_allocated_migrations;
265
	/*
	 * The number of in flight migrations that are performing
	 * background io.  eg, promotion, writeback.
	 */
270 atomic_t nr_io_migrations;
271
272 wait_queue_head_t quiescing_wait;
273 atomic_t quiescing;
274 atomic_t quiescing_ack;
275
	/*
	 * cache_size entries, dirty if set
	 */
279 atomic_t nr_dirty;
280 unsigned long *dirty_bitset;
281
	/*
	 * origin_blocks entries, discarded if set.
	 */
285 dm_dblock_t discard_nr_blocks;
286 unsigned long *discard_bitset;
287 uint32_t discard_block_size;
288
	/*
	 * Rather than reconstructing the table line for the status we just
	 * save it and regurgitate.
	 */
293 unsigned nr_ctr_args;
294 const char **ctr_args;
295
296 struct dm_kcopyd_client *copier;
297 struct workqueue_struct *wq;
298 struct work_struct worker;
299
300 struct delayed_work waker;
301 unsigned long last_commit_jiffies;
302
303 struct dm_bio_prison *prison;
304 struct dm_deferred_set *all_io_ds;
305
306 mempool_t *migration_pool;
307
308 struct dm_cache_policy *policy;
309 unsigned policy_nr_args;
310
311 bool need_tick_bio:1;
312 bool sized:1;
313 bool invalidate:1;
314 bool commit_requested:1;
315 bool loaded_mappings:1;
316 bool loaded_discards:1;
317
	/*
	 * Cache features such as write-through.
	 */
321 struct cache_features features;
322
323 struct cache_stats stats;
324
	/*
	 * Invalidation fields.
	 */
328 spinlock_t invalidation_lock;
329 struct list_head invalidation_requests;
330
331 struct io_tracker origin_tracker;
332};
333
struct per_bio_data {
	bool tick:1;
	unsigned req_nr:2;
	struct dm_deferred_entry *all_io_entry;
	struct dm_hook_info hook_info;
	sector_t len;

	/*
	 * writethrough fields.  These MUST remain at the end of this
	 * structure and the 'cache' member must be the first as it
	 * is used to determine the offset of the writethrough fields.
	 */
	struct cache *cache;
	dm_cblock_t cblock;
	struct dm_bio_details bio_details;
};
350
351struct dm_cache_migration {
352 struct list_head list;
353 struct cache *cache;
354
355 unsigned long start_jiffies;
356 dm_oblock_t old_oblock;
357 dm_oblock_t new_oblock;
358 dm_cblock_t cblock;
359
360 bool err:1;
361 bool discard:1;
362 bool writeback:1;
363 bool demote:1;
364 bool promote:1;
365 bool requeue_holder:1;
366 bool invalidate:1;
367
368 struct dm_bio_prison_cell *old_ocell;
369 struct dm_bio_prison_cell *new_ocell;
370};
371
/*
 * Processing a bio in the worker thread may require these memory
 * allocations.  We prealloc to avoid deadlocks (the same worker thread
 * frees them back to the mempool).
 */
377struct prealloc {
378 struct dm_cache_migration *mg;
379 struct dm_bio_prison_cell *cell1;
380 struct dm_bio_prison_cell *cell2;
381};
382
383static enum cache_metadata_mode get_cache_mode(struct cache *cache);
384
385static void wake_worker(struct cache *cache)
386{
387 queue_work(cache->wq, &cache->worker);
388}
389
/*----------------------------------------------------------------*/

392static struct dm_bio_prison_cell *alloc_prison_cell(struct cache *cache)
393{
	/* FIXME: change to use a local slab. */
395 return dm_bio_prison_alloc_cell(cache->prison, GFP_NOWAIT);
396}
397
398static void free_prison_cell(struct cache *cache, struct dm_bio_prison_cell *cell)
399{
400 dm_bio_prison_free_cell(cache->prison, cell);
401}
402
403static struct dm_cache_migration *alloc_migration(struct cache *cache)
404{
405 struct dm_cache_migration *mg;
406
407 mg = mempool_alloc(cache->migration_pool, GFP_NOWAIT);
408 if (mg) {
409 mg->cache = cache;
410 atomic_inc(&mg->cache->nr_allocated_migrations);
411 }
412
413 return mg;
414}
415
416static void free_migration(struct dm_cache_migration *mg)
417{
418 struct cache *cache = mg->cache;
419
420 if (atomic_dec_and_test(&cache->nr_allocated_migrations))
421 wake_up(&cache->migration_wait);
422
423 mempool_free(mg, cache->migration_pool);
424}
425
426static int prealloc_data_structs(struct cache *cache, struct prealloc *p)
427{
428 if (!p->mg) {
429 p->mg = alloc_migration(cache);
430 if (!p->mg)
431 return -ENOMEM;
432 }
433
434 if (!p->cell1) {
435 p->cell1 = alloc_prison_cell(cache);
436 if (!p->cell1)
437 return -ENOMEM;
438 }
439
440 if (!p->cell2) {
441 p->cell2 = alloc_prison_cell(cache);
442 if (!p->cell2)
443 return -ENOMEM;
444 }
445
446 return 0;
447}
448
449static void prealloc_free_structs(struct cache *cache, struct prealloc *p)
450{
451 if (p->cell2)
452 free_prison_cell(cache, p->cell2);
453
454 if (p->cell1)
455 free_prison_cell(cache, p->cell1);
456
457 if (p->mg)
458 free_migration(p->mg);
459}
460
461static struct dm_cache_migration *prealloc_get_migration(struct prealloc *p)
462{
463 struct dm_cache_migration *mg = p->mg;
464
465 BUG_ON(!mg);
466 p->mg = NULL;
467
468 return mg;
469}
470
/*
 * You must have a cell within the prealloc struct to return.  If not this
 * function will BUG() rather than returning NULL.
 */
475static struct dm_bio_prison_cell *prealloc_get_cell(struct prealloc *p)
476{
477 struct dm_bio_prison_cell *r = NULL;
478
479 if (p->cell1) {
480 r = p->cell1;
481 p->cell1 = NULL;
482
483 } else if (p->cell2) {
484 r = p->cell2;
485 p->cell2 = NULL;
486 } else
487 BUG();
488
489 return r;
490}
491
/*
 * You can't have more than two cells in a prealloc struct.  BUG() will be
 * called if you try and overfill.
 */
496static void prealloc_put_cell(struct prealloc *p, struct dm_bio_prison_cell *cell)
497{
498 if (!p->cell2)
499 p->cell2 = cell;
500
501 else if (!p->cell1)
502 p->cell1 = cell;
503
504 else
505 BUG();
506}
507
/*----------------------------------------------------------------*/

510static void build_key(dm_oblock_t begin, dm_oblock_t end, struct dm_cell_key *key)
511{
512 key->virtual = 0;
513 key->dev = 0;
514 key->block_begin = from_oblock(begin);
515 key->block_end = from_oblock(end);
516}
517
/*
 * The caller hands in a preallocated cell, and a free function for it.
 * The cell will be freed if there's an error, or if it wasn't used because
 * a cell with that key already exists.
 */
523typedef void (*cell_free_fn)(void *context, struct dm_bio_prison_cell *cell);
524
525static int bio_detain_range(struct cache *cache, dm_oblock_t oblock_begin, dm_oblock_t oblock_end,
526 struct bio *bio, struct dm_bio_prison_cell *cell_prealloc,
527 cell_free_fn free_fn, void *free_context,
528 struct dm_bio_prison_cell **cell_result)
529{
530 int r;
531 struct dm_cell_key key;
532
533 build_key(oblock_begin, oblock_end, &key);
534 r = dm_bio_detain(cache->prison, &key, bio, cell_prealloc, cell_result);
535 if (r)
536 free_fn(free_context, cell_prealloc);
537
538 return r;
539}
540
541static int bio_detain(struct cache *cache, dm_oblock_t oblock,
542 struct bio *bio, struct dm_bio_prison_cell *cell_prealloc,
543 cell_free_fn free_fn, void *free_context,
544 struct dm_bio_prison_cell **cell_result)
545{
546 dm_oblock_t end = to_oblock(from_oblock(oblock) + 1ULL);
547 return bio_detain_range(cache, oblock, end, bio,
548 cell_prealloc, free_fn, free_context, cell_result);
549}
550
551static int get_cell(struct cache *cache,
552 dm_oblock_t oblock,
553 struct prealloc *structs,
554 struct dm_bio_prison_cell **cell_result)
555{
556 int r;
557 struct dm_cell_key key;
558 struct dm_bio_prison_cell *cell_prealloc;
559
560 cell_prealloc = prealloc_get_cell(structs);
561
562 build_key(oblock, to_oblock(from_oblock(oblock) + 1ULL), &key);
563 r = dm_get_cell(cache->prison, &key, cell_prealloc, cell_result);
564 if (r)
565 prealloc_put_cell(structs, cell_prealloc);
566
567 return r;
568}
569
/*----------------------------------------------------------------*/

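/*
 * Dirty block tracking: one bit per cache block.  The policy is kept in
 * step whenever a bit changes, and a table event is raised when the last
 * dirty block is cleaned.
 */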
572static bool is_dirty(struct cache *cache, dm_cblock_t b)
573{
574 return test_bit(from_cblock(b), cache->dirty_bitset);
575}
576
577static void set_dirty(struct cache *cache, dm_oblock_t oblock, dm_cblock_t cblock)
578{
579 if (!test_and_set_bit(from_cblock(cblock), cache->dirty_bitset)) {
580 atomic_inc(&cache->nr_dirty);
581 policy_set_dirty(cache->policy, oblock);
582 }
583}
584
585static void clear_dirty(struct cache *cache, dm_oblock_t oblock, dm_cblock_t cblock)
586{
587 if (test_and_clear_bit(from_cblock(cblock), cache->dirty_bitset)) {
588 policy_clear_dirty(cache->policy, oblock);
589 if (atomic_dec_return(&cache->nr_dirty) == 0)
590 dm_table_event(cache->ti->table);
591 }
592}
593
/*----------------------------------------------------------------*/

596static bool block_size_is_power_of_two(struct cache *cache)
597{
598 return cache->sectors_per_block_shift >= 0;
599}
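/*
 * A negative sectors_per_block_shift means the block size is not a power
 * of two, in which case block_div() below must be used instead of shifting.
 */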
600
/* gcc on ARM generates spurious references to __udivdi3 and __umoddi3 */
602#if defined(CONFIG_ARM) && __GNUC__ == 4 && __GNUC_MINOR__ <= 6
603__always_inline
604#endif
605static dm_block_t block_div(dm_block_t b, uint32_t n)
606{
607 do_div(b, n);
608
609 return b;
610}
611
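/*
 * Discard state is tracked at a coarser 'discard block' granularity than
 * cache blocks.  These helpers convert between origin blocks and discard
 * blocks.
 */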
612static dm_block_t oblocks_per_dblock(struct cache *cache)
613{
614 dm_block_t oblocks = cache->discard_block_size;
615
616 if (block_size_is_power_of_two(cache))
617 oblocks >>= cache->sectors_per_block_shift;
618 else
619 oblocks = block_div(oblocks, cache->sectors_per_block);
620
621 return oblocks;
622}
623
624static dm_dblock_t oblock_to_dblock(struct cache *cache, dm_oblock_t oblock)
625{
626 return to_dblock(block_div(from_oblock(oblock),
627 oblocks_per_dblock(cache)));
628}
629
630static dm_oblock_t dblock_to_oblock(struct cache *cache, dm_dblock_t dblock)
631{
632 return to_oblock(from_dblock(dblock) * oblocks_per_dblock(cache));
633}
634
635static void set_discard(struct cache *cache, dm_dblock_t b)
636{
637 unsigned long flags;
638
639 BUG_ON(from_dblock(b) >= from_dblock(cache->discard_nr_blocks));
640 atomic_inc(&cache->stats.discard_count);
641
642 spin_lock_irqsave(&cache->lock, flags);
643 set_bit(from_dblock(b), cache->discard_bitset);
644 spin_unlock_irqrestore(&cache->lock, flags);
645}
646
647static void clear_discard(struct cache *cache, dm_dblock_t b)
648{
649 unsigned long flags;
650
651 spin_lock_irqsave(&cache->lock, flags);
652 clear_bit(from_dblock(b), cache->discard_bitset);
653 spin_unlock_irqrestore(&cache->lock, flags);
654}
655
656static bool is_discarded(struct cache *cache, dm_dblock_t b)
657{
658 int r;
659 unsigned long flags;
660
661 spin_lock_irqsave(&cache->lock, flags);
662 r = test_bit(from_dblock(b), cache->discard_bitset);
663 spin_unlock_irqrestore(&cache->lock, flags);
664
665 return r;
666}
667
668static bool is_discarded_oblock(struct cache *cache, dm_oblock_t b)
669{
670 int r;
671 unsigned long flags;
672
673 spin_lock_irqsave(&cache->lock, flags);
674 r = test_bit(from_dblock(oblock_to_dblock(cache, b)),
675 cache->discard_bitset);
676 spin_unlock_irqrestore(&cache->lock, flags);
677
678 return r;
679}
680
/*----------------------------------------------------------------*/

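/*
 * Hit/miss counters are persisted in the metadata so they survive a target
 * reload.  They are read back by load_stats() and written out by
 * save_stats(), which becomes a no-op once the metadata has degraded to
 * read-only or fail mode.
 */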
683static void load_stats(struct cache *cache)
684{
685 struct dm_cache_statistics stats;
686
687 dm_cache_metadata_get_stats(cache->cmd, &stats);
688 atomic_set(&cache->stats.read_hit, stats.read_hits);
689 atomic_set(&cache->stats.read_miss, stats.read_misses);
690 atomic_set(&cache->stats.write_hit, stats.write_hits);
691 atomic_set(&cache->stats.write_miss, stats.write_misses);
692}
693
694static void save_stats(struct cache *cache)
695{
696 struct dm_cache_statistics stats;
697
698 if (get_cache_mode(cache) >= CM_READ_ONLY)
699 return;
700
701 stats.read_hits = atomic_read(&cache->stats.read_hit);
702 stats.read_misses = atomic_read(&cache->stats.read_miss);
703 stats.write_hits = atomic_read(&cache->stats.write_hit);
704 stats.write_misses = atomic_read(&cache->stats.write_miss);
705
706 dm_cache_metadata_set_stats(cache->cmd, &stats);
707}
708
/*----------------------------------------------------------------
 * Per bio data
 *--------------------------------------------------------------*/

/*
 * If using writeback, leave out struct per_bio_data's writethrough fields.
 */
716#define PB_DATA_SIZE_WB (offsetof(struct per_bio_data, cache))
717#define PB_DATA_SIZE_WT (sizeof(struct per_bio_data))
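/*
 * In writeback mode a bio only carries the per_bio_data fields up to (but
 * not including) 'cache', so get_per_bio_data_size() returns the smaller
 * PB_DATA_SIZE_WB in that case.
 */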
718
719static bool writethrough_mode(struct cache_features *f)
720{
721 return f->io_mode == CM_IO_WRITETHROUGH;
722}
723
724static bool writeback_mode(struct cache_features *f)
725{
726 return f->io_mode == CM_IO_WRITEBACK;
727}
728
729static bool passthrough_mode(struct cache_features *f)
730{
731 return f->io_mode == CM_IO_PASSTHROUGH;
732}
733
734static size_t get_per_bio_data_size(struct cache *cache)
735{
736 return writethrough_mode(&cache->features) ? PB_DATA_SIZE_WT : PB_DATA_SIZE_WB;
737}
738
739static struct per_bio_data *get_per_bio_data(struct bio *bio, size_t data_size)
740{
741 struct per_bio_data *pb = dm_per_bio_data(bio, data_size);
742 BUG_ON(!pb);
743 return pb;
744}
745
746static struct per_bio_data *init_per_bio_data(struct bio *bio, size_t data_size)
747{
748 struct per_bio_data *pb = get_per_bio_data(bio, data_size);
749
750 pb->tick = false;
751 pb->req_nr = dm_bio_get_target_bio_nr(bio);
752 pb->all_io_entry = NULL;
753 pb->len = 0;
754
755 return pb;
756}
757
/*----------------------------------------------------------------
 * Remapping
 *--------------------------------------------------------------*/
761static void remap_to_origin(struct cache *cache, struct bio *bio)
762{
763 bio->bi_bdev = cache->origin_dev->bdev;
764}
765
766static void remap_to_cache(struct cache *cache, struct bio *bio,
767 dm_cblock_t cblock)
768{
769 sector_t bi_sector = bio->bi_iter.bi_sector;
770 sector_t block = from_cblock(cblock);
771
772 bio->bi_bdev = cache->cache_dev->bdev;
773 if (!block_size_is_power_of_two(cache))
774 bio->bi_iter.bi_sector =
775 (block * cache->sectors_per_block) +
776 sector_div(bi_sector, cache->sectors_per_block);
777 else
778 bio->bi_iter.bi_sector =
779 (block << cache->sectors_per_block_shift) |
780 (bi_sector & (cache->sectors_per_block - 1));
781}
782
783static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio)
784{
785 unsigned long flags;
786 size_t pb_data_size = get_per_bio_data_size(cache);
787 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
788
789 spin_lock_irqsave(&cache->lock, flags);
790 if (cache->need_tick_bio &&
791 !(bio->bi_opf & (REQ_FUA | REQ_PREFLUSH)) &&
792 bio_op(bio) != REQ_OP_DISCARD) {
793 pb->tick = true;
794 cache->need_tick_bio = false;
795 }
796 spin_unlock_irqrestore(&cache->lock, flags);
797}
798
799static void remap_to_origin_clear_discard(struct cache *cache, struct bio *bio,
800 dm_oblock_t oblock)
801{
802 check_if_tick_bio_needed(cache, bio);
803 remap_to_origin(cache, bio);
804 if (bio_data_dir(bio) == WRITE)
805 clear_discard(cache, oblock_to_dblock(cache, oblock));
806}
807
808static void remap_to_cache_dirty(struct cache *cache, struct bio *bio,
809 dm_oblock_t oblock, dm_cblock_t cblock)
810{
811 check_if_tick_bio_needed(cache, bio);
812 remap_to_cache(cache, bio, cblock);
813 if (bio_data_dir(bio) == WRITE) {
814 set_dirty(cache, oblock, cblock);
815 clear_discard(cache, oblock_to_dblock(cache, oblock));
816 }
817}
818
819static dm_oblock_t get_bio_block(struct cache *cache, struct bio *bio)
820{
821 sector_t block_nr = bio->bi_iter.bi_sector;
822
823 if (!block_size_is_power_of_two(cache))
824 (void) sector_div(block_nr, cache->sectors_per_block);
825 else
826 block_nr >>= cache->sectors_per_block_shift;
827
828 return to_oblock(block_nr);
829}
830
831static int bio_triggers_commit(struct cache *cache, struct bio *bio)
832{
833 return bio->bi_opf & (REQ_PREFLUSH | REQ_FUA);
834}
835
/*
 * You must increment the deferred set whilst the prison cell is held.  To
 * encourage this, we ask for 'cell' to be passed in.
 */
840static void inc_ds(struct cache *cache, struct bio *bio,
841 struct dm_bio_prison_cell *cell)
842{
843 size_t pb_data_size = get_per_bio_data_size(cache);
844 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
845
846 BUG_ON(!cell);
847 BUG_ON(pb->all_io_entry);
848
849 pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
850}
851
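/*
 * Only bios to the origin device (excluding discards) are accounted in the
 * origin_tracker; this feeds the idleness check used when deciding whether
 * to write back dirty blocks.
 */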
852static bool accountable_bio(struct cache *cache, struct bio *bio)
853{
854 return ((bio->bi_bdev == cache->origin_dev->bdev) &&
855 bio_op(bio) != REQ_OP_DISCARD);
856}
857
858static void accounted_begin(struct cache *cache, struct bio *bio)
859{
860 size_t pb_data_size = get_per_bio_data_size(cache);
861 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
862
863 if (accountable_bio(cache, bio)) {
864 pb->len = bio_sectors(bio);
865 iot_io_begin(&cache->origin_tracker, pb->len);
866 }
867}
868
869static void accounted_complete(struct cache *cache, struct bio *bio)
870{
871 size_t pb_data_size = get_per_bio_data_size(cache);
872 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
873
874 iot_io_end(&cache->origin_tracker, pb->len);
875}
876
877static void accounted_request(struct cache *cache, struct bio *bio)
878{
879 accounted_begin(cache, bio);
880 generic_make_request(bio);
881}
882
883static void issue(struct cache *cache, struct bio *bio)
884{
885 unsigned long flags;
886
887 if (!bio_triggers_commit(cache, bio)) {
888 accounted_request(cache, bio);
889 return;
890 }
891
	/*
	 * Batch together any bios that trigger commits and then issue a
	 * single commit for them in do_worker().
	 */
896 spin_lock_irqsave(&cache->lock, flags);
897 cache->commit_requested = true;
898 bio_list_add(&cache->deferred_flush_bios, bio);
899 spin_unlock_irqrestore(&cache->lock, flags);
900}
901
902static void inc_and_issue(struct cache *cache, struct bio *bio, struct dm_bio_prison_cell *cell)
903{
904 inc_ds(cache, bio, cell);
905 issue(cache, bio);
906}
907
908static void defer_writethrough_bio(struct cache *cache, struct bio *bio)
909{
910 unsigned long flags;
911
912 spin_lock_irqsave(&cache->lock, flags);
913 bio_list_add(&cache->deferred_writethrough_bios, bio);
914 spin_unlock_irqrestore(&cache->lock, flags);
915
916 wake_worker(cache);
917}
918
919static void writethrough_endio(struct bio *bio)
920{
921 struct per_bio_data *pb = get_per_bio_data(bio, PB_DATA_SIZE_WT);
922
923 dm_unhook_bio(&pb->hook_info, bio);
924
925 if (bio->bi_error) {
926 bio_endio(bio);
927 return;
928 }
929
930 dm_bio_restore(&pb->bio_details, bio);
931 remap_to_cache(pb->cache, bio, pb->cblock);
932
	/*
	 * We can't issue this bio directly, since we're in interrupt
	 * context.  So it gets put on a list for the worker thread to
	 * deal with.
	 */
938 defer_writethrough_bio(pb->cache, bio);
939}
940
/*
 * When running in writethrough mode we need to send writes to clean blocks
 * to both the cache and origin devices.  In future we'd like to clone the
 * bio and send them in parallel, but for now we're doing them in
 * series as this is easier.
 */
947static void remap_to_origin_then_cache(struct cache *cache, struct bio *bio,
948 dm_oblock_t oblock, dm_cblock_t cblock)
949{
950 struct per_bio_data *pb = get_per_bio_data(bio, PB_DATA_SIZE_WT);
951
952 pb->cache = cache;
953 pb->cblock = cblock;
954 dm_hook_bio(&pb->hook_info, bio, writethrough_endio, NULL);
955 dm_bio_record(&pb->bio_details, bio);
956
957 remap_to_origin_clear_discard(pb->cache, bio, oblock);
958}
959
/*----------------------------------------------------------------
 * Failure modes
 *--------------------------------------------------------------*/
963static enum cache_metadata_mode get_cache_mode(struct cache *cache)
964{
965 return cache->features.mode;
966}
967
968static const char *cache_device_name(struct cache *cache)
969{
970 return dm_device_name(dm_table_get_md(cache->ti->table));
971}
972
973static void notify_mode_switch(struct cache *cache, enum cache_metadata_mode mode)
974{
975 const char *descs[] = {
976 "write",
977 "read-only",
978 "fail"
979 };
980
981 dm_table_event(cache->ti->table);
982 DMINFO("%s: switching cache to %s mode",
983 cache_device_name(cache), descs[(int)mode]);
984}
985
986static void set_cache_mode(struct cache *cache, enum cache_metadata_mode new_mode)
987{
988 bool needs_check;
989 enum cache_metadata_mode old_mode = get_cache_mode(cache);
990
991 if (dm_cache_metadata_needs_check(cache->cmd, &needs_check)) {
992 DMERR("%s: unable to read needs_check flag, setting failure mode.",
993 cache_device_name(cache));
994 new_mode = CM_FAIL;
995 }
996
997 if (new_mode == CM_WRITE && needs_check) {
998 DMERR("%s: unable to switch cache to write mode until repaired.",
999 cache_device_name(cache));
1000 if (old_mode != new_mode)
1001 new_mode = old_mode;
1002 else
1003 new_mode = CM_READ_ONLY;
1004 }
1005
	/* Never move out of fail mode */
1007 if (old_mode == CM_FAIL)
1008 new_mode = CM_FAIL;
1009
1010 switch (new_mode) {
1011 case CM_FAIL:
1012 case CM_READ_ONLY:
1013 dm_cache_metadata_set_read_only(cache->cmd);
1014 break;
1015
1016 case CM_WRITE:
1017 dm_cache_metadata_set_read_write(cache->cmd);
1018 break;
1019 }
1020
1021 cache->features.mode = new_mode;
1022
1023 if (new_mode != old_mode)
1024 notify_mode_switch(cache, new_mode);
1025}
1026
1027static void abort_transaction(struct cache *cache)
1028{
1029 const char *dev_name = cache_device_name(cache);
1030
1031 if (get_cache_mode(cache) >= CM_READ_ONLY)
1032 return;
1033
1034 if (dm_cache_metadata_set_needs_check(cache->cmd)) {
1035 DMERR("%s: failed to set 'needs_check' flag in metadata", dev_name);
1036 set_cache_mode(cache, CM_FAIL);
1037 }
1038
1039 DMERR_LIMIT("%s: aborting current metadata transaction", dev_name);
1040 if (dm_cache_metadata_abort(cache->cmd)) {
1041 DMERR("%s: failed to abort metadata transaction", dev_name);
1042 set_cache_mode(cache, CM_FAIL);
1043 }
1044}
1045
1046static void metadata_operation_failed(struct cache *cache, const char *op, int r)
1047{
1048 DMERR_LIMIT("%s: metadata operation '%s' failed: error = %d",
1049 cache_device_name(cache), op, r);
1050 abort_transaction(cache);
1051 set_cache_mode(cache, CM_READ_ONLY);
1052}
1053
/*----------------------------------------------------------------
 * Migration processing
 *
 * Migration covers moving data from the origin device to the cache, or
 * vice versa.
 *--------------------------------------------------------------*/
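/*
 * nr_io_migrations counts migrations that involve copy IO; it is what
 * spare_migration_bandwidth() compares against migration_threshold.
 */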
1060static void inc_io_migrations(struct cache *cache)
1061{
1062 atomic_inc(&cache->nr_io_migrations);
1063}
1064
1065static void dec_io_migrations(struct cache *cache)
1066{
1067 atomic_dec(&cache->nr_io_migrations);
1068}
1069
1070static bool discard_or_flush(struct bio *bio)
1071{
1072 return bio_op(bio) == REQ_OP_DISCARD ||
1073 bio->bi_opf & (REQ_PREFLUSH | REQ_FUA);
1074}
1075
1076static void __cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell)
1077{
1078 if (discard_or_flush(cell->holder)) {
		/*
		 * We have to handle these bios individually.
		 */
1082 dm_cell_release(cache->prison, cell, &cache->deferred_bios);
1083 free_prison_cell(cache, cell);
1084 } else
1085 list_add_tail(&cell->user_list, &cache->deferred_cells);
1086}
1087
1088static void cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell, bool holder)
1089{
1090 unsigned long flags;
1091
1092 if (!holder && dm_cell_promote_or_release(cache->prison, cell)) {
		/*
		 * There was no prisoner to promote to holder, the
		 * cell has been released.
		 */
1097 free_prison_cell(cache, cell);
1098 return;
1099 }
1100
1101 spin_lock_irqsave(&cache->lock, flags);
1102 __cell_defer(cache, cell);
1103 spin_unlock_irqrestore(&cache->lock, flags);
1104
1105 wake_worker(cache);
1106}
1107
1108static void cell_error_with_code(struct cache *cache, struct dm_bio_prison_cell *cell, int err)
1109{
1110 dm_cell_error(cache->prison, cell, err);
1111 free_prison_cell(cache, cell);
1112}
1113
1114static void cell_requeue(struct cache *cache, struct dm_bio_prison_cell *cell)
1115{
1116 cell_error_with_code(cache, cell, DM_ENDIO_REQUEUE);
1117}
1118
1119static void free_io_migration(struct dm_cache_migration *mg)
1120{
1121 struct cache *cache = mg->cache;
1122
1123 dec_io_migrations(cache);
1124 free_migration(mg);
1125 wake_worker(cache);
1126}
1127
1128static void migration_failure(struct dm_cache_migration *mg)
1129{
1130 struct cache *cache = mg->cache;
1131 const char *dev_name = cache_device_name(cache);
1132
1133 if (mg->writeback) {
1134 DMERR_LIMIT("%s: writeback failed; couldn't copy block", dev_name);
1135 set_dirty(cache, mg->old_oblock, mg->cblock);
1136 cell_defer(cache, mg->old_ocell, false);
1137
1138 } else if (mg->demote) {
1139 DMERR_LIMIT("%s: demotion failed; couldn't copy block", dev_name);
1140 policy_force_mapping(cache->policy, mg->new_oblock, mg->old_oblock);
1141
1142 cell_defer(cache, mg->old_ocell, mg->promote ? false : true);
1143 if (mg->promote)
1144 cell_defer(cache, mg->new_ocell, true);
1145 } else {
1146 DMERR_LIMIT("%s: promotion failed; couldn't copy block", dev_name);
1147 policy_remove_mapping(cache->policy, mg->new_oblock);
1148 cell_defer(cache, mg->new_ocell, true);
1149 }
1150
1151 free_io_migration(mg);
1152}
1153
1154static void migration_success_pre_commit(struct dm_cache_migration *mg)
1155{
1156 int r;
1157 unsigned long flags;
1158 struct cache *cache = mg->cache;
1159
1160 if (mg->writeback) {
1161 clear_dirty(cache, mg->old_oblock, mg->cblock);
1162 cell_defer(cache, mg->old_ocell, false);
1163 free_io_migration(mg);
1164 return;
1165
1166 } else if (mg->demote) {
1167 r = dm_cache_remove_mapping(cache->cmd, mg->cblock);
1168 if (r) {
1169 DMERR_LIMIT("%s: demotion failed; couldn't update on disk metadata",
1170 cache_device_name(cache));
1171 metadata_operation_failed(cache, "dm_cache_remove_mapping", r);
1172 policy_force_mapping(cache->policy, mg->new_oblock,
1173 mg->old_oblock);
1174 if (mg->promote)
1175 cell_defer(cache, mg->new_ocell, true);
1176 free_io_migration(mg);
1177 return;
1178 }
1179 } else {
1180 r = dm_cache_insert_mapping(cache->cmd, mg->cblock, mg->new_oblock);
1181 if (r) {
1182 DMERR_LIMIT("%s: promotion failed; couldn't update on disk metadata",
1183 cache_device_name(cache));
1184 metadata_operation_failed(cache, "dm_cache_insert_mapping", r);
1185 policy_remove_mapping(cache->policy, mg->new_oblock);
1186 free_io_migration(mg);
1187 return;
1188 }
1189 }
1190
1191 spin_lock_irqsave(&cache->lock, flags);
1192 list_add_tail(&mg->list, &cache->need_commit_migrations);
1193 cache->commit_requested = true;
1194 spin_unlock_irqrestore(&cache->lock, flags);
1195}
1196
1197static void migration_success_post_commit(struct dm_cache_migration *mg)
1198{
1199 unsigned long flags;
1200 struct cache *cache = mg->cache;
1201
1202 if (mg->writeback) {
1203 DMWARN_LIMIT("%s: writeback unexpectedly triggered commit",
1204 cache_device_name(cache));
1205 return;
1206
1207 } else if (mg->demote) {
1208 cell_defer(cache, mg->old_ocell, mg->promote ? false : true);
1209
1210 if (mg->promote) {
1211 mg->demote = false;
1212
1213 spin_lock_irqsave(&cache->lock, flags);
1214 list_add_tail(&mg->list, &cache->quiesced_migrations);
1215 spin_unlock_irqrestore(&cache->lock, flags);
1216
1217 } else {
1218 if (mg->invalidate)
1219 policy_remove_mapping(cache->policy, mg->old_oblock);
1220 free_io_migration(mg);
1221 }
1222
1223 } else {
1224 if (mg->requeue_holder) {
1225 clear_dirty(cache, mg->new_oblock, mg->cblock);
1226 cell_defer(cache, mg->new_ocell, true);
1227 } else {
			/*
			 * The block was promoted via an overwrite, so it's dirty.
			 */
1231 set_dirty(cache, mg->new_oblock, mg->cblock);
1232 bio_endio(mg->new_ocell->holder);
1233 cell_defer(cache, mg->new_ocell, false);
1234 }
1235 free_io_migration(mg);
1236 }
1237}
1238
1239static void copy_complete(int read_err, unsigned long write_err, void *context)
1240{
1241 unsigned long flags;
1242 struct dm_cache_migration *mg = (struct dm_cache_migration *) context;
1243 struct cache *cache = mg->cache;
1244
1245 if (read_err || write_err)
1246 mg->err = true;
1247
1248 spin_lock_irqsave(&cache->lock, flags);
1249 list_add_tail(&mg->list, &cache->completed_migrations);
1250 spin_unlock_irqrestore(&cache->lock, flags);
1251
1252 wake_worker(cache);
1253}
1254
1255static void issue_copy(struct dm_cache_migration *mg)
1256{
1257 int r;
1258 struct dm_io_region o_region, c_region;
1259 struct cache *cache = mg->cache;
1260 sector_t cblock = from_cblock(mg->cblock);
1261
1262 o_region.bdev = cache->origin_dev->bdev;
1263 o_region.count = cache->sectors_per_block;
1264
1265 c_region.bdev = cache->cache_dev->bdev;
1266 c_region.sector = cblock * cache->sectors_per_block;
1267 c_region.count = cache->sectors_per_block;
1268
1269 if (mg->writeback || mg->demote) {
		/* demote */
1271 o_region.sector = from_oblock(mg->old_oblock) * cache->sectors_per_block;
1272 r = dm_kcopyd_copy(cache->copier, &c_region, 1, &o_region, 0, copy_complete, mg);
1273 } else {
		/* promote */
1275 o_region.sector = from_oblock(mg->new_oblock) * cache->sectors_per_block;
1276 r = dm_kcopyd_copy(cache->copier, &o_region, 1, &c_region, 0, copy_complete, mg);
1277 }
1278
1279 if (r < 0) {
1280 DMERR_LIMIT("%s: issuing migration failed", cache_device_name(cache));
1281 migration_failure(mg);
1282 }
1283}
1284
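/*
 * Overwrite optimisation: when a write covers a complete block we can skip
 * the promotion copy and write the data straight to the cache block (see
 * issue_overwrite() below), leaving the block dirty.
 */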
1285static void overwrite_endio(struct bio *bio)
1286{
1287 struct dm_cache_migration *mg = bio->bi_private;
1288 struct cache *cache = mg->cache;
1289 size_t pb_data_size = get_per_bio_data_size(cache);
1290 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
1291 unsigned long flags;
1292
1293 dm_unhook_bio(&pb->hook_info, bio);
1294
1295 if (bio->bi_error)
1296 mg->err = true;
1297
1298 mg->requeue_holder = false;
1299
1300 spin_lock_irqsave(&cache->lock, flags);
1301 list_add_tail(&mg->list, &cache->completed_migrations);
1302 spin_unlock_irqrestore(&cache->lock, flags);
1303
1304 wake_worker(cache);
1305}
1306
1307static void issue_overwrite(struct dm_cache_migration *mg, struct bio *bio)
1308{
1309 size_t pb_data_size = get_per_bio_data_size(mg->cache);
1310 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
1311
1312 dm_hook_bio(&pb->hook_info, bio, overwrite_endio, mg);
1313 remap_to_cache_dirty(mg->cache, bio, mg->new_oblock, mg->cblock);
1314
	/*
	 * No need to inc_ds() here, since the cell will be held for the
	 * duration of the io.
	 */
1319 accounted_request(mg->cache, bio);
1320}
1321
1322static bool bio_writes_complete_block(struct cache *cache, struct bio *bio)
1323{
1324 return (bio_data_dir(bio) == WRITE) &&
1325 (bio->bi_iter.bi_size == (cache->sectors_per_block << SECTOR_SHIFT));
1326}
1327
1328static void avoid_copy(struct dm_cache_migration *mg)
1329{
1330 atomic_inc(&mg->cache->stats.copies_avoided);
1331 migration_success_pre_commit(mg);
1332}
1333
1334static void calc_discard_block_range(struct cache *cache, struct bio *bio,
1335 dm_dblock_t *b, dm_dblock_t *e)
1336{
1337 sector_t sb = bio->bi_iter.bi_sector;
1338 sector_t se = bio_end_sector(bio);
1339
1340 *b = to_dblock(dm_sector_div_up(sb, cache->discard_block_size));
1341
1342 if (se - sb < cache->discard_block_size)
1343 *e = *b;
1344 else
1345 *e = to_dblock(block_div(se, cache->discard_block_size));
1346}
1347
1348static void issue_discard(struct dm_cache_migration *mg)
1349{
1350 dm_dblock_t b, e;
1351 struct bio *bio = mg->new_ocell->holder;
1352 struct cache *cache = mg->cache;
1353
1354 calc_discard_block_range(cache, bio, &b, &e);
1355 while (b != e) {
1356 set_discard(cache, b);
1357 b = to_dblock(from_dblock(b) + 1);
1358 }
1359
1360 bio_endio(bio);
1361 cell_defer(cache, mg->new_ocell, false);
1362 free_migration(mg);
1363 wake_worker(cache);
1364}
1365
1366static void issue_copy_or_discard(struct dm_cache_migration *mg)
1367{
1368 bool avoid;
1369 struct cache *cache = mg->cache;
1370
1371 if (mg->discard) {
1372 issue_discard(mg);
1373 return;
1374 }
1375
1376 if (mg->writeback || mg->demote)
1377 avoid = !is_dirty(cache, mg->cblock) ||
1378 is_discarded_oblock(cache, mg->old_oblock);
1379 else {
1380 struct bio *bio = mg->new_ocell->holder;
1381
1382 avoid = is_discarded_oblock(cache, mg->new_oblock);
1383
1384 if (writeback_mode(&cache->features) &&
1385 !avoid && bio_writes_complete_block(cache, bio)) {
1386 issue_overwrite(mg, bio);
1387 return;
1388 }
1389 }
1390
1391 avoid ? avoid_copy(mg) : issue_copy(mg);
1392}
1393
1394static void complete_migration(struct dm_cache_migration *mg)
1395{
1396 if (mg->err)
1397 migration_failure(mg);
1398 else
1399 migration_success_pre_commit(mg);
1400}
1401
1402static void process_migrations(struct cache *cache, struct list_head *head,
1403 void (*fn)(struct dm_cache_migration *))
1404{
1405 unsigned long flags;
1406 struct list_head list;
1407 struct dm_cache_migration *mg, *tmp;
1408
1409 INIT_LIST_HEAD(&list);
1410 spin_lock_irqsave(&cache->lock, flags);
1411 list_splice_init(head, &list);
1412 spin_unlock_irqrestore(&cache->lock, flags);
1413
1414 list_for_each_entry_safe(mg, tmp, &list, list)
1415 fn(mg);
1416}
1417
1418static void __queue_quiesced_migration(struct dm_cache_migration *mg)
1419{
1420 list_add_tail(&mg->list, &mg->cache->quiesced_migrations);
1421}
1422
1423static void queue_quiesced_migration(struct dm_cache_migration *mg)
1424{
1425 unsigned long flags;
1426 struct cache *cache = mg->cache;
1427
1428 spin_lock_irqsave(&cache->lock, flags);
1429 __queue_quiesced_migration(mg);
1430 spin_unlock_irqrestore(&cache->lock, flags);
1431
1432 wake_worker(cache);
1433}
1434
1435static void queue_quiesced_migrations(struct cache *cache, struct list_head *work)
1436{
1437 unsigned long flags;
1438 struct dm_cache_migration *mg, *tmp;
1439
1440 spin_lock_irqsave(&cache->lock, flags);
1441 list_for_each_entry_safe(mg, tmp, work, list)
1442 __queue_quiesced_migration(mg);
1443 spin_unlock_irqrestore(&cache->lock, flags);
1444
1445 wake_worker(cache);
1446}
1447
1448static void check_for_quiesced_migrations(struct cache *cache,
1449 struct per_bio_data *pb)
1450{
1451 struct list_head work;
1452
1453 if (!pb->all_io_entry)
1454 return;
1455
1456 INIT_LIST_HEAD(&work);
1457 dm_deferred_entry_dec(pb->all_io_entry, &work);
1458
1459 if (!list_empty(&work))
1460 queue_quiesced_migrations(cache, &work);
1461}
1462
1463static void quiesce_migration(struct dm_cache_migration *mg)
1464{
1465 if (!dm_deferred_set_add_work(mg->cache->all_io_ds, &mg->list))
1466 queue_quiesced_migration(mg);
1467}
1468
1469static void promote(struct cache *cache, struct prealloc *structs,
1470 dm_oblock_t oblock, dm_cblock_t cblock,
1471 struct dm_bio_prison_cell *cell)
1472{
1473 struct dm_cache_migration *mg = prealloc_get_migration(structs);
1474
1475 mg->err = false;
1476 mg->discard = false;
1477 mg->writeback = false;
1478 mg->demote = false;
1479 mg->promote = true;
1480 mg->requeue_holder = true;
1481 mg->invalidate = false;
1482 mg->cache = cache;
1483 mg->new_oblock = oblock;
1484 mg->cblock = cblock;
1485 mg->old_ocell = NULL;
1486 mg->new_ocell = cell;
1487 mg->start_jiffies = jiffies;
1488
1489 inc_io_migrations(cache);
1490 quiesce_migration(mg);
1491}
1492
1493static void writeback(struct cache *cache, struct prealloc *structs,
1494 dm_oblock_t oblock, dm_cblock_t cblock,
1495 struct dm_bio_prison_cell *cell)
1496{
1497 struct dm_cache_migration *mg = prealloc_get_migration(structs);
1498
1499 mg->err = false;
1500 mg->discard = false;
1501 mg->writeback = true;
1502 mg->demote = false;
1503 mg->promote = false;
1504 mg->requeue_holder = true;
1505 mg->invalidate = false;
1506 mg->cache = cache;
1507 mg->old_oblock = oblock;
1508 mg->cblock = cblock;
1509 mg->old_ocell = cell;
1510 mg->new_ocell = NULL;
1511 mg->start_jiffies = jiffies;
1512
1513 inc_io_migrations(cache);
1514 quiesce_migration(mg);
1515}
1516
1517static void demote_then_promote(struct cache *cache, struct prealloc *structs,
1518 dm_oblock_t old_oblock, dm_oblock_t new_oblock,
1519 dm_cblock_t cblock,
1520 struct dm_bio_prison_cell *old_ocell,
1521 struct dm_bio_prison_cell *new_ocell)
1522{
1523 struct dm_cache_migration *mg = prealloc_get_migration(structs);
1524
1525 mg->err = false;
1526 mg->discard = false;
1527 mg->writeback = false;
1528 mg->demote = true;
1529 mg->promote = true;
1530 mg->requeue_holder = true;
1531 mg->invalidate = false;
1532 mg->cache = cache;
1533 mg->old_oblock = old_oblock;
1534 mg->new_oblock = new_oblock;
1535 mg->cblock = cblock;
1536 mg->old_ocell = old_ocell;
1537 mg->new_ocell = new_ocell;
1538 mg->start_jiffies = jiffies;
1539
1540 inc_io_migrations(cache);
1541 quiesce_migration(mg);
1542}
1543
/*
 * Invalidate a cache entry.  The block is demoted back to the origin and
 * its mapping removed from the policy.
 */
1548static void invalidate(struct cache *cache, struct prealloc *structs,
1549 dm_oblock_t oblock, dm_cblock_t cblock,
1550 struct dm_bio_prison_cell *cell)
1551{
1552 struct dm_cache_migration *mg = prealloc_get_migration(structs);
1553
1554 mg->err = false;
1555 mg->discard = false;
1556 mg->writeback = false;
1557 mg->demote = true;
1558 mg->promote = false;
1559 mg->requeue_holder = true;
1560 mg->invalidate = true;
1561 mg->cache = cache;
1562 mg->old_oblock = oblock;
1563 mg->cblock = cblock;
1564 mg->old_ocell = cell;
1565 mg->new_ocell = NULL;
1566 mg->start_jiffies = jiffies;
1567
1568 inc_io_migrations(cache);
1569 quiesce_migration(mg);
1570}
1571
1572static void discard(struct cache *cache, struct prealloc *structs,
1573 struct dm_bio_prison_cell *cell)
1574{
1575 struct dm_cache_migration *mg = prealloc_get_migration(structs);
1576
1577 mg->err = false;
1578 mg->discard = true;
1579 mg->writeback = false;
1580 mg->demote = false;
1581 mg->promote = false;
1582 mg->requeue_holder = false;
1583 mg->invalidate = false;
1584 mg->cache = cache;
1585 mg->old_ocell = NULL;
1586 mg->new_ocell = cell;
1587 mg->start_jiffies = jiffies;
1588
1589 quiesce_migration(mg);
1590}
1591
/*----------------------------------------------------------------
 * bio processing
 *--------------------------------------------------------------*/
1595static void defer_bio(struct cache *cache, struct bio *bio)
1596{
1597 unsigned long flags;
1598
1599 spin_lock_irqsave(&cache->lock, flags);
1600 bio_list_add(&cache->deferred_bios, bio);
1601 spin_unlock_irqrestore(&cache->lock, flags);
1602
1603 wake_worker(cache);
1604}
1605
1606static void process_flush_bio(struct cache *cache, struct bio *bio)
1607{
1608 size_t pb_data_size = get_per_bio_data_size(cache);
1609 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
1610
1611 BUG_ON(bio->bi_iter.bi_size);
1612 if (!pb->req_nr)
1613 remap_to_origin(cache, bio);
1614 else
1615 remap_to_cache(cache, bio, 0);

	/*
	 * Empty flush bios aren't directed at any particular block, so
	 * there's no need to inc_ds().  issue() will batch them up for a
	 * single commit.
	 */
1622 issue(cache, bio);
1623}
1624
1625static void process_discard_bio(struct cache *cache, struct prealloc *structs,
1626 struct bio *bio)
1627{
1628 int r;
1629 dm_dblock_t b, e;
1630 struct dm_bio_prison_cell *cell_prealloc, *new_ocell;
1631
1632 calc_discard_block_range(cache, bio, &b, &e);
1633 if (b == e) {
1634 bio_endio(bio);
1635 return;
1636 }
1637
1638 cell_prealloc = prealloc_get_cell(structs);
1639 r = bio_detain_range(cache, dblock_to_oblock(cache, b), dblock_to_oblock(cache, e), bio, cell_prealloc,
1640 (cell_free_fn) prealloc_put_cell,
1641 structs, &new_ocell);
1642 if (r > 0)
1643 return;
1644
1645 discard(cache, structs, new_ocell);
1646}
1647
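/*
 * Background migrations are throttled: only allow a new one while the
 * sectors' worth of copies already in flight stays below
 * cache->migration_threshold.
 */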
1648static bool spare_migration_bandwidth(struct cache *cache)
1649{
1650 sector_t current_volume = (atomic_read(&cache->nr_io_migrations) + 1) *
1651 cache->sectors_per_block;
1652 return current_volume < cache->migration_threshold;
1653}
1654
1655static void inc_hit_counter(struct cache *cache, struct bio *bio)
1656{
1657 atomic_inc(bio_data_dir(bio) == READ ?
1658 &cache->stats.read_hit : &cache->stats.write_hit);
1659}
1660
1661static void inc_miss_counter(struct cache *cache, struct bio *bio)
1662{
1663 atomic_inc(bio_data_dir(bio) == READ ?
1664 &cache->stats.read_miss : &cache->stats.write_miss);
1665}
1666
/*----------------------------------------------------------------*/

1669struct inc_detail {
1670 struct cache *cache;
1671 struct bio_list bios_for_issue;
1672 struct bio_list unhandled_bios;
1673 bool any_writes;
1674};
1675
1676static void inc_fn(void *context, struct dm_bio_prison_cell *cell)
1677{
1678 struct bio *bio;
1679 struct inc_detail *detail = context;
1680 struct cache *cache = detail->cache;
1681
1682 inc_ds(cache, cell->holder, cell);
1683 if (bio_data_dir(cell->holder) == WRITE)
1684 detail->any_writes = true;
1685
1686 while ((bio = bio_list_pop(&cell->bios))) {
1687 if (discard_or_flush(bio)) {
1688 bio_list_add(&detail->unhandled_bios, bio);
1689 continue;
1690 }
1691
1692 if (bio_data_dir(bio) == WRITE)
1693 detail->any_writes = true;
1694
1695 bio_list_add(&detail->bios_for_issue, bio);
1696 inc_ds(cache, bio, cell);
1697 }
1698}
1699
1700
1701static void remap_cell_to_origin_clear_discard(struct cache *cache,
1702 struct dm_bio_prison_cell *cell,
1703 dm_oblock_t oblock, bool issue_holder)
1704{
1705 struct bio *bio;
1706 unsigned long flags;
1707 struct inc_detail detail;
1708
1709 detail.cache = cache;
1710 bio_list_init(&detail.bios_for_issue);
1711 bio_list_init(&detail.unhandled_bios);
1712 detail.any_writes = false;
1713
1714 spin_lock_irqsave(&cache->lock, flags);
1715 dm_cell_visit_release(cache->prison, inc_fn, &detail, cell);
1716 bio_list_merge(&cache->deferred_bios, &detail.unhandled_bios);
1717 spin_unlock_irqrestore(&cache->lock, flags);
1718
1719 remap_to_origin(cache, cell->holder);
1720 if (issue_holder)
1721 issue(cache, cell->holder);
1722 else
1723 accounted_begin(cache, cell->holder);
1724
1725 if (detail.any_writes)
1726 clear_discard(cache, oblock_to_dblock(cache, oblock));
1727
1728 while ((bio = bio_list_pop(&detail.bios_for_issue))) {
1729 remap_to_origin(cache, bio);
1730 issue(cache, bio);
1731 }
1732
1733 free_prison_cell(cache, cell);
1734}
1735
1736static void remap_cell_to_cache_dirty(struct cache *cache, struct dm_bio_prison_cell *cell,
1737 dm_oblock_t oblock, dm_cblock_t cblock, bool issue_holder)
1738{
1739 struct bio *bio;
1740 unsigned long flags;
1741 struct inc_detail detail;
1742
1743 detail.cache = cache;
1744 bio_list_init(&detail.bios_for_issue);
1745 bio_list_init(&detail.unhandled_bios);
1746 detail.any_writes = false;
1747
1748 spin_lock_irqsave(&cache->lock, flags);
1749 dm_cell_visit_release(cache->prison, inc_fn, &detail, cell);
1750 bio_list_merge(&cache->deferred_bios, &detail.unhandled_bios);
1751 spin_unlock_irqrestore(&cache->lock, flags);
1752
1753 remap_to_cache(cache, cell->holder, cblock);
1754 if (issue_holder)
1755 issue(cache, cell->holder);
1756 else
1757 accounted_begin(cache, cell->holder);
1758
1759 if (detail.any_writes) {
1760 set_dirty(cache, oblock, cblock);
1761 clear_discard(cache, oblock_to_dblock(cache, oblock));
1762 }
1763
1764 while ((bio = bio_list_pop(&detail.bios_for_issue))) {
1765 remap_to_cache(cache, bio, cblock);
1766 issue(cache, bio);
1767 }
1768
1769 free_prison_cell(cache, cell);
1770}
1771
/*----------------------------------------------------------------*/

1774struct old_oblock_lock {
1775 struct policy_locker locker;
1776 struct cache *cache;
1777 struct prealloc *structs;
1778 struct dm_bio_prison_cell *cell;
1779};
1780
1781static int null_locker(struct policy_locker *locker, dm_oblock_t b)
1782{
	/* This should never be called */
1784 BUG();
1785 return 0;
1786}
1787
1788static int cell_locker(struct policy_locker *locker, dm_oblock_t b)
1789{
1790 struct old_oblock_lock *l = container_of(locker, struct old_oblock_lock, locker);
1791 struct dm_bio_prison_cell *cell_prealloc = prealloc_get_cell(l->structs);
1792
1793 return bio_detain(l->cache, b, NULL, cell_prealloc,
1794 (cell_free_fn) prealloc_put_cell,
1795 l->structs, &l->cell);
1796}
1797
1798static void process_cell(struct cache *cache, struct prealloc *structs,
1799 struct dm_bio_prison_cell *new_ocell)
1800{
1801 int r;
1802 bool release_cell = true;
1803 struct bio *bio = new_ocell->holder;
1804 dm_oblock_t block = get_bio_block(cache, bio);
1805 struct policy_result lookup_result;
1806 bool passthrough = passthrough_mode(&cache->features);
1807 bool fast_promotion, can_migrate;
1808 struct old_oblock_lock ool;
1809
1810 fast_promotion = is_discarded_oblock(cache, block) || bio_writes_complete_block(cache, bio);
1811 can_migrate = !passthrough && (fast_promotion || spare_migration_bandwidth(cache));
1812
1813 ool.locker.fn = cell_locker;
1814 ool.cache = cache;
1815 ool.structs = structs;
1816 ool.cell = NULL;
1817 r = policy_map(cache->policy, block, true, can_migrate, fast_promotion,
1818 bio, &ool.locker, &lookup_result);
1819
1820 if (r == -EWOULDBLOCK)
		/* migration has been denied */
1822 lookup_result.op = POLICY_MISS;
1823
1824 switch (lookup_result.op) {
1825 case POLICY_HIT:
1826 if (passthrough) {
1827 inc_miss_counter(cache, bio);

			/*
			 * Passthrough always maps to the origin,
			 * invalidating any cache blocks that are written
			 * to.
			 */

1835 if (bio_data_dir(bio) == WRITE) {
1836 atomic_inc(&cache->stats.demotion);
1837 invalidate(cache, structs, block, lookup_result.cblock, new_ocell);
1838 release_cell = false;
1839
1840 } else {
				/* passthrough reads are simply remapped to the origin */
1842 remap_to_origin_clear_discard(cache, bio, block);
1843 inc_and_issue(cache, bio, new_ocell);
1844 }
1845 } else {
1846 inc_hit_counter(cache, bio);
1847
1848 if (bio_data_dir(bio) == WRITE &&
1849 writethrough_mode(&cache->features) &&
1850 !is_dirty(cache, lookup_result.cblock)) {
1851 remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
1852 inc_and_issue(cache, bio, new_ocell);
1853
1854 } else {
1855 remap_cell_to_cache_dirty(cache, new_ocell, block, lookup_result.cblock, true);
1856 release_cell = false;
1857 }
1858 }
1859
1860 break;
1861
1862 case POLICY_MISS:
1863 inc_miss_counter(cache, bio);
1864 remap_cell_to_origin_clear_discard(cache, new_ocell, block, true);
1865 release_cell = false;
1866 break;
1867
1868 case POLICY_NEW:
1869 atomic_inc(&cache->stats.promotion);
1870 promote(cache, structs, block, lookup_result.cblock, new_ocell);
1871 release_cell = false;
1872 break;
1873
1874 case POLICY_REPLACE:
1875 atomic_inc(&cache->stats.demotion);
1876 atomic_inc(&cache->stats.promotion);
1877 demote_then_promote(cache, structs, lookup_result.old_oblock,
1878 block, lookup_result.cblock,
1879 ool.cell, new_ocell);
1880 release_cell = false;
1881 break;
1882
1883 default:
1884 DMERR_LIMIT("%s: %s: erroring bio, unknown policy op: %u",
1885 cache_device_name(cache), __func__,
1886 (unsigned) lookup_result.op);
1887 bio_io_error(bio);
1888 }
1889
1890 if (release_cell)
1891 cell_defer(cache, new_ocell, false);
1892}
1893
1894static void process_bio(struct cache *cache, struct prealloc *structs,
1895 struct bio *bio)
1896{
1897 int r;
1898 dm_oblock_t block = get_bio_block(cache, bio);
1899 struct dm_bio_prison_cell *cell_prealloc, *new_ocell;

	/*
	 * Check to see if that block is currently migrating.
	 */
1904 cell_prealloc = prealloc_get_cell(structs);
1905 r = bio_detain(cache, block, bio, cell_prealloc,
1906 (cell_free_fn) prealloc_put_cell,
1907 structs, &new_ocell);
1908 if (r > 0)
1909 return;
1910
1911 process_cell(cache, structs, new_ocell);
1912}
1913
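/*
 * Commit if COMMIT_PERIOD jiffies have passed since the last commit; the
 * 'jiffies <' test also forces a commit if jiffies has wrapped.
 */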
1914static int need_commit_due_to_time(struct cache *cache)
1915{
1916 return jiffies < cache->last_commit_jiffies ||
1917 jiffies > cache->last_commit_jiffies + COMMIT_PERIOD;
1918}
1919
/*
 * A non-zero return indicates read_only or fail_io mode.
 */
1923static int commit(struct cache *cache, bool clean_shutdown)
1924{
1925 int r;
1926
1927 if (get_cache_mode(cache) >= CM_READ_ONLY)
1928 return -EINVAL;
1929
1930 atomic_inc(&cache->stats.commit_count);
1931 r = dm_cache_commit(cache->cmd, clean_shutdown);
1932 if (r)
1933 metadata_operation_failed(cache, "dm_cache_commit", r);
1934
1935 return r;
1936}
1937
1938static int commit_if_needed(struct cache *cache)
1939{
1940 int r = 0;
1941
1942 if ((cache->commit_requested || need_commit_due_to_time(cache)) &&
1943 dm_cache_changed_this_transaction(cache->cmd)) {
1944 r = commit(cache, false);
1945 cache->commit_requested = false;
1946 cache->last_commit_jiffies = jiffies;
1947 }
1948
1949 return r;
1950}
1951
1952static void process_deferred_bios(struct cache *cache)
1953{
1954 bool prealloc_used = false;
1955 unsigned long flags;
1956 struct bio_list bios;
1957 struct bio *bio;
1958 struct prealloc structs;
1959
1960 memset(&structs, 0, sizeof(structs));
1961 bio_list_init(&bios);
1962
1963 spin_lock_irqsave(&cache->lock, flags);
1964 bio_list_merge(&bios, &cache->deferred_bios);
1965 bio_list_init(&cache->deferred_bios);
1966 spin_unlock_irqrestore(&cache->lock, flags);
1967
1968 while (!bio_list_empty(&bios)) {
		/*
		 * If we've got no free migration structs, and processing
		 * this bio might require one, we pause until there are some
		 * prepared mappings to process.
		 */
1974 prealloc_used = true;
1975 if (prealloc_data_structs(cache, &structs)) {
1976 spin_lock_irqsave(&cache->lock, flags);
1977 bio_list_merge(&cache->deferred_bios, &bios);
1978 spin_unlock_irqrestore(&cache->lock, flags);
1979 break;
1980 }
1981
1982 bio = bio_list_pop(&bios);
1983
1984 if (bio->bi_opf & REQ_PREFLUSH)
1985 process_flush_bio(cache, bio);
1986 else if (bio_op(bio) == REQ_OP_DISCARD)
1987 process_discard_bio(cache, &structs, bio);
1988 else
1989 process_bio(cache, &structs, bio);
1990 }
1991
1992 if (prealloc_used)
1993 prealloc_free_structs(cache, &structs);
1994}
1995
1996static void process_deferred_cells(struct cache *cache)
1997{
1998 bool prealloc_used = false;
1999 unsigned long flags;
2000 struct dm_bio_prison_cell *cell, *tmp;
2001 struct list_head cells;
2002 struct prealloc structs;
2003
2004 memset(&structs, 0, sizeof(structs));
2005
2006 INIT_LIST_HEAD(&cells);
2007
2008 spin_lock_irqsave(&cache->lock, flags);
2009 list_splice_init(&cache->deferred_cells, &cells);
2010 spin_unlock_irqrestore(&cache->lock, flags);
2011
2012 list_for_each_entry_safe(cell, tmp, &cells, user_list) {
		/*
		 * If we've got no free migration structs, and processing
		 * this cell might require one, we pause until there are some
		 * prepared mappings to process.
		 */
2018 prealloc_used = true;
2019 if (prealloc_data_structs(cache, &structs)) {
2020 spin_lock_irqsave(&cache->lock, flags);
2021 list_splice(&cells, &cache->deferred_cells);
2022 spin_unlock_irqrestore(&cache->lock, flags);
2023 break;
2024 }
2025
2026 process_cell(cache, &structs, cell);
2027 }
2028
2029 if (prealloc_used)
2030 prealloc_free_structs(cache, &structs);
2031}
2032
2033static void process_deferred_flush_bios(struct cache *cache, bool submit_bios)
2034{
2035 unsigned long flags;
2036 struct bio_list bios;
2037 struct bio *bio;
2038
2039 bio_list_init(&bios);
2040
2041 spin_lock_irqsave(&cache->lock, flags);
2042 bio_list_merge(&bios, &cache->deferred_flush_bios);
2043 bio_list_init(&cache->deferred_flush_bios);
2044 spin_unlock_irqrestore(&cache->lock, flags);
2045
	/*
	 * These bios have already been through inc_ds()
	 */
2049 while ((bio = bio_list_pop(&bios)))
2050 submit_bios ? accounted_request(cache, bio) : bio_io_error(bio);
2051}
2052
2053static void process_deferred_writethrough_bios(struct cache *cache)
2054{
2055 unsigned long flags;
2056 struct bio_list bios;
2057 struct bio *bio;
2058
2059 bio_list_init(&bios);
2060
2061 spin_lock_irqsave(&cache->lock, flags);
2062 bio_list_merge(&bios, &cache->deferred_writethrough_bios);
2063 bio_list_init(&cache->deferred_writethrough_bios);
2064 spin_unlock_irqrestore(&cache->lock, flags);
2065
	/*
	 * These bios have already been through inc_ds()
	 */
2069 while ((bio = bio_list_pop(&bios)))
2070 accounted_request(cache, bio);
2071}
2072
2073static void writeback_some_dirty_blocks(struct cache *cache)
2074{
2075 bool prealloc_used = false;
2076 dm_oblock_t oblock;
2077 dm_cblock_t cblock;
2078 struct prealloc structs;
2079 struct dm_bio_prison_cell *old_ocell;
2080 bool busy = !iot_idle_for(&cache->origin_tracker, HZ);
2081
2082 memset(&structs, 0, sizeof(structs));
2083
2084 while (spare_migration_bandwidth(cache)) {
2085 if (policy_writeback_work(cache->policy, &oblock, &cblock, busy))
2086 break;
2087
2088 prealloc_used = true;
2089 if (prealloc_data_structs(cache, &structs) ||
2090 get_cell(cache, oblock, &structs, &old_ocell)) {
2091 policy_set_dirty(cache->policy, oblock);
2092 break;
2093 }
2094
2095 writeback(cache, &structs, oblock, cblock, old_ocell);
2096 }
2097
2098 if (prealloc_used)
2099 prealloc_free_structs(cache, &structs);
2100}
2101
/*----------------------------------------------------------------
 * Invalidations.
 * Dropping something from the cache *without* writing it back to the origin.
 *--------------------------------------------------------------*/

2107static void process_invalidation_request(struct cache *cache, struct invalidation_request *req)
2108{
2109 int r = 0;
2110 uint64_t begin = from_cblock(req->cblocks->begin);
2111 uint64_t end = from_cblock(req->cblocks->end);
2112
2113 while (begin != end) {
2114 r = policy_remove_cblock(cache->policy, to_cblock(begin));
2115 if (!r) {
2116 r = dm_cache_remove_mapping(cache->cmd, to_cblock(begin));
2117 if (r) {
2118 metadata_operation_failed(cache, "dm_cache_remove_mapping", r);
2119 break;
2120 }
2121
2122 } else if (r == -ENODATA) {
			/* harmless, the block wasn't mapped */
2124 r = 0;
2125
2126 } else {
2127 DMERR("%s: policy_remove_cblock failed", cache_device_name(cache));
2128 break;
2129 }
2130
2131 begin++;
2132 }
2133
2134 cache->commit_requested = true;
2135
2136 req->err = r;
2137 atomic_set(&req->complete, 1);
2138
2139 wake_up(&req->result_wait);
2140}
2141
2142static void process_invalidation_requests(struct cache *cache)
2143{
2144 struct list_head list;
2145 struct invalidation_request *req, *tmp;
2146
2147 INIT_LIST_HEAD(&list);
2148 spin_lock(&cache->invalidation_lock);
2149 list_splice_init(&cache->invalidation_requests, &list);
2150 spin_unlock(&cache->invalidation_lock);
2151
2152 list_for_each_entry_safe (req, tmp, &list, list)
2153 process_invalidation_request(cache, req);
2154}
2155
/*----------------------------------------------------------------
 * Main worker loop
 *--------------------------------------------------------------*/
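/*
 * Quiescing protocol: start_quiescing() raises the flag and then waits for
 * the worker to acknowledge it (ack_quiescing()), so by the time it
 * returns the worker is only servicing migrations.
 */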
2159static bool is_quiescing(struct cache *cache)
2160{
2161 return atomic_read(&cache->quiescing);
2162}
2163
2164static void ack_quiescing(struct cache *cache)
2165{
2166 if (is_quiescing(cache)) {
2167 atomic_inc(&cache->quiescing_ack);
2168 wake_up(&cache->quiescing_wait);
2169 }
2170}
2171
2172static void wait_for_quiescing_ack(struct cache *cache)
2173{
2174 wait_event(cache->quiescing_wait, atomic_read(&cache->quiescing_ack));
2175}
2176
2177static void start_quiescing(struct cache *cache)
2178{
2179 atomic_inc(&cache->quiescing);
2180 wait_for_quiescing_ack(cache);
2181}
2182
2183static void stop_quiescing(struct cache *cache)
2184{
2185 atomic_set(&cache->quiescing, 0);
2186 atomic_set(&cache->quiescing_ack, 0);
2187}
2188
2189static void wait_for_migrations(struct cache *cache)
2190{
2191 wait_event(cache->migration_wait, !atomic_read(&cache->nr_allocated_migrations));
2192}
2193
2194static void stop_worker(struct cache *cache)
2195{
2196 cancel_delayed_work(&cache->waker);
2197 flush_workqueue(cache->wq);
2198}
2199
2200static void requeue_deferred_cells(struct cache *cache)
2201{
2202 unsigned long flags;
2203 struct list_head cells;
2204 struct dm_bio_prison_cell *cell, *tmp;
2205
2206 INIT_LIST_HEAD(&cells);
2207 spin_lock_irqsave(&cache->lock, flags);
2208 list_splice_init(&cache->deferred_cells, &cells);
2209 spin_unlock_irqrestore(&cache->lock, flags);
2210
2211 list_for_each_entry_safe(cell, tmp, &cells, user_list)
2212 cell_requeue(cache, cell);
2213}
2214
2215static void requeue_deferred_bios(struct cache *cache)
2216{
2217 struct bio *bio;
2218 struct bio_list bios;
2219
2220 bio_list_init(&bios);
2221 bio_list_merge(&bios, &cache->deferred_bios);
2222 bio_list_init(&cache->deferred_bios);
2223
2224 while ((bio = bio_list_pop(&bios))) {
2225 bio->bi_error = DM_ENDIO_REQUEUE;
2226 bio_endio(bio);
2227 }
2228}
2229
2230static int more_work(struct cache *cache)
2231{
2232 if (is_quiescing(cache))
2233 return !list_empty(&cache->quiesced_migrations) ||
2234 !list_empty(&cache->completed_migrations) ||
2235 !list_empty(&cache->need_commit_migrations);
2236 else
2237 return !bio_list_empty(&cache->deferred_bios) ||
2238 !list_empty(&cache->deferred_cells) ||
2239 !bio_list_empty(&cache->deferred_flush_bios) ||
2240 !bio_list_empty(&cache->deferred_writethrough_bios) ||
2241 !list_empty(&cache->quiesced_migrations) ||
2242 !list_empty(&cache->completed_migrations) ||
2243 !list_empty(&cache->need_commit_migrations) ||
2244 cache->invalidate;
2245}
2246
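/*
 * The main worker loop: process deferred bios, cells and invalidation
 * requests (unless quiescing), drive migrations through their stages, and
 * commit metadata when required, failing the pending flush bios and
 * migrations if the commit fails.
 */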
2247static void do_worker(struct work_struct *ws)
2248{
2249 struct cache *cache = container_of(ws, struct cache, worker);
2250
2251 do {
2252 if (!is_quiescing(cache)) {
2253 writeback_some_dirty_blocks(cache);
2254 process_deferred_writethrough_bios(cache);
2255 process_deferred_bios(cache);
2256 process_deferred_cells(cache);
2257 process_invalidation_requests(cache);
2258 }
2259
2260 process_migrations(cache, &cache->quiesced_migrations, issue_copy_or_discard);
2261 process_migrations(cache, &cache->completed_migrations, complete_migration);
2262
2263 if (commit_if_needed(cache)) {
2264 process_deferred_flush_bios(cache, false);
2265 process_migrations(cache, &cache->need_commit_migrations, migration_failure);
2266 } else {
2267 process_deferred_flush_bios(cache, true);
2268 process_migrations(cache, &cache->need_commit_migrations,
2269 migration_success_post_commit);
2270 }
2271
2272 ack_quiescing(cache);
2273
2274 } while (more_work(cache));
2275}
2276
2277
2278
2279
2280
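/*
 * Periodic background work: tick the policy and wake the worker so pending
 * work (including metadata commits) is serviced at least every COMMIT_PERIOD.
 */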
2281static void do_waker(struct work_struct *ws)
2282{
2283 struct cache *cache = container_of(to_delayed_work(ws), struct cache, waker);
2284 policy_tick(cache->policy, true);
2285 wake_worker(cache);
2286 queue_delayed_work(cache->wq, &cache->waker, COMMIT_PERIOD);
2287}
2288
2289
2290
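/*
 * The target reports congestion if either the origin or the cache device
 * is congested.
 */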
2291static int is_congested(struct dm_dev *dev, int bdi_bits)
2292{
2293 struct request_queue *q = bdev_get_queue(dev->bdev);
2294 return bdi_congested(&q->backing_dev_info, bdi_bits);
2295}
2296
2297static int cache_is_congested(struct dm_target_callbacks *cb, int bdi_bits)
2298{
2299 struct cache *cache = container_of(cb, struct cache, callbacks);
2300
2301 return is_congested(cache->origin_dev, bdi_bits) ||
2302 is_congested(cache->cache_dev, bdi_bits);
2303}
2304
2305
2306
2307
2308
2309
2310
2311
2312
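/*----------------------------------------------------------------
 * Target methods
 *--------------------------------------------------------------*/

/*
 * destroy() is also called from the constructor's error path, so it must
 * cope with a partially initialised struct cache.
 */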
2313static void destroy(struct cache *cache)
2314{
2315 unsigned i;
2316
2317 mempool_destroy(cache->migration_pool);
2318
2319 if (cache->all_io_ds)
2320 dm_deferred_set_destroy(cache->all_io_ds);
2321
2322 if (cache->prison)
2323 dm_bio_prison_destroy(cache->prison);
2324
2325 if (cache->wq)
2326 destroy_workqueue(cache->wq);
2327
2328 if (cache->dirty_bitset)
2329 free_bitset(cache->dirty_bitset);
2330
2331 if (cache->discard_bitset)
2332 free_bitset(cache->discard_bitset);
2333
2334 if (cache->copier)
2335 dm_kcopyd_client_destroy(cache->copier);
2336
2337 if (cache->cmd)
2338 dm_cache_metadata_close(cache->cmd);
2339
2340 if (cache->metadata_dev)
2341 dm_put_device(cache->ti, cache->metadata_dev);
2342
2343 if (cache->origin_dev)
2344 dm_put_device(cache->ti, cache->origin_dev);
2345
2346 if (cache->cache_dev)
2347 dm_put_device(cache->ti, cache->cache_dev);
2348
2349 if (cache->policy)
2350 dm_cache_policy_destroy(cache->policy);
2351
2352 for (i = 0; i < cache->nr_ctr_args ; i++)
2353 kfree(cache->ctr_args[i]);
2354 kfree(cache->ctr_args);
2355
2356 kfree(cache);
2357}
2358
2359static void cache_dtr(struct dm_target *ti)
2360{
2361 struct cache *cache = ti->private;
2362
2363 destroy(cache);
2364}
2365
2366static sector_t get_dev_size(struct dm_dev *dev)
2367{
2368 return i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT;
2369}
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
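/*
 * Construct a cache device mapping:
 *
 *	cache <metadata dev> <cache dev> <origin dev> <block size>
 *	      <#feature args> [<feature arg>]*
 *	      <policy> <#policy args> [<policy arg>]*
 *
 * metadata dev	 : fast device holding the persistent metadata
 * cache dev	 : fast device holding cached data blocks
 * origin dev	 : slow device holding the original data blocks
 * block size	 : cache unit size in sectors
 *
 * #feature args : number of feature arguments (at most one)
 * feature args	 : writeback (default), writethrough or passthrough
 *
 * policy	 : the replacement policy to use
 * #policy args	 : an even number of policy arguments, passed to the
 *		   policy as <key> <value> pairs
 *
 * Illustrative table line (device paths and the policy name are
 * placeholders, not taken from this file):
 *
 *	cache /dev/mapper/meta /dev/mapper/ssd /dev/mapper/hdd 512 1 writeback default 0
 */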
2402struct cache_args {
2403 struct dm_target *ti;
2404
2405 struct dm_dev *metadata_dev;
2406
2407 struct dm_dev *cache_dev;
2408 sector_t cache_sectors;
2409
2410 struct dm_dev *origin_dev;
2411 sector_t origin_sectors;
2412
2413 uint32_t block_size;
2414
2415 const char *policy_name;
2416 int policy_argc;
2417 const char **policy_argv;
2418
2419 struct cache_features features;
2420};
2421
2422static void destroy_cache_args(struct cache_args *ca)
2423{
2424 if (ca->metadata_dev)
2425 dm_put_device(ca->ti, ca->metadata_dev);
2426
2427 if (ca->cache_dev)
2428 dm_put_device(ca->ti, ca->cache_dev);
2429
2430 if (ca->origin_dev)
2431 dm_put_device(ca->ti, ca->origin_dev);
2432
2433 kfree(ca);
2434}
2435
2436static bool at_least_one_arg(struct dm_arg_set *as, char **error)
2437{
2438 if (!as->argc) {
2439 *error = "Insufficient args";
2440 return false;
2441 }
2442
2443 return true;
2444}
2445
2446static int parse_metadata_dev(struct cache_args *ca, struct dm_arg_set *as,
2447 char **error)
2448{
2449 int r;
2450 sector_t metadata_dev_size;
2451 char b[BDEVNAME_SIZE];
2452
2453 if (!at_least_one_arg(as, error))
2454 return -EINVAL;
2455
2456 r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
2457 &ca->metadata_dev);
2458 if (r) {
2459 *error = "Error opening metadata device";
2460 return r;
2461 }
2462
2463 metadata_dev_size = get_dev_size(ca->metadata_dev);
2464 if (metadata_dev_size > DM_CACHE_METADATA_MAX_SECTORS_WARNING)
2465 DMWARN("Metadata device %s is larger than %u sectors: excess space will not be used.",
2466 bdevname(ca->metadata_dev->bdev, b), DM_CACHE_METADATA_MAX_SECTORS_WARNING);
2467
2468 return 0;
2469}
2470
2471static int parse_cache_dev(struct cache_args *ca, struct dm_arg_set *as,
2472 char **error)
2473{
2474 int r;
2475
2476 if (!at_least_one_arg(as, error))
2477 return -EINVAL;
2478
2479 r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
2480 &ca->cache_dev);
2481 if (r) {
2482 *error = "Error opening cache device";
2483 return r;
2484 }
2485 ca->cache_sectors = get_dev_size(ca->cache_dev);
2486
2487 return 0;
2488}
2489
2490static int parse_origin_dev(struct cache_args *ca, struct dm_arg_set *as,
2491 char **error)
2492{
2493 int r;
2494
2495 if (!at_least_one_arg(as, error))
2496 return -EINVAL;
2497
2498 r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
2499 &ca->origin_dev);
2500 if (r) {
2501 *error = "Error opening origin device";
2502 return r;
2503 }
2504
2505 ca->origin_sectors = get_dev_size(ca->origin_dev);
2506 if (ca->ti->len > ca->origin_sectors) {
2507 *error = "Device size larger than cached device";
2508 return -EINVAL;
2509 }
2510
2511 return 0;
2512}
2513
2514static int parse_block_size(struct cache_args *ca, struct dm_arg_set *as,
2515 char **error)
2516{
2517 unsigned long block_size;
2518
2519 if (!at_least_one_arg(as, error))
2520 return -EINVAL;
2521
2522 if (kstrtoul(dm_shift_arg(as), 10, &block_size) || !block_size ||
2523 block_size < DATA_DEV_BLOCK_SIZE_MIN_SECTORS ||
2524 block_size > DATA_DEV_BLOCK_SIZE_MAX_SECTORS ||
2525 block_size & (DATA_DEV_BLOCK_SIZE_MIN_SECTORS - 1)) {
2526 *error = "Invalid data block size";
2527 return -EINVAL;
2528 }
2529
2530 if (block_size > ca->cache_sectors) {
2531 *error = "Data block size is larger than the cache device";
2532 return -EINVAL;
2533 }
2534
2535 ca->block_size = block_size;
2536
2537 return 0;
2538}
2539
2540static void init_features(struct cache_features *cf)
2541{
2542 cf->mode = CM_WRITE;
2543 cf->io_mode = CM_IO_WRITEBACK;
2544}
2545
2546static int parse_features(struct cache_args *ca, struct dm_arg_set *as,
2547 char **error)
2548{
2549 static struct dm_arg _args[] = {
2550 {0, 1, "Invalid number of cache feature arguments"},
2551 };
2552
2553 int r;
2554 unsigned argc;
2555 const char *arg;
2556 struct cache_features *cf = &ca->features;
2557
2558 init_features(cf);
2559
2560 r = dm_read_arg_group(_args, as, &argc, error);
2561 if (r)
2562 return -EINVAL;
2563
2564 while (argc--) {
2565 arg = dm_shift_arg(as);
2566
2567 if (!strcasecmp(arg, "writeback"))
2568 cf->io_mode = CM_IO_WRITEBACK;
2569
2570 else if (!strcasecmp(arg, "writethrough"))
2571 cf->io_mode = CM_IO_WRITETHROUGH;
2572
2573 else if (!strcasecmp(arg, "passthrough"))
2574 cf->io_mode = CM_IO_PASSTHROUGH;
2575
2576 else {
2577 *error = "Unrecognised cache feature requested";
2578 return -EINVAL;
2579 }
2580 }
2581
2582 return 0;
2583}
2584
2585static int parse_policy(struct cache_args *ca, struct dm_arg_set *as,
2586 char **error)
2587{
2588 static struct dm_arg _args[] = {
2589 {0, 1024, "Invalid number of policy arguments"},
2590 };
2591
2592 int r;
2593
2594 if (!at_least_one_arg(as, error))
2595 return -EINVAL;
2596
2597 ca->policy_name = dm_shift_arg(as);
2598
2599 r = dm_read_arg_group(_args, as, &ca->policy_argc, error);
2600 if (r)
2601 return -EINVAL;
2602
2603 ca->policy_argv = (const char **)as->argv;
2604 dm_consume_args(as, ca->policy_argc);
2605
2606 return 0;
2607}
2608
2609static int parse_cache_args(struct cache_args *ca, int argc, char **argv,
2610 char **error)
2611{
2612 int r;
2613 struct dm_arg_set as;
2614
2615 as.argc = argc;
2616 as.argv = argv;
2617
2618 r = parse_metadata_dev(ca, &as, error);
2619 if (r)
2620 return r;
2621
2622 r = parse_cache_dev(ca, &as, error);
2623 if (r)
2624 return r;
2625
2626 r = parse_origin_dev(ca, &as, error);
2627 if (r)
2628 return r;
2629
2630 r = parse_block_size(ca, &as, error);
2631 if (r)
2632 return r;
2633
2634 r = parse_features(ca, &as, error);
2635 if (r)
2636 return r;
2637
2638 r = parse_policy(ca, &as, error);
2639 if (r)
2640 return r;
2641
2642 return 0;
2643}
2644
2645
2646
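/* Slab cache used to allocate struct dm_cache_migration objects. */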
2647static struct kmem_cache *migration_cache;
2648
2649#define NOT_CORE_OPTION 1
2650
2651static int process_config_option(struct cache *cache, const char *key, const char *value)
2652{
2653 unsigned long tmp;
2654
2655 if (!strcasecmp(key, "migration_threshold")) {
2656 if (kstrtoul(value, 10, &tmp))
2657 return -EINVAL;
2658
2659 cache->migration_threshold = tmp;
2660 return 0;
2661 }
2662
2663 return NOT_CORE_OPTION;
2664}
2665
2666static int set_config_value(struct cache *cache, const char *key, const char *value)
2667{
2668 int r = process_config_option(cache, key, value);
2669
2670 if (r == NOT_CORE_OPTION)
2671 r = policy_set_config_value(cache->policy, key, value);
2672
2673 if (r)
2674 DMWARN("bad config value for %s: %s", key, value);
2675
2676 return r;
2677}
2678
2679static int set_config_values(struct cache *cache, int argc, const char **argv)
2680{
2681 int r = 0;
2682
2683 if (argc & 1) {
2684 DMWARN("Odd number of policy arguments given but they should be <key> <value> pairs.");
2685 return -EINVAL;
2686 }
2687
2688 while (argc) {
2689 r = set_config_value(cache, argv[0], argv[1]);
2690 if (r)
2691 break;
2692
2693 argc -= 2;
2694 argv += 2;
2695 }
2696
2697 return r;
2698}
2699
2700static int create_cache_policy(struct cache *cache, struct cache_args *ca,
2701 char **error)
2702{
2703 struct dm_cache_policy *p = dm_cache_policy_create(ca->policy_name,
2704 cache->cache_size,
2705 cache->origin_sectors,
2706 cache->sectors_per_block);
2707 if (IS_ERR(p)) {
2708 *error = "Error creating cache's policy";
2709 return PTR_ERR(p);
2710 }
2711 cache->policy = p;
2712
2713 return 0;
2714}
2715
2716
2717
2718
2719
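/*
 * We want the discard block size to be at least the cache block size and
 * to give no more than 2^14 discard blocks across the origin device.
 */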
2720#define MAX_DISCARD_BLOCKS (1 << 14)
2721
2722static bool too_many_discard_blocks(sector_t discard_block_size,
2723 sector_t origin_size)
2724{
2725 (void) sector_div(origin_size, discard_block_size);
2726
2727 return origin_size > MAX_DISCARD_BLOCKS;
2728}
2729
2730static sector_t calculate_discard_block_size(sector_t cache_block_size,
2731 sector_t origin_size)
2732{
2733 sector_t discard_block_size = cache_block_size;
2734
2735 if (origin_size)
2736 while (too_many_discard_blocks(discard_block_size, origin_size))
2737 discard_block_size *= 2;
2738
2739 return discard_block_size;
2740}
2741
2742static void set_cache_size(struct cache *cache, dm_cblock_t size)
2743{
2744 dm_block_t nr_blocks = from_cblock(size);
2745
2746 if (nr_blocks > (1 << 20) && cache->cache_size != size)
2747 DMWARN_LIMIT("You have created a cache device with a lot of individual cache blocks (%llu)\n"
2748 "All these mappings can consume a lot of kernel memory, and take some time to read/write.\n"
2749 "Please consider increasing the cache block size to reduce the overall cache block count.",
2750 (unsigned long long) nr_blocks);
2751
2752 cache->cache_size = size;
2753}
2754
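/* Default for the "migration_threshold" tunable; see process_config_option(). */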
2755#define DEFAULT_MIGRATION_THRESHOLD 2048
2756
2757static int cache_create(struct cache_args *ca, struct cache **result)
2758{
2759 int r = 0;
2760 char **error = &ca->ti->error;
2761 struct cache *cache;
2762 struct dm_target *ti = ca->ti;
2763 dm_block_t origin_blocks;
2764 struct dm_cache_metadata *cmd;
2765 bool may_format = ca->features.mode == CM_WRITE;
2766
2767 cache = kzalloc(sizeof(*cache), GFP_KERNEL);
2768 if (!cache)
2769 return -ENOMEM;
2770
2771 cache->ti = ca->ti;
2772 ti->private = cache;
2773 ti->num_flush_bios = 2;
2774 ti->flush_supported = true;
2775
2776 ti->num_discard_bios = 1;
2777 ti->discards_supported = true;
2778 ti->discard_zeroes_data_unsupported = true;
2779 ti->split_discard_bios = false;
2780
2781 cache->features = ca->features;
2782 ti->per_io_data_size = get_per_bio_data_size(cache);
2783
2784 cache->callbacks.congested_fn = cache_is_congested;
2785 dm_table_add_target_callbacks(ti->table, &cache->callbacks);
2786
2787 cache->metadata_dev = ca->metadata_dev;
2788 cache->origin_dev = ca->origin_dev;
2789 cache->cache_dev = ca->cache_dev;
2790
2791 ca->metadata_dev = ca->origin_dev = ca->cache_dev = NULL;
2792
2793
2794 origin_blocks = cache->origin_sectors = ca->origin_sectors;
2795 origin_blocks = block_div(origin_blocks, ca->block_size);
2796 cache->origin_blocks = to_oblock(origin_blocks);
2797
2798 cache->sectors_per_block = ca->block_size;
2799 if (dm_set_target_max_io_len(ti, cache->sectors_per_block)) {
2800 r = -EINVAL;
2801 goto bad;
2802 }
2803
2804 if (ca->block_size & (ca->block_size - 1)) {
2805 dm_block_t cache_size = ca->cache_sectors;
2806
2807 cache->sectors_per_block_shift = -1;
2808 cache_size = block_div(cache_size, ca->block_size);
2809 set_cache_size(cache, to_cblock(cache_size));
2810 } else {
2811 cache->sectors_per_block_shift = __ffs(ca->block_size);
2812 set_cache_size(cache, to_cblock(ca->cache_sectors >> cache->sectors_per_block_shift));
2813 }
2814
2815 r = create_cache_policy(cache, ca, error);
2816 if (r)
2817 goto bad;
2818
2819 cache->policy_nr_args = ca->policy_argc;
2820 cache->migration_threshold = DEFAULT_MIGRATION_THRESHOLD;
2821
2822 r = set_config_values(cache, ca->policy_argc, ca->policy_argv);
2823 if (r) {
2824 *error = "Error setting cache policy's config values";
2825 goto bad;
2826 }
2827
2828 cmd = dm_cache_metadata_open(cache->metadata_dev->bdev,
2829 ca->block_size, may_format,
2830 dm_cache_policy_get_hint_size(cache->policy));
2831 if (IS_ERR(cmd)) {
2832 *error = "Error creating metadata object";
2833 r = PTR_ERR(cmd);
2834 goto bad;
2835 }
2836 cache->cmd = cmd;
2837 set_cache_mode(cache, CM_WRITE);
2838 if (get_cache_mode(cache) != CM_WRITE) {
2839 *error = "Unable to get write access to metadata, please check/repair metadata.";
2840 r = -EINVAL;
2841 goto bad;
2842 }
2843
2844 if (passthrough_mode(&cache->features)) {
2845 bool all_clean;
2846
2847 r = dm_cache_metadata_all_clean(cache->cmd, &all_clean);
2848 if (r) {
2849 *error = "dm_cache_metadata_all_clean() failed";
2850 goto bad;
2851 }
2852
2853 if (!all_clean) {
2854 *error = "Cannot enter passthrough mode unless all blocks are clean";
2855 r = -EINVAL;
2856 goto bad;
2857 }
2858 }
2859
2860 spin_lock_init(&cache->lock);
2861 INIT_LIST_HEAD(&cache->deferred_cells);
2862 bio_list_init(&cache->deferred_bios);
2863 bio_list_init(&cache->deferred_flush_bios);
2864 bio_list_init(&cache->deferred_writethrough_bios);
2865 INIT_LIST_HEAD(&cache->quiesced_migrations);
2866 INIT_LIST_HEAD(&cache->completed_migrations);
2867 INIT_LIST_HEAD(&cache->need_commit_migrations);
2868 atomic_set(&cache->nr_allocated_migrations, 0);
2869 atomic_set(&cache->nr_io_migrations, 0);
2870 init_waitqueue_head(&cache->migration_wait);
2871
2872 init_waitqueue_head(&cache->quiescing_wait);
2873 atomic_set(&cache->quiescing, 0);
2874 atomic_set(&cache->quiescing_ack, 0);
2875
2876 r = -ENOMEM;
2877 atomic_set(&cache->nr_dirty, 0);
2878 cache->dirty_bitset = alloc_bitset(from_cblock(cache->cache_size));
2879 if (!cache->dirty_bitset) {
2880 *error = "could not allocate dirty bitset";
2881 goto bad;
2882 }
2883 clear_bitset(cache->dirty_bitset, from_cblock(cache->cache_size));
2884
2885 cache->discard_block_size =
2886 calculate_discard_block_size(cache->sectors_per_block,
2887 cache->origin_sectors);
2888 cache->discard_nr_blocks = to_dblock(dm_sector_div_up(cache->origin_sectors,
2889 cache->discard_block_size));
2890 cache->discard_bitset = alloc_bitset(from_dblock(cache->discard_nr_blocks));
2891 if (!cache->discard_bitset) {
2892 *error = "could not allocate discard bitset";
2893 goto bad;
2894 }
2895 clear_bitset(cache->discard_bitset, from_dblock(cache->discard_nr_blocks));
2896
2897 cache->copier = dm_kcopyd_client_create(&dm_kcopyd_throttle);
2898 if (IS_ERR(cache->copier)) {
2899 *error = "could not create kcopyd client";
2900 r = PTR_ERR(cache->copier);
2901 goto bad;
2902 }
2903
2904 cache->wq = alloc_ordered_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM);
2905 if (!cache->wq) {
2906 *error = "could not create workqueue for metadata object";
2907 goto bad;
2908 }
2909 INIT_WORK(&cache->worker, do_worker);
2910 INIT_DELAYED_WORK(&cache->waker, do_waker);
2911 cache->last_commit_jiffies = jiffies;
2912
2913 cache->prison = dm_bio_prison_create();
2914 if (!cache->prison) {
2915 *error = "could not create bio prison";
2916 goto bad;
2917 }
2918
2919 cache->all_io_ds = dm_deferred_set_create();
2920 if (!cache->all_io_ds) {
2921 *error = "could not create all_io deferred set";
2922 goto bad;
2923 }
2924
2925 cache->migration_pool = mempool_create_slab_pool(MIGRATION_POOL_SIZE,
2926 migration_cache);
2927 if (!cache->migration_pool) {
2928 *error = "Error creating cache's migration mempool";
2929 goto bad;
2930 }
2931
2932 cache->need_tick_bio = true;
2933 cache->sized = false;
2934 cache->invalidate = false;
2935 cache->commit_requested = false;
2936 cache->loaded_mappings = false;
2937 cache->loaded_discards = false;
2938
2939 load_stats(cache);
2940
2941 atomic_set(&cache->stats.demotion, 0);
2942 atomic_set(&cache->stats.promotion, 0);
2943 atomic_set(&cache->stats.copies_avoided, 0);
2944 atomic_set(&cache->stats.cache_cell_clash, 0);
2945 atomic_set(&cache->stats.commit_count, 0);
2946 atomic_set(&cache->stats.discard_count, 0);
2947
2948 spin_lock_init(&cache->invalidation_lock);
2949 INIT_LIST_HEAD(&cache->invalidation_requests);
2950
2951 iot_init(&cache->origin_tracker);
2952
2953 *result = cache;
2954 return 0;
2955
2956bad:
2957 destroy(cache);
2958 return r;
2959}
2960
2961static int copy_ctr_args(struct cache *cache, int argc, const char **argv)
2962{
2963 unsigned i;
2964 const char **copy;
2965
2966 copy = kcalloc(argc, sizeof(*copy), GFP_KERNEL);
2967 if (!copy)
2968 return -ENOMEM;
2969 for (i = 0; i < argc; i++) {
2970 copy[i] = kstrdup(argv[i], GFP_KERNEL);
2971 if (!copy[i]) {
2972 while (i--)
2973 kfree(copy[i]);
2974 kfree(copy);
2975 return -ENOMEM;
2976 }
2977 }
2978
2979 cache->nr_ctr_args = argc;
2980 cache->ctr_args = copy;
2981
2982 return 0;
2983}
2984
2985static int cache_ctr(struct dm_target *ti, unsigned argc, char **argv)
2986{
2987 int r = -EINVAL;
2988 struct cache_args *ca;
2989 struct cache *cache = NULL;
2990
2991 ca = kzalloc(sizeof(*ca), GFP_KERNEL);
2992 if (!ca) {
2993 ti->error = "Error allocating memory for cache";
2994 return -ENOMEM;
2995 }
2996 ca->ti = ti;
2997
2998 r = parse_cache_args(ca, argc, argv, &ti->error);
2999 if (r)
3000 goto out;
3001
3002 r = cache_create(ca, &cache);
3003 if (r)
3004 goto out;
3005
3006 r = copy_ctr_args(cache, argc - 3, (const char **)argv + 3);
3007 if (r) {
3008 destroy(cache);
3009 goto out;
3010 }
3011
3012 ti->private = cache;
3013
3014out:
3015 destroy_cache_args(ca);
3016 return r;
3017}
3018
3019
3020
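/*
 * Fast-path bio remapping.  Anything that can't be serviced without
 * blocking is deferred to the worker thread.
 */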
3021static int cache_map(struct dm_target *ti, struct bio *bio)
3022{
3023 struct cache *cache = ti->private;
3024
3025 int r;
3026 struct dm_bio_prison_cell *cell = NULL;
3027 dm_oblock_t block = get_bio_block(cache, bio);
3028 size_t pb_data_size = get_per_bio_data_size(cache);
3029 bool can_migrate = false;
3030 bool fast_promotion;
3031 struct policy_result lookup_result;
3032 struct per_bio_data *pb = init_per_bio_data(bio, pb_data_size);
3033 struct old_oblock_lock ool;
3034
3035 ool.locker.fn = null_locker;
3036
3037 if (unlikely(from_oblock(block) >= from_oblock(cache->origin_blocks))) {
3038
3039
3040
3041
3042
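		/*
		 * This can only happen for a partial block at the end of the
		 * origin device; such blocks are never cached, so just remap
		 * the bio to the origin.
		 */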
3043 remap_to_origin(cache, bio);
3044 accounted_begin(cache, bio);
3045 return DM_MAPIO_REMAPPED;
3046 }
3047
3048 if (discard_or_flush(bio)) {
3049 defer_bio(cache, bio);
3050 return DM_MAPIO_SUBMITTED;
3051 }
3052
3053
3054
3055
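	/*
	 * Detain the bio in a cell keyed by the block so we don't race with
	 * a migration or another bio to the same block.  If the block is
	 * already locked, the bio is held in the existing cell and replayed
	 * when that cell is released.
	 */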
3056 cell = alloc_prison_cell(cache);
3057 if (!cell) {
3058 defer_bio(cache, bio);
3059 return DM_MAPIO_SUBMITTED;
3060 }
3061
3062 r = bio_detain(cache, block, bio, cell,
3063 (cell_free_fn) free_prison_cell,
3064 cache, &cell);
3065 if (r) {
3066 if (r < 0)
3067 defer_bio(cache, bio);
3068
3069 return DM_MAPIO_SUBMITTED;
3070 }
3071
3072 fast_promotion = is_discarded_oblock(cache, block) || bio_writes_complete_block(cache, bio);
3073
3074 r = policy_map(cache->policy, block, false, can_migrate, fast_promotion,
3075 bio, &ool.locker, &lookup_result);
3076 if (r == -EWOULDBLOCK) {
3077 cell_defer(cache, cell, true);
3078 return DM_MAPIO_SUBMITTED;
3079
3080 } else if (r) {
3081 DMERR_LIMIT("%s: Unexpected return from cache replacement policy: %d",
3082 cache_device_name(cache), r);
3083 cell_defer(cache, cell, false);
3084 bio_io_error(bio);
3085 return DM_MAPIO_SUBMITTED;
3086 }
3087
3088 r = DM_MAPIO_REMAPPED;
3089 switch (lookup_result.op) {
3090 case POLICY_HIT:
3091 if (passthrough_mode(&cache->features)) {
3092 if (bio_data_dir(bio) == WRITE) {
3093
3094
3095
3096
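				/*
				 * A write must invalidate the passthrough
				 * mapping, which is work for the worker
				 * thread, so defer this cell.
				 */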
3097 cell_defer(cache, cell, true);
3098 r = DM_MAPIO_SUBMITTED;
3099
3100 } else {
3101 inc_miss_counter(cache, bio);
3102 remap_to_origin_clear_discard(cache, bio, block);
3103 accounted_begin(cache, bio);
3104 inc_ds(cache, bio, cell);
3105
3106
3107 cell_defer(cache, cell, false);
3108 }
3109
3110 } else {
3111 inc_hit_counter(cache, bio);
3112 if (bio_data_dir(bio) == WRITE && writethrough_mode(&cache->features) &&
3113 !is_dirty(cache, lookup_result.cblock)) {
3114 remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
3115 accounted_begin(cache, bio);
3116 inc_ds(cache, bio, cell);
3117 cell_defer(cache, cell, false);
3118
3119 } else
3120 remap_cell_to_cache_dirty(cache, cell, block, lookup_result.cblock, false);
3121 }
3122 break;
3123
3124 case POLICY_MISS:
3125 inc_miss_counter(cache, bio);
3126 if (pb->req_nr != 0) {
3127
3128
3129
3130
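			/*
			 * Duplicate bios (req_nr != 0) aren't needed on a
			 * miss, so complete this one here instead of
			 * remapping it.
			 */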
3131 bio_endio(bio);
3132
3133 cell_defer(cache, cell, false);
3134 r = DM_MAPIO_SUBMITTED;
3135
3136 } else
3137 remap_cell_to_origin_clear_discard(cache, cell, block, false);
3138 break;
3139
3140 default:
3141 DMERR_LIMIT("%s: %s: erroring bio: unknown policy op: %u",
3142 cache_device_name(cache), __func__,
3143 (unsigned) lookup_result.op);
3144 cell_defer(cache, cell, false);
3145 bio_io_error(bio);
3146 r = DM_MAPIO_SUBMITTED;
3147 }
3148
3149 return r;
3150}
3151
3152static int cache_end_io(struct dm_target *ti, struct bio *bio, int error)
3153{
3154 struct cache *cache = ti->private;
3155 unsigned long flags;
3156 size_t pb_data_size = get_per_bio_data_size(cache);
3157 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
3158
3159 if (pb->tick) {
3160 policy_tick(cache->policy, false);
3161
3162 spin_lock_irqsave(&cache->lock, flags);
3163 cache->need_tick_bio = true;
3164 spin_unlock_irqrestore(&cache->lock, flags);
3165 }
3166
3167 check_for_quiesced_migrations(cache, pb);
3168 accounted_complete(cache, bio);
3169
3170 return 0;
3171}
3172
3173static int write_dirty_bitset(struct cache *cache)
3174{
3175 unsigned i;
	int r;
3176
3177 if (get_cache_mode(cache) >= CM_READ_ONLY)
3178 return -EINVAL;
3179
3180 for (i = 0; i < from_cblock(cache->cache_size); i++) {
3181 r = dm_cache_set_dirty(cache->cmd, to_cblock(i),
3182 is_dirty(cache, to_cblock(i)));
3183 if (r) {
3184 metadata_operation_failed(cache, "dm_cache_set_dirty", r);
3185 return r;
3186 }
3187 }
3188
3189 return 0;
3190}
3191
3192static int write_discard_bitset(struct cache *cache)
3193{
3194 unsigned i;
	int r;
3195
3196 if (get_cache_mode(cache) >= CM_READ_ONLY)
3197 return -EINVAL;
3198
3199 r = dm_cache_discard_bitset_resize(cache->cmd, cache->discard_block_size,
3200 cache->discard_nr_blocks);
3201 if (r) {
3202 DMERR("%s: could not resize on-disk discard bitset", cache_device_name(cache));
3203 metadata_operation_failed(cache, "dm_cache_discard_bitset_resize", r);
3204 return r;
3205 }
3206
3207 for (i = 0; i < from_dblock(cache->discard_nr_blocks); i++) {
3208 r = dm_cache_set_discard(cache->cmd, to_dblock(i),
3209 is_discarded(cache, to_dblock(i)));
3210 if (r) {
3211 metadata_operation_failed(cache, "dm_cache_set_discard", r);
3212 return r;
3213 }
3214 }
3215
3216 return 0;
3217}
3218
3219static int write_hints(struct cache *cache)
3220{
3221 int r;
3222
3223 if (get_cache_mode(cache) >= CM_READ_ONLY)
3224 return -EINVAL;
3225
3226 r = dm_cache_write_hints(cache->cmd, cache->policy);
3227 if (r) {
3228 metadata_operation_failed(cache, "dm_cache_write_hints", r);
3229 return r;
3230 }
3231
3232 return 0;
3233}
3234
3235
3236
3237
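/*
 * Returns true only if all of the metadata writes and the final commit
 * succeed.
 */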
3238static bool sync_metadata(struct cache *cache)
3239{
3240 int r1, r2, r3, r4;
3241
3242 r1 = write_dirty_bitset(cache);
3243 if (r1)
3244 DMERR("%s: could not write dirty bitset", cache_device_name(cache));
3245
3246 r2 = write_discard_bitset(cache);
3247 if (r2)
3248 DMERR("%s: could not write discard bitset", cache_device_name(cache));
3249
3250 save_stats(cache);
3251
3252 r3 = write_hints(cache);
3253 if (r3)
3254 DMERR("%s: could not write hints", cache_device_name(cache));
3255
3256
3257
3258
3259
3260
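	/*
	 * Commit even if some of the writes above failed, but only flag the
	 * metadata as cleanly shut down when everything succeeded; otherwise
	 * all blocks will be treated as dirty on the next load.
	 */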
3261 r4 = commit(cache, !r1 && !r2 && !r3);
3262 if (r4)
3263 DMERR("%s: could not write cache metadata", cache_device_name(cache));
3264
3265 return !r1 && !r2 && !r3 && !r4;
3266}
3267
3268static void cache_postsuspend(struct dm_target *ti)
3269{
3270 struct cache *cache = ti->private;
3271
3272 start_quiescing(cache);
3273 wait_for_migrations(cache);
3274 stop_worker(cache);
3275 requeue_deferred_bios(cache);
3276 requeue_deferred_cells(cache);
3277 stop_quiescing(cache);
3278
3279 if (get_cache_mode(cache) == CM_WRITE)
3280 (void) sync_metadata(cache);
3281}
3282
3283static int load_mapping(void *context, dm_oblock_t oblock, dm_cblock_t cblock,
3284 bool dirty, uint32_t hint, bool hint_valid)
3285{
3286 int r;
3287 struct cache *cache = context;
3288
3289 r = policy_load_mapping(cache->policy, oblock, cblock, hint, hint_valid);
3290 if (r)
3291 return r;
3292
3293 if (dirty)
3294 set_dirty(cache, oblock, cblock);
3295 else
3296 clear_dirty(cache, oblock, cblock);
3297
3298 return 0;
3299}
3300
3301
3302
3303
3304
3305
3306
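/*
 * The discard block size recorded in the on-disk metadata is not
 * necessarily the same as the one currently in use, so be careful to set
 * the discarded attribute only when a whole current-size block is covered.
 */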
3307struct discard_load_info {
3308 struct cache *cache;
3309
3310
3311
3312
3313
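	/*
	 * These fields are in units of the on-disk discard block size, not
	 * the one currently in use.
	 */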
3314 dm_block_t block_size;
3315 dm_block_t discard_begin, discard_end;
3316};
3317
3318static void discard_load_info_init(struct cache *cache,
3319 struct discard_load_info *li)
3320{
3321 li->cache = cache;
3322 li->discard_begin = li->discard_end = 0;
3323}
3324
3325static void set_discard_range(struct discard_load_info *li)
3326{
3327 sector_t b, e;
3328
3329 if (li->discard_begin == li->discard_end)
3330 return;
3331
3332
3333
3334
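	/*
	 * Convert the run to sectors using the on-disk discard block size...
	 */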
3335 b = li->discard_begin * li->block_size;
3336 e = li->discard_end * li->block_size;
3337
3338
3339
3340
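	/*
	 * ...then round inwards to whole blocks of the current discard block
	 * size, so only fully covered blocks are marked.
	 */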
3341 b = dm_sector_div_up(b, li->cache->discard_block_size);
3342 sector_div(e, li->cache->discard_block_size);
3343
3344
3345
3346
3347
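	/*
	 * The origin may have shrunk, so make sure we're still in bounds.
	 */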
3348 if (e > from_dblock(li->cache->discard_nr_blocks))
3349 e = from_dblock(li->cache->discard_nr_blocks);
3350
3351 for (; b < e; b++)
3352 set_discard(li->cache, to_dblock(b));
3353}
3354
3355static int load_discard(void *context, sector_t discard_block_size,
3356 dm_dblock_t dblock, bool discard)
3357{
3358 struct discard_load_info *li = context;
3359
3360 li->block_size = discard_block_size;
3361
3362 if (discard) {
3363 if (from_dblock(dblock) == li->discard_end)
3364
3365
3366
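			/*
			 * We're already in a discard run, just extend it.
			 */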
3367 li->discard_end = li->discard_end + 1ULL;
3368
3369 else {
3370
3371
3372
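			/*
			 * Emit the previous run and start a new one.
			 */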
3373 set_discard_range(li);
3374 li->discard_begin = from_dblock(dblock);
3375 li->discard_end = li->discard_begin + 1ULL;
3376 }
3377 } else {
3378 set_discard_range(li);
3379 li->discard_begin = li->discard_end = 0;
3380 }
3381
3382 return 0;
3383}
3384
3385static dm_cblock_t get_cache_dev_size(struct cache *cache)
3386{
3387 sector_t size = get_dev_size(cache->cache_dev);
3388 (void) sector_div(size, cache->sectors_per_block);
3389 return to_cblock(size);
3390}
3391
3392static bool can_resize(struct cache *cache, dm_cblock_t new_size)
3393{
3394 if (from_cblock(new_size) > from_cblock(cache->cache_size))
3395 return true;
3396
3397
3398
3399
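	/*
	 * We can't drop dirty blocks when shrinking the cache, so refuse if
	 * any block beyond the new size is dirty.
	 */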
3400 while (from_cblock(new_size) < from_cblock(cache->cache_size)) {
3401 new_size = to_cblock(from_cblock(new_size) + 1);
3402 if (is_dirty(cache, new_size)) {
3403 DMERR("%s: unable to shrink cache; cache block %llu is dirty",
3404 cache_device_name(cache),
3405 (unsigned long long) from_cblock(new_size));
3406 return false;
3407 }
3408 }
3409
3410 return true;
3411}
3412
3413static int resize_cache_dev(struct cache *cache, dm_cblock_t new_size)
3414{
3415 int r;
3416
3417 r = dm_cache_resize(cache->cmd, new_size);
3418 if (r) {
3419 DMERR("%s: could not resize cache metadata", cache_device_name(cache));
3420 metadata_operation_failed(cache, "dm_cache_resize", r);
3421 return r;
3422 }
3423
3424 set_cache_size(cache, new_size);
3425
3426 return 0;
3427}
3428
3429static int cache_preresume(struct dm_target *ti)
3430{
3431 int r = 0;
3432 struct cache *cache = ti->private;
3433 dm_cblock_t csize = get_cache_dev_size(cache);
3434
3435
3436
3437
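	/*
	 * Check to see if the cache device has resized.
	 */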
3438 if (!cache->sized) {
3439 r = resize_cache_dev(cache, csize);
3440 if (r)
3441 return r;
3442
3443 cache->sized = true;
3444
3445 } else if (csize != cache->cache_size) {
3446 if (!can_resize(cache, csize))
3447 return -EINVAL;
3448
3449 r = resize_cache_dev(cache, csize);
3450 if (r)
3451 return r;
3452 }
3453
3454 if (!cache->loaded_mappings) {
3455 r = dm_cache_load_mappings(cache->cmd, cache->policy,
3456 load_mapping, cache);
3457 if (r) {
3458 DMERR("%s: could not load cache mappings", cache_device_name(cache));
3459 metadata_operation_failed(cache, "dm_cache_load_mappings", r);
3460 return r;
3461 }
3462
3463 cache->loaded_mappings = true;
3464 }
3465
3466 if (!cache->loaded_discards) {
3467 struct discard_load_info li;
3468
3469
3470
3471
3472
3473
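		/*
		 * The discard bitset could have been resized, or the discard
		 * block size could have changed.  To be safe, start with
		 * every dblock marked not-discarded.
		 */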
3474 clear_bitset(cache->discard_bitset, from_dblock(cache->discard_nr_blocks));
3475
3476 discard_load_info_init(cache, &li);
3477 r = dm_cache_load_discards(cache->cmd, load_discard, &li);
3478 if (r) {
3479 DMERR("%s: could not load origin discards", cache_device_name(cache));
3480 metadata_operation_failed(cache, "dm_cache_load_discards", r);
3481 return r;
3482 }
3483 set_discard_range(&li);
3484
3485 cache->loaded_discards = true;
3486 }
3487
3488 return r;
3489}
3490
3491static void cache_resume(struct dm_target *ti)
3492{
3493 struct cache *cache = ti->private;
3494
3495 cache->need_tick_bio = true;
3496 do_waker(&cache->waker.work);
3497}
3498
3499
3500
3501
3502
3503
3504
3505
3506
3507
3508
3509
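/*
 * Status format:
 *
 * <metadata block size> <#used metadata blocks>/<#total metadata blocks>
 * <cache block size> <#used cache blocks>/<#total cache blocks>
 * <#read hits> <#read misses> <#write hits> <#write misses>
 * <#demotions> <#promotions> <#dirty>
 * <#features> <features>*
 * <#core args> <core args>
 * <policy name> <policy config values>
 * <rw|ro> <needs_check|->
 */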
3510static void cache_status(struct dm_target *ti, status_type_t type,
3511 unsigned status_flags, char *result, unsigned maxlen)
3512{
3513 int r = 0;
3514 unsigned i;
3515 ssize_t sz = 0;
3516 dm_block_t nr_free_blocks_metadata = 0;
3517 dm_block_t nr_blocks_metadata = 0;
3518 char buf[BDEVNAME_SIZE];
3519 struct cache *cache = ti->private;
3520 dm_cblock_t residency;
3521 bool needs_check;
3522
3523 switch (type) {
3524 case STATUSTYPE_INFO:
3525 if (get_cache_mode(cache) == CM_FAIL) {
3526 DMEMIT("Fail");
3527 break;
3528 }
3529
3530
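		/* Commit so the statistics below aren't out of date. */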
3531 if (!(status_flags & DM_STATUS_NOFLUSH_FLAG) && !dm_suspended(ti))
3532 (void) commit(cache, false);
3533
3534 r = dm_cache_get_free_metadata_block_count(cache->cmd, &nr_free_blocks_metadata);
3535 if (r) {
3536 DMERR("%s: dm_cache_get_free_metadata_block_count returned %d",
3537 cache_device_name(cache), r);
3538 goto err;
3539 }
3540
3541 r = dm_cache_get_metadata_dev_size(cache->cmd, &nr_blocks_metadata);
3542 if (r) {
3543 DMERR("%s: dm_cache_get_metadata_dev_size returned %d",
3544 cache_device_name(cache), r);
3545 goto err;
3546 }
3547
3548 residency = policy_residency(cache->policy);
3549
3550 DMEMIT("%u %llu/%llu %llu %llu/%llu %u %u %u %u %u %u %lu ",
3551 (unsigned)DM_CACHE_METADATA_BLOCK_SIZE,
3552 (unsigned long long)(nr_blocks_metadata - nr_free_blocks_metadata),
3553 (unsigned long long)nr_blocks_metadata,
3554 (unsigned long long)cache->sectors_per_block,
3555 (unsigned long long) from_cblock(residency),
3556 (unsigned long long) from_cblock(cache->cache_size),
3557 (unsigned) atomic_read(&cache->stats.read_hit),
3558 (unsigned) atomic_read(&cache->stats.read_miss),
3559 (unsigned) atomic_read(&cache->stats.write_hit),
3560 (unsigned) atomic_read(&cache->stats.write_miss),
3561 (unsigned) atomic_read(&cache->stats.demotion),
3562 (unsigned) atomic_read(&cache->stats.promotion),
3563 (unsigned long) atomic_read(&cache->nr_dirty));
3564
3565 if (writethrough_mode(&cache->features))
3566 DMEMIT("1 writethrough ");
3567
3568 else if (passthrough_mode(&cache->features))
3569 DMEMIT("1 passthrough ");
3570
3571 else if (writeback_mode(&cache->features))
3572 DMEMIT("1 writeback ");
3573
3574 else {
3575 DMERR("%s: internal error: unknown io mode: %d",
3576 cache_device_name(cache), (int) cache->features.io_mode);
3577 goto err;
3578 }
3579
3580 DMEMIT("2 migration_threshold %llu ", (unsigned long long) cache->migration_threshold);
3581
3582 DMEMIT("%s ", dm_cache_policy_get_name(cache->policy));
3583 if (sz < maxlen) {
3584 r = policy_emit_config_values(cache->policy, result, maxlen, &sz);
3585 if (r)
3586 DMERR("%s: policy_emit_config_values returned %d",
3587 cache_device_name(cache), r);
3588 }
3589
3590 if (get_cache_mode(cache) == CM_READ_ONLY)
3591 DMEMIT("ro ");
3592 else
3593 DMEMIT("rw ");
3594
3595 r = dm_cache_metadata_needs_check(cache->cmd, &needs_check);
3596
3597 if (r || needs_check)
3598 DMEMIT("needs_check ");
3599 else
3600 DMEMIT("- ");
3601
3602 break;
3603
3604 case STATUSTYPE_TABLE:
3605 format_dev_t(buf, cache->metadata_dev->bdev->bd_dev);
3606 DMEMIT("%s ", buf);
3607 format_dev_t(buf, cache->cache_dev->bdev->bd_dev);
3608 DMEMIT("%s ", buf);
3609 format_dev_t(buf, cache->origin_dev->bdev->bd_dev);
3610 DMEMIT("%s", buf);
3611
3612 for (i = 0; i < cache->nr_ctr_args - 1; i++)
3613 DMEMIT(" %s", cache->ctr_args[i]);
3614 if (cache->nr_ctr_args)
3615 DMEMIT(" %s", cache->ctr_args[cache->nr_ctr_args - 1]);
3616 }
3617
3618 return;
3619
3620err:
3621 DMEMIT("Error");
3622}
3623
3624
3625
3626
3627
3628
3629
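/*
 * A cblock range is given either as a single cblock, e.g. "3456", or as a
 * begin-end pair, e.g. "123-234".  The single form is treated as the range
 * begin..begin+1.
 */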
3630static int parse_cblock_range(struct cache *cache, const char *str,
3631 struct cblock_range *result)
3632{
3633 char dummy;
3634 uint64_t b, e;
3635 int r;
3636
3637
3638
3639
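	/* Try the begin-end form first. */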
3640 r = sscanf(str, "%llu-%llu%c", &b, &e, &dummy);
3641 if (r < 0)
3642 return r;
3643
3644 if (r == 2) {
3645 result->begin = to_cblock(b);
3646 result->end = to_cblock(e);
3647 return 0;
3648 }
3649
3650
3651
3652
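	/* That didn't work, so try the single-cblock form. */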
3653 r = sscanf(str, "%llu%c", &b, &dummy);
3654 if (r < 0)
3655 return r;
3656
3657 if (r == 1) {
3658 result->begin = to_cblock(b);
3659 result->end = to_cblock(from_cblock(result->begin) + 1u);
3660 return 0;
3661 }
3662
3663 DMERR("%s: invalid cblock range '%s'", cache_device_name(cache), str);
3664 return -EINVAL;
3665}
3666
3667static int validate_cblock_range(struct cache *cache, struct cblock_range *range)
3668{
3669 uint64_t b = from_cblock(range->begin);
3670 uint64_t e = from_cblock(range->end);
3671 uint64_t n = from_cblock(cache->cache_size);
3672
3673 if (b >= n) {
3674 DMERR("%s: begin cblock out of range: %llu >= %llu",
3675 cache_device_name(cache), b, n);
3676 return -EINVAL;
3677 }
3678
3679 if (e > n) {
3680 DMERR("%s: end cblock out of range: %llu > %llu",
3681 cache_device_name(cache), e, n);
3682 return -EINVAL;
3683 }
3684
3685 if (b >= e) {
3686 DMERR("%s: invalid cblock range: %llu >= %llu",
3687 cache_device_name(cache), b, e);
3688 return -EINVAL;
3689 }
3690
3691 return 0;
3692}
3693
3694static int request_invalidation(struct cache *cache, struct cblock_range *range)
3695{
3696 struct invalidation_request req;
3697
3698 INIT_LIST_HEAD(&req.list);
3699 req.cblocks = range;
3700 atomic_set(&req.complete, 0);
3701 req.err = 0;
3702 init_waitqueue_head(&req.result_wait);
3703
3704 spin_lock(&cache->invalidation_lock);
3705 list_add(&req.list, &cache->invalidation_requests);
3706 spin_unlock(&cache->invalidation_lock);
3707 wake_worker(cache);
3708
3709 wait_event(req.result_wait, atomic_read(&req.complete));
3710 return req.err;
3711}
3712
3713static int process_invalidate_cblocks_message(struct cache *cache, unsigned count,
3714 const char **cblock_ranges)
3715{
3716 int r = 0;
3717 unsigned i;
3718 struct cblock_range range;
3719
3720 if (!passthrough_mode(&cache->features)) {
3721 DMERR("%s: cache has to be in passthrough mode for invalidation",
3722 cache_device_name(cache));
3723 return -EPERM;
3724 }
3725
3726 for (i = 0; i < count; i++) {
3727 r = parse_cblock_range(cache, cblock_ranges[i], &range);
3728 if (r)
3729 break;
3730
3731 r = validate_cblock_range(cache, &range);
3732 if (r)
3733 break;
3734
3735
3736
3737
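		/*
		 * Hand the range to the worker and wait for the invalidation
		 * to complete.
		 */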
3738 r = request_invalidation(cache, &range);
3739 if (r)
3740 break;
3741 }
3742
3743 return r;
3744}
3745
3746
3747
3748
3749
3750
3751
3752
3753
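/*
 * Supported messages:
 *
 *	<key> <value>
 *	invalidate_cblocks [(<cblock>)|(<begin>-<end>)]*
 *
 * Illustrative invocation (the device name is a placeholder); invalidation
 * is only permitted while the cache is in passthrough mode:
 *
 *	dmsetup message my-cache 0 invalidate_cblocks 2345 3456-4567
 */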
3754static int cache_message(struct dm_target *ti, unsigned argc, char **argv)
3755{
3756 struct cache *cache = ti->private;
3757
3758 if (!argc)
3759 return -EINVAL;
3760
3761 if (get_cache_mode(cache) >= CM_READ_ONLY) {
3762 DMERR("%s: unable to service cache target messages in READ_ONLY or FAIL mode",
3763 cache_device_name(cache));
3764 return -EOPNOTSUPP;
3765 }
3766
3767 if (!strcasecmp(argv[0], "invalidate_cblocks"))
3768 return process_invalidate_cblocks_message(cache, argc - 1, (const char **) argv + 1);
3769
3770 if (argc != 2)
3771 return -EINVAL;
3772
3773 return set_config_value(cache, argv[0], argv[1]);
3774}
3775
3776static int cache_iterate_devices(struct dm_target *ti,
3777 iterate_devices_callout_fn fn, void *data)
3778{
3779 int r = 0;
3780 struct cache *cache = ti->private;
3781
3782 r = fn(ti, cache->cache_dev, 0, get_dev_size(cache->cache_dev), data);
3783 if (!r)
3784 r = fn(ti, cache->origin_dev, 0, ti->len, data);
3785
3786 return r;
3787}
3788
3789static void set_discard_limits(struct cache *cache, struct queue_limits *limits)
3790{
3791
3792
3793
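	/*
	 * Discards are expressed in units of the cache's discard block size,
	 * capped at 1024 discard blocks or the origin size, whichever is
	 * smaller.
	 */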
3794 limits->max_discard_sectors = min_t(sector_t, cache->discard_block_size * 1024,
3795 cache->origin_sectors);
3796 limits->discard_granularity = cache->discard_block_size << SECTOR_SHIFT;
3797}
3798
3799static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
3800{
3801 struct cache *cache = ti->private;
3802 uint64_t io_opt_sectors = limits->io_opt >> SECTOR_SHIFT;
3803
3804
3805
3806
3807
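	/*
	 * Keep the system-determined stacked limits unless io_opt is smaller
	 * than the cache block size or not a multiple of it, in which case
	 * io_min and io_opt are forced to the cache block size.
	 */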
3808 if (io_opt_sectors < cache->sectors_per_block ||
3809 do_div(io_opt_sectors, cache->sectors_per_block)) {
3810 blk_limits_io_min(limits, cache->sectors_per_block << SECTOR_SHIFT);
3811 blk_limits_io_opt(limits, cache->sectors_per_block << SECTOR_SHIFT);
3812 }
3813 set_discard_limits(cache, limits);
3814}
3815
3816
3817
3818static struct target_type cache_target = {
3819 .name = "cache",
3820 .version = {1, 9, 0},
3821 .module = THIS_MODULE,
3822 .ctr = cache_ctr,
3823 .dtr = cache_dtr,
3824 .map = cache_map,
3825 .end_io = cache_end_io,
3826 .postsuspend = cache_postsuspend,
3827 .preresume = cache_preresume,
3828 .resume = cache_resume,
3829 .status = cache_status,
3830 .message = cache_message,
3831 .iterate_devices = cache_iterate_devices,
3832 .io_hints = cache_io_hints,
3833};
3834
3835static int __init dm_cache_init(void)
3836{
3837 int r;
3838
3839 r = dm_register_target(&cache_target);
3840 if (r) {
3841 DMERR("cache target registration failed: %d", r);
3842 return r;
3843 }
3844
3845 migration_cache = KMEM_CACHE(dm_cache_migration, 0);
3846 if (!migration_cache) {
3847 dm_unregister_target(&cache_target);
3848 return -ENOMEM;
3849 }
3850
3851 return 0;
3852}
3853
3854static void __exit dm_cache_exit(void)
3855{
3856 dm_unregister_target(&cache_target);
3857 kmem_cache_destroy(migration_cache);
3858}
3859
3860module_init(dm_cache_init);
3861module_exit(dm_cache_exit);
3862
3863MODULE_DESCRIPTION(DM_NAME " cache target");
3864MODULE_AUTHOR("Joe Thornber <ejt@redhat.com>");
3865MODULE_LICENSE("GPL");
3866