/*
 * Copyright (C) 2012 Red Hat. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include "dm.h"
#include "dm-bio-prison.h"
#include "dm-bio-record.h"
#include "dm-cache-metadata.h"

#include <linux/dm-io.h>
#include <linux/dm-kcopyd.h>
#include <linux/jiffies.h>
#include <linux/init.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

#define DM_MSG_PREFIX "cache"

DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(cache_copy_throttle,
	"A percentage of time allocated for copying to and/or from cache");

/*----------------------------------------------------------------*/

#define IOT_RESOLUTION 4

struct io_tracker {
	spinlock_t lock;

	/*
	 * Sectors of in-flight IO.
	 */
	sector_t in_flight;

	/*
	 * The time, in jiffies, when this device became idle (if it is
	 * indeed idle).
	 */
	unsigned long idle_time;
	unsigned long last_update_time;
};

static void iot_init(struct io_tracker *iot)
{
	spin_lock_init(&iot->lock);
	iot->in_flight = 0ul;
	iot->idle_time = 0ul;
	iot->last_update_time = jiffies;
}

static bool __iot_idle_for(struct io_tracker *iot, unsigned long jifs)
{
	if (iot->in_flight)
		return false;

	return time_after(jiffies, iot->idle_time + jifs);
}

static bool iot_idle_for(struct io_tracker *iot, unsigned long jifs)
{
	bool r;
	unsigned long flags;

	spin_lock_irqsave(&iot->lock, flags);
	r = __iot_idle_for(iot, jifs);
	spin_unlock_irqrestore(&iot->lock, flags);

	return r;
}

static void iot_io_begin(struct io_tracker *iot, sector_t len)
{
	unsigned long flags;

	spin_lock_irqsave(&iot->lock, flags);
	iot->in_flight += len;
	spin_unlock_irqrestore(&iot->lock, flags);
}

static void __iot_io_end(struct io_tracker *iot, sector_t len)
{
	iot->in_flight -= len;
	if (!iot->in_flight)
		iot->idle_time = jiffies;
}

static void iot_io_end(struct io_tracker *iot, sector_t len)
{
	unsigned long flags;

	spin_lock_irqsave(&iot->lock, flags);
	__iot_io_end(iot, len);
	spin_unlock_irqrestore(&iot->lock, flags);
}
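
/*
 * Illustrative usage of the tracker; accounted_begin() and
 * accounted_complete() further down this file are the real call sites:
 *
 *	iot_io_begin(&cache->origin_tracker, bio_sectors(bio));
 *	...			(bio in flight)
 *	iot_io_end(&cache->origin_tracker, len);
 *
 *	if (iot_idle_for(&cache->origin_tracker, HZ))
 *		...		(origin idle for at least a second)
 */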

/*----------------------------------------------------------------*/

/*
 * Glossary:
 *
 * oblock: index of an origin block
 * cblock: index of a cache block
 * promotion: movement of a block from origin to cache
 * demotion: movement of a block from cache to origin
 * migration: movement of a block between the origin and cache device,
 *	      either direction
 */

/*----------------------------------------------------------------*/

/*
 * There are a couple of places where we let a bio run, but want to do some
 * work before calling its endio function.  We do this by temporarily
 * changing the endio fn.
 */
struct dm_hook_info {
	bio_end_io_t *bi_end_io;
};

static void dm_hook_bio(struct dm_hook_info *h, struct bio *bio,
			bio_end_io_t *bi_end_io, void *bi_private)
{
	h->bi_end_io = bio->bi_end_io;

	bio->bi_end_io = bi_end_io;
	bio->bi_private = bi_private;
}

static void dm_unhook_bio(struct dm_hook_info *h, struct bio *bio)
{
	bio->bi_end_io = h->bi_end_io;
}
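
/*
 * Illustrative only: the writethrough path below uses this pair as
 *
 *	dm_hook_bio(&pb->hook_info, bio, writethrough_endio, NULL);
 *	...			(bio completes, writethrough_endio runs)
 *	dm_unhook_bio(&pb->hook_info, bio);
 *
 * so the saved endio fn is back in place before the bio finally completes.
 */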

/*----------------------------------------------------------------*/

#define MIGRATION_POOL_SIZE 128
#define COMMIT_PERIOD HZ
#define MIGRATION_COUNT_WINDOW 10

/*
 * The block size of the device holding cache data must be
 * between 32KB and 1GB.
 */
#define DATA_DEV_BLOCK_SIZE_MIN_SECTORS (32 * 1024 >> SECTOR_SHIFT)
#define DATA_DEV_BLOCK_SIZE_MAX_SECTORS (1024 * 1024 * 1024 >> SECTOR_SHIFT)

enum cache_metadata_mode {
	CM_WRITE,		/* metadata may be changed */
	CM_READ_ONLY,		/* metadata may not be changed */
	CM_FAIL
};

enum cache_io_mode {
	/*
	 * Data is written to cached blocks only.  These blocks are marked
	 * dirty.  If you lose the cache device you will lose data.
	 * Potential performance increase for both reads and writes.
	 */
	CM_IO_WRITEBACK,

	/*
	 * Data is written to both cache and origin.  Blocks are never
	 * dirty.  Potential performance benefit for reads only.
	 */
	CM_IO_WRITETHROUGH,

	/*
	 * A degraded mode useful for various cache coherency situations
	 * (eg, rolling back snapshots).  Reads and writes always go to the
	 * origin.  If a write goes to a cached oblock, then the cache
	 * block is invalidated.
	 */
	CM_IO_PASSTHROUGH
};

struct cache_features {
	enum cache_metadata_mode mode;
	enum cache_io_mode io_mode;
};

struct cache_stats {
	atomic_t read_hit;
	atomic_t read_miss;
	atomic_t write_hit;
	atomic_t write_miss;
	atomic_t demotion;
	atomic_t promotion;
	atomic_t copies_avoided;
	atomic_t cache_cell_clash;
	atomic_t commit_count;
	atomic_t discard_count;
};

/*
 * Defines a range of cblocks, begin to (end - 1) are in the range.  end is
 * one past the end.
 */
struct cblock_range {
	dm_cblock_t begin;
	dm_cblock_t end;
};

struct invalidation_request {
	struct list_head list;
	struct cblock_range *cblocks;

	atomic_t complete;
	int err;

	wait_queue_head_t result_wait;
};

struct cache {
	struct dm_target *ti;
	struct dm_target_callbacks callbacks;

	struct dm_cache_metadata *cmd;

	/*
	 * Metadata is written to this device.
	 */
	struct dm_dev *metadata_dev;

	/*
	 * The slower of the two data devices.  Typically a spindle.
	 */
	struct dm_dev *origin_dev;

	/*
	 * The faster of the two data devices.  Typically an SSD.
	 */
	struct dm_dev *cache_dev;

	/*
	 * Size of the origin device in _complete_ blocks and native sectors.
	 */
	dm_oblock_t origin_blocks;
	sector_t origin_sectors;

	/*
	 * Size of the cache device in blocks.
	 */
	dm_cblock_t cache_size;

	/*
	 * Fields for converting from sectors to blocks.
	 */
	uint32_t sectors_per_block;
	int sectors_per_block_shift;

	spinlock_t lock;
	struct list_head deferred_cells;
	struct bio_list deferred_bios;
	struct bio_list deferred_flush_bios;
	struct bio_list deferred_writethrough_bios;
	struct list_head quiesced_migrations;
	struct list_head completed_migrations;
	struct list_head need_commit_migrations;
	sector_t migration_threshold;
	wait_queue_head_t migration_wait;
	atomic_t nr_allocated_migrations;

	/*
	 * The number of in flight migrations that are performing
	 * background io.  eg, promotion, writeback.
	 */
	atomic_t nr_io_migrations;

	wait_queue_head_t quiescing_wait;
	atomic_t quiescing;
	atomic_t quiescing_ack;

	/*
	 * cache_size entries, dirty if set.
	 */
	atomic_t nr_dirty;
	unsigned long *dirty_bitset;

	/*
	 * discard_nr_blocks entries, discarded if set.
	 */
	dm_dblock_t discard_nr_blocks;
	unsigned long *discard_bitset;
	uint32_t discard_block_size; /* a power of 2 times sectors per block */

	/*
	 * Rather than reconstructing the table line for the status we just
	 * save it and regurgitate.
	 */
	unsigned nr_ctr_args;
	const char **ctr_args;

	struct dm_kcopyd_client *copier;
	struct workqueue_struct *wq;
	struct work_struct worker;

	struct delayed_work waker;
	unsigned long last_commit_jiffies;

	struct dm_bio_prison *prison;
	struct dm_deferred_set *all_io_ds;

	mempool_t *migration_pool;

	struct dm_cache_policy *policy;
	unsigned policy_nr_args;

	bool need_tick_bio:1;
	bool sized:1;
	bool invalidate:1;
	bool commit_requested:1;
	bool loaded_mappings:1;
	bool loaded_discards:1;

	/*
	 * Cache features such as write-through.
	 */
	struct cache_features features;

	struct cache_stats stats;

	/*
	 * Invalidation fields.
	 */
	spinlock_t invalidation_lock;
	struct list_head invalidation_requests;

	struct io_tracker origin_tracker;
};

struct per_bio_data {
	bool tick:1;
	unsigned req_nr:2;
	struct dm_deferred_entry *all_io_entry;
	struct dm_hook_info hook_info;
	sector_t len;

	/*
	 * writethrough fields.  These MUST remain at the end of this
	 * structure HOWEVER the 'cache' member must not be last as it
	 * is used to determine the offset of the writethrough fields.
	 */
	struct cache *cache;
	dm_cblock_t cblock;
	struct dm_bio_details bio_details;
};

struct dm_cache_migration {
	struct list_head list;
	struct cache *cache;

	unsigned long start_jiffies;
	dm_oblock_t old_oblock;
	dm_oblock_t new_oblock;
	dm_cblock_t cblock;

	bool err:1;
	bool discard:1;
	bool writeback:1;
	bool demote:1;
	bool promote:1;
	bool requeue_holder:1;
	bool invalidate:1;

	struct dm_bio_prison_cell *old_ocell;
	struct dm_bio_prison_cell *new_ocell;
};

/*
 * Processing a bio in the worker thread may require these memory
 * allocations.  We prealloc to avoid deadlocks (the same worker thread
 * frees them back to the mempool).
 */
struct prealloc {
	struct dm_cache_migration *mg;
	struct dm_bio_prison_cell *cell1;
	struct dm_bio_prison_cell *cell2;
};

static enum cache_metadata_mode get_cache_mode(struct cache *cache);

static void wake_worker(struct cache *cache)
{
	queue_work(cache->wq, &cache->worker);
}

/*----------------------------------------------------------------*/

static struct dm_bio_prison_cell *alloc_prison_cell(struct cache *cache)
{
	/* FIXME: change to use a local slab. */
	return dm_bio_prison_alloc_cell(cache->prison, GFP_NOWAIT);
}

static void free_prison_cell(struct cache *cache, struct dm_bio_prison_cell *cell)
{
	dm_bio_prison_free_cell(cache->prison, cell);
}

static struct dm_cache_migration *alloc_migration(struct cache *cache)
{
	struct dm_cache_migration *mg;

	mg = mempool_alloc(cache->migration_pool, GFP_NOWAIT);
	if (mg) {
		mg->cache = cache;
		atomic_inc(&mg->cache->nr_allocated_migrations);
	}

	return mg;
}

static void free_migration(struct dm_cache_migration *mg)
{
	struct cache *cache = mg->cache;

	if (atomic_dec_and_test(&cache->nr_allocated_migrations))
		wake_up(&cache->migration_wait);

	mempool_free(mg, cache->migration_pool);
}

static int prealloc_data_structs(struct cache *cache, struct prealloc *p)
{
	if (!p->mg) {
		p->mg = alloc_migration(cache);
		if (!p->mg)
			return -ENOMEM;
	}

	if (!p->cell1) {
		p->cell1 = alloc_prison_cell(cache);
		if (!p->cell1)
			return -ENOMEM;
	}

	if (!p->cell2) {
		p->cell2 = alloc_prison_cell(cache);
		if (!p->cell2)
			return -ENOMEM;
	}

	return 0;
}
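
/*
 * Typical use of struct prealloc, as in process_deferred_bios() further
 * down this file:
 *
 *	struct prealloc structs;
 *
 *	memset(&structs, 0, sizeof(structs));
 *	if (prealloc_data_structs(cache, &structs))
 *		... defer the work until structs are available ...
 *	... consume via prealloc_get_cell()/prealloc_get_migration() ...
 *	prealloc_free_structs(cache, &structs);
 */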

static void prealloc_free_structs(struct cache *cache, struct prealloc *p)
{
	if (p->cell2)
		free_prison_cell(cache, p->cell2);

	if (p->cell1)
		free_prison_cell(cache, p->cell1);

	if (p->mg)
		free_migration(p->mg);
}

static struct dm_cache_migration *prealloc_get_migration(struct prealloc *p)
{
	struct dm_cache_migration *mg = p->mg;

	BUG_ON(!mg);
	p->mg = NULL;

	return mg;
}

/*
 * You must have a cell within the prealloc struct to return.  If not this
 * function will BUG() rather than returning NULL.
 */
static struct dm_bio_prison_cell *prealloc_get_cell(struct prealloc *p)
{
	struct dm_bio_prison_cell *r = NULL;

	if (p->cell1) {
		r = p->cell1;
		p->cell1 = NULL;

	} else if (p->cell2) {
		r = p->cell2;
		p->cell2 = NULL;
	} else
		BUG();

	return r;
}

/*
 * You can't have more than two cells in a prealloc struct.  BUG() will be
 * called if you try and overfill.
 */
static void prealloc_put_cell(struct prealloc *p, struct dm_bio_prison_cell *cell)
{
	if (!p->cell2)
		p->cell2 = cell;

	else if (!p->cell1)
		p->cell1 = cell;

	else
		BUG();
}

/*----------------------------------------------------------------*/

static void build_key(dm_oblock_t begin, dm_oblock_t end, struct dm_cell_key *key)
{
	key->virtual = 0;
	key->dev = 0;
	key->block_begin = from_oblock(begin);
	key->block_end = from_oblock(end);
}
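
/*
 * Cell keys are half open intervals of oblocks, [begin, end).  For
 * example, locking just oblock 42 means the key [42, 43); bio_detain()
 * below builds exactly that via from_oblock(oblock) + 1ULL.
 */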

/*
 * The caller hands in a preallocated cell, and a free function for it.
 * The cell will be freed if there's an error, or if it wasn't used because
 * a cell with that key already exists.
 */
typedef void (*cell_free_fn)(void *context, struct dm_bio_prison_cell *cell);

static int bio_detain_range(struct cache *cache, dm_oblock_t oblock_begin, dm_oblock_t oblock_end,
			    struct bio *bio, struct dm_bio_prison_cell *cell_prealloc,
			    cell_free_fn free_fn, void *free_context,
			    struct dm_bio_prison_cell **cell_result)
{
	int r;
	struct dm_cell_key key;

	build_key(oblock_begin, oblock_end, &key);
	r = dm_bio_detain(cache->prison, &key, bio, cell_prealloc, cell_result);
	if (r)
		free_fn(free_context, cell_prealloc);

	return r;
}

static int bio_detain(struct cache *cache, dm_oblock_t oblock,
		      struct bio *bio, struct dm_bio_prison_cell *cell_prealloc,
		      cell_free_fn free_fn, void *free_context,
		      struct dm_bio_prison_cell **cell_result)
{
	dm_oblock_t end = to_oblock(from_oblock(oblock) + 1ULL);
	return bio_detain_range(cache, oblock, end, bio,
				cell_prealloc, free_fn, free_context, cell_result);
}

static int get_cell(struct cache *cache,
		    dm_oblock_t oblock,
		    struct prealloc *structs,
		    struct dm_bio_prison_cell **cell_result)
{
	int r;
	struct dm_cell_key key;
	struct dm_bio_prison_cell *cell_prealloc;

	cell_prealloc = prealloc_get_cell(structs);

	build_key(oblock, to_oblock(from_oblock(oblock) + 1ULL), &key);
	r = dm_get_cell(cache->prison, &key, cell_prealloc, cell_result);
	if (r)
		prealloc_put_cell(structs, cell_prealloc);

	return r;
}

/*----------------------------------------------------------------*/

static bool is_dirty(struct cache *cache, dm_cblock_t b)
{
	return test_bit(from_cblock(b), cache->dirty_bitset);
}

static void set_dirty(struct cache *cache, dm_oblock_t oblock, dm_cblock_t cblock)
{
	if (!test_and_set_bit(from_cblock(cblock), cache->dirty_bitset)) {
		atomic_inc(&cache->nr_dirty);
		policy_set_dirty(cache->policy, oblock);
	}
}

static void clear_dirty(struct cache *cache, dm_oblock_t oblock, dm_cblock_t cblock)
{
	if (test_and_clear_bit(from_cblock(cblock), cache->dirty_bitset)) {
		policy_clear_dirty(cache->policy, oblock);
		if (atomic_dec_return(&cache->nr_dirty) == 0)
			dm_table_event(cache->ti->table);
	}
}

/*----------------------------------------------------------------*/

static bool block_size_is_power_of_two(struct cache *cache)
{
	return cache->sectors_per_block_shift >= 0;
}

/* gcc on ARM generates spurious references to __udivdi3 and __umoddi3 */
#if defined(CONFIG_ARM) && __GNUC__ == 4 && __GNUC_MINOR__ <= 6
__always_inline
#endif
static dm_block_t block_div(dm_block_t b, uint32_t n)
{
	do_div(b, n);

	return b;
}

static dm_block_t oblocks_per_dblock(struct cache *cache)
{
	dm_block_t oblocks = cache->discard_block_size;

	if (block_size_is_power_of_two(cache))
		oblocks >>= cache->sectors_per_block_shift;
	else
		oblocks = block_div(oblocks, cache->sectors_per_block);

	return oblocks;
}

static dm_dblock_t oblock_to_dblock(struct cache *cache, dm_oblock_t oblock)
{
	return to_dblock(block_div(from_oblock(oblock),
				   oblocks_per_dblock(cache)));
}

static dm_oblock_t dblock_to_oblock(struct cache *cache, dm_dblock_t dblock)
{
	return to_oblock(from_dblock(dblock) * oblocks_per_dblock(cache));
}
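
/*
 * Worked example with illustrative numbers: if sectors_per_block = 1024
 * and discard_block_size = 8192 sectors, then oblocks_per_dblock() = 8,
 * oblock 100 maps to dblock 12 (100 / 8 rounded down), and dblock 12
 * maps back to its first oblock, 96.
 */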

static void set_discard(struct cache *cache, dm_dblock_t b)
{
	unsigned long flags;

	BUG_ON(from_dblock(b) >= from_dblock(cache->discard_nr_blocks));
	atomic_inc(&cache->stats.discard_count);

	spin_lock_irqsave(&cache->lock, flags);
	set_bit(from_dblock(b), cache->discard_bitset);
	spin_unlock_irqrestore(&cache->lock, flags);
}

static void clear_discard(struct cache *cache, dm_dblock_t b)
{
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);
	clear_bit(from_dblock(b), cache->discard_bitset);
	spin_unlock_irqrestore(&cache->lock, flags);
}

static bool is_discarded(struct cache *cache, dm_dblock_t b)
{
	int r;
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);
	r = test_bit(from_dblock(b), cache->discard_bitset);
	spin_unlock_irqrestore(&cache->lock, flags);

	return r;
}

static bool is_discarded_oblock(struct cache *cache, dm_oblock_t b)
{
	int r;
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);
	r = test_bit(from_dblock(oblock_to_dblock(cache, b)),
		     cache->discard_bitset);
	spin_unlock_irqrestore(&cache->lock, flags);

	return r;
}

/*----------------------------------------------------------------*/

static void load_stats(struct cache *cache)
{
	struct dm_cache_statistics stats;

	dm_cache_metadata_get_stats(cache->cmd, &stats);
	atomic_set(&cache->stats.read_hit, stats.read_hits);
	atomic_set(&cache->stats.read_miss, stats.read_misses);
	atomic_set(&cache->stats.write_hit, stats.write_hits);
	atomic_set(&cache->stats.write_miss, stats.write_misses);
}

static void save_stats(struct cache *cache)
{
	struct dm_cache_statistics stats;

	if (get_cache_mode(cache) >= CM_READ_ONLY)
		return;

	stats.read_hits = atomic_read(&cache->stats.read_hit);
	stats.read_misses = atomic_read(&cache->stats.read_miss);
	stats.write_hits = atomic_read(&cache->stats.write_hit);
	stats.write_misses = atomic_read(&cache->stats.write_miss);

	dm_cache_metadata_set_stats(cache->cmd, &stats);
}

/*----------------------------------------------------------------
 * Per bio data
 *--------------------------------------------------------------*/

/*
 * If using writeback, leave out struct per_bio_data's writethrough fields.
 */
#define PB_DATA_SIZE_WB (offsetof(struct per_bio_data, cache))
#define PB_DATA_SIZE_WT (sizeof(struct per_bio_data))

static bool writethrough_mode(struct cache_features *f)
{
	return f->io_mode == CM_IO_WRITETHROUGH;
}

static bool writeback_mode(struct cache_features *f)
{
	return f->io_mode == CM_IO_WRITEBACK;
}

static bool passthrough_mode(struct cache_features *f)
{
	return f->io_mode == CM_IO_PASSTHROUGH;
}

static size_t get_per_bio_data_size(struct cache *cache)
{
	return writethrough_mode(&cache->features) ? PB_DATA_SIZE_WT : PB_DATA_SIZE_WB;
}
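
/*
 * Note on the sizing above (derived from the struct definition): in
 * writeback and passthrough modes only the per_bio_data fields before
 * 'cache' are allocated, since PB_DATA_SIZE_WB is an offsetof().  This
 * is why the writethrough-only fields must stay at the end of
 * struct per_bio_data.
 */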

static struct per_bio_data *get_per_bio_data(struct bio *bio, size_t data_size)
{
	struct per_bio_data *pb = dm_per_bio_data(bio, data_size);
	BUG_ON(!pb);
	return pb;
}

static struct per_bio_data *init_per_bio_data(struct bio *bio, size_t data_size)
{
	struct per_bio_data *pb = get_per_bio_data(bio, data_size);

	pb->tick = false;
	pb->req_nr = dm_bio_get_target_bio_nr(bio);
	pb->all_io_entry = NULL;
	pb->len = 0;

	return pb;
}

/*----------------------------------------------------------------
 * Remapping
 *--------------------------------------------------------------*/
static void remap_to_origin(struct cache *cache, struct bio *bio)
{
	bio->bi_bdev = cache->origin_dev->bdev;
}

static void remap_to_cache(struct cache *cache, struct bio *bio,
			   dm_cblock_t cblock)
{
	sector_t bi_sector = bio->bi_iter.bi_sector;
	sector_t block = from_cblock(cblock);

	bio->bi_bdev = cache->cache_dev->bdev;
	if (!block_size_is_power_of_two(cache))
		bio->bi_iter.bi_sector =
			(block * cache->sectors_per_block) +
			sector_div(bi_sector, cache->sectors_per_block);
	else
		bio->bi_iter.bi_sector =
			(block << cache->sectors_per_block_shift) |
			(bi_sector & (cache->sectors_per_block - 1));
}
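
/*
 * Worked example with illustrative numbers: for a cache block of 1024
 * sectors (sectors_per_block_shift = 10), a bio at sector 5000 remapped
 * to cblock 3 lands at cache sector (3 << 10) | (5000 & 1023), ie.
 * 3072 + 904 = 3976.  The offset within the block is preserved.
 */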

static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio)
{
	unsigned long flags;
	size_t pb_data_size = get_per_bio_data_size(cache);
	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);

	spin_lock_irqsave(&cache->lock, flags);
	if (cache->need_tick_bio &&
	    !(bio->bi_rw & (REQ_FUA | REQ_FLUSH | REQ_DISCARD))) {
		pb->tick = true;
		cache->need_tick_bio = false;
	}
	spin_unlock_irqrestore(&cache->lock, flags);
}

static void remap_to_origin_clear_discard(struct cache *cache, struct bio *bio,
					  dm_oblock_t oblock)
{
	check_if_tick_bio_needed(cache, bio);
	remap_to_origin(cache, bio);
	if (bio_data_dir(bio) == WRITE)
		clear_discard(cache, oblock_to_dblock(cache, oblock));
}

static void remap_to_cache_dirty(struct cache *cache, struct bio *bio,
				 dm_oblock_t oblock, dm_cblock_t cblock)
{
	check_if_tick_bio_needed(cache, bio);
	remap_to_cache(cache, bio, cblock);
	if (bio_data_dir(bio) == WRITE) {
		set_dirty(cache, oblock, cblock);
		clear_discard(cache, oblock_to_dblock(cache, oblock));
	}
}

static dm_oblock_t get_bio_block(struct cache *cache, struct bio *bio)
{
	sector_t block_nr = bio->bi_iter.bi_sector;

	if (!block_size_is_power_of_two(cache))
		(void) sector_div(block_nr, cache->sectors_per_block);
	else
		block_nr >>= cache->sectors_per_block_shift;

	return to_oblock(block_nr);
}

static int bio_triggers_commit(struct cache *cache, struct bio *bio)
{
	return bio->bi_rw & (REQ_FLUSH | REQ_FUA);
}

/*
 * You must increment the deferred set whilst the prison cell is held.  To
 * encourage this, we ask for 'cell' to be passed in.
 */
static void inc_ds(struct cache *cache, struct bio *bio,
		   struct dm_bio_prison_cell *cell)
{
	size_t pb_data_size = get_per_bio_data_size(cache);
	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);

	BUG_ON(!cell);
	BUG_ON(pb->all_io_entry);

	pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
}

static bool accountable_bio(struct cache *cache, struct bio *bio)
{
	return ((bio->bi_bdev == cache->origin_dev->bdev) &&
		!(bio->bi_rw & REQ_DISCARD));
}

static void accounted_begin(struct cache *cache, struct bio *bio)
{
	size_t pb_data_size = get_per_bio_data_size(cache);
	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);

	if (accountable_bio(cache, bio)) {
		pb->len = bio_sectors(bio);
		iot_io_begin(&cache->origin_tracker, pb->len);
	}
}

static void accounted_complete(struct cache *cache, struct bio *bio)
{
	size_t pb_data_size = get_per_bio_data_size(cache);
	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);

	iot_io_end(&cache->origin_tracker, pb->len);
}

static void accounted_request(struct cache *cache, struct bio *bio)
{
	accounted_begin(cache, bio);
	generic_make_request(bio);
}

static void issue(struct cache *cache, struct bio *bio)
{
	unsigned long flags;

	if (!bio_triggers_commit(cache, bio)) {
		accounted_request(cache, bio);
		return;
	}

	/*
	 * Batch together any bios that trigger commits and then issue a
	 * single commit for them in do_worker().
	 */
	spin_lock_irqsave(&cache->lock, flags);
	cache->commit_requested = true;
	bio_list_add(&cache->deferred_flush_bios, bio);
	spin_unlock_irqrestore(&cache->lock, flags);
}

static void inc_and_issue(struct cache *cache, struct bio *bio, struct dm_bio_prison_cell *cell)
{
	inc_ds(cache, bio, cell);
	issue(cache, bio);
}

static void defer_writethrough_bio(struct cache *cache, struct bio *bio)
{
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);
	bio_list_add(&cache->deferred_writethrough_bios, bio);
	spin_unlock_irqrestore(&cache->lock, flags);

	wake_worker(cache);
}

static void writethrough_endio(struct bio *bio)
{
	struct per_bio_data *pb = get_per_bio_data(bio, PB_DATA_SIZE_WT);

	dm_unhook_bio(&pb->hook_info, bio);

	if (bio->bi_error) {
		bio_endio(bio);
		return;
	}

	dm_bio_restore(&pb->bio_details, bio);
	remap_to_cache(pb->cache, bio, pb->cblock);

	/*
	 * We can't issue this bio directly, since we're in interrupt
	 * context.  So it gets put on a bio list for processing by the
	 * worker thread.
	 */
	defer_writethrough_bio(pb->cache, bio);
}

/*
 * When running in writethrough mode we need to send writes to clean blocks
 * to both the cache and origin devices.  In future we'd like to clone the
 * bio and send them in parallel, but for now we're doing them in
 * series as this is easier.
 */
static void remap_to_origin_then_cache(struct cache *cache, struct bio *bio,
				       dm_oblock_t oblock, dm_cblock_t cblock)
{
	struct per_bio_data *pb = get_per_bio_data(bio, PB_DATA_SIZE_WT);

	pb->cache = cache;
	pb->cblock = cblock;
	dm_hook_bio(&pb->hook_info, bio, writethrough_endio, NULL);
	dm_bio_record(&pb->bio_details, bio);

	remap_to_origin_clear_discard(pb->cache, bio, oblock);
}

/*----------------------------------------------------------------
 * Failure modes
 *--------------------------------------------------------------*/
static enum cache_metadata_mode get_cache_mode(struct cache *cache)
{
	return cache->features.mode;
}

static const char *cache_device_name(struct cache *cache)
{
	return dm_device_name(dm_table_get_md(cache->ti->table));
}

static void notify_mode_switch(struct cache *cache, enum cache_metadata_mode mode)
{
	const char *descs[] = {
		"write",
		"read-only",
		"fail"
	};

	dm_table_event(cache->ti->table);
	DMINFO("%s: switching cache to %s mode",
	       cache_device_name(cache), descs[(int)mode]);
}

static void set_cache_mode(struct cache *cache, enum cache_metadata_mode new_mode)
{
	bool needs_check;
	enum cache_metadata_mode old_mode = get_cache_mode(cache);

	if (dm_cache_metadata_needs_check(cache->cmd, &needs_check)) {
		DMERR("unable to read needs_check flag, setting failure mode");
		new_mode = CM_FAIL;
	}

	if (new_mode == CM_WRITE && needs_check) {
		DMERR("%s: unable to switch cache to write mode until repaired.",
		      cache_device_name(cache));
		if (old_mode != new_mode)
			new_mode = old_mode;
		else
			new_mode = CM_READ_ONLY;
	}

	/* Never move out of fail mode */
	if (old_mode == CM_FAIL)
		new_mode = CM_FAIL;

	switch (new_mode) {
	case CM_FAIL:
	case CM_READ_ONLY:
		dm_cache_metadata_set_read_only(cache->cmd);
		break;

	case CM_WRITE:
		dm_cache_metadata_set_read_write(cache->cmd);
		break;
	}

	cache->features.mode = new_mode;

	if (new_mode != old_mode)
		notify_mode_switch(cache, new_mode);
}
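
/*
 * Summary of the transitions above while the needs_check flag is set:
 *
 *	current mode	requested mode	resulting mode
 *	fail		any		fail
 *	read-only	write		read-only
 *	write		write		read-only
 *
 * ie. fail mode is sticky, and a cache that needs repair cannot be
 * switched back to write mode.
 */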

static void abort_transaction(struct cache *cache)
{
	const char *dev_name = cache_device_name(cache);

	if (get_cache_mode(cache) >= CM_READ_ONLY)
		return;

	if (dm_cache_metadata_set_needs_check(cache->cmd)) {
		DMERR("%s: failed to set 'needs_check' flag in metadata", dev_name);
		set_cache_mode(cache, CM_FAIL);
	}

	DMERR_LIMIT("%s: aborting current metadata transaction", dev_name);
	if (dm_cache_metadata_abort(cache->cmd)) {
		DMERR("%s: failed to abort metadata transaction", dev_name);
		set_cache_mode(cache, CM_FAIL);
	}
}

static void metadata_operation_failed(struct cache *cache, const char *op, int r)
{
	DMERR_LIMIT("%s: metadata operation '%s' failed: error = %d",
		    cache_device_name(cache), op, r);
	abort_transaction(cache);
	set_cache_mode(cache, CM_READ_ONLY);
}

/*----------------------------------------------------------------
 * Migration processing
 *
 * Migration covers moving data from the origin device to the cache, or
 * vice versa.
 *--------------------------------------------------------------*/
static void inc_io_migrations(struct cache *cache)
{
	atomic_inc(&cache->nr_io_migrations);
}

static void dec_io_migrations(struct cache *cache)
{
	atomic_dec(&cache->nr_io_migrations);
}

static bool discard_or_flush(struct bio *bio)
{
	return bio->bi_rw & (REQ_FLUSH | REQ_FUA | REQ_DISCARD);
}

static void __cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell)
{
	if (discard_or_flush(cell->holder)) {
		/*
		 * We have to handle these bios individually.
		 */
		dm_cell_release(cache->prison, cell, &cache->deferred_bios);
		free_prison_cell(cache, cell);
	} else
		list_add_tail(&cell->user_list, &cache->deferred_cells);
}

static void cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell, bool holder)
{
	unsigned long flags;

	if (!holder && dm_cell_promote_or_release(cache->prison, cell)) {
		/*
		 * There was no prisoner to promote to holder, the
		 * cell has been released.
		 */
		free_prison_cell(cache, cell);
		return;
	}

	spin_lock_irqsave(&cache->lock, flags);
	__cell_defer(cache, cell);
	spin_unlock_irqrestore(&cache->lock, flags);

	wake_worker(cache);
}

static void cell_error_with_code(struct cache *cache, struct dm_bio_prison_cell *cell, int err)
{
	dm_cell_error(cache->prison, cell, err);
	free_prison_cell(cache, cell);
}

static void cell_requeue(struct cache *cache, struct dm_bio_prison_cell *cell)
{
	cell_error_with_code(cache, cell, DM_ENDIO_REQUEUE);
}

static void free_io_migration(struct dm_cache_migration *mg)
{
	struct cache *cache = mg->cache;

	dec_io_migrations(cache);
	free_migration(mg);
	wake_worker(cache);
}

static void migration_failure(struct dm_cache_migration *mg)
{
	struct cache *cache = mg->cache;
	const char *dev_name = cache_device_name(cache);

	if (mg->writeback) {
		DMERR_LIMIT("%s: writeback failed; couldn't copy block", dev_name);
		set_dirty(cache, mg->old_oblock, mg->cblock);
		cell_defer(cache, mg->old_ocell, false);

	} else if (mg->demote) {
		DMERR_LIMIT("%s: demotion failed; couldn't copy block", dev_name);
		policy_force_mapping(cache->policy, mg->new_oblock, mg->old_oblock);

		cell_defer(cache, mg->old_ocell, mg->promote ? false : true);
		if (mg->promote)
			cell_defer(cache, mg->new_ocell, true);
	} else {
		DMERR_LIMIT("%s: promotion failed; couldn't copy block", dev_name);
		policy_remove_mapping(cache->policy, mg->new_oblock);
		cell_defer(cache, mg->new_ocell, true);
	}

	free_io_migration(mg);
}

static void migration_success_pre_commit(struct dm_cache_migration *mg)
{
	int r;
	unsigned long flags;
	struct cache *cache = mg->cache;

	if (mg->writeback) {
		clear_dirty(cache, mg->old_oblock, mg->cblock);
		cell_defer(cache, mg->old_ocell, false);
		free_io_migration(mg);
		return;

	} else if (mg->demote) {
		r = dm_cache_remove_mapping(cache->cmd, mg->cblock);
		if (r) {
			DMERR_LIMIT("%s: demotion failed; couldn't update on disk metadata",
				    cache_device_name(cache));
			metadata_operation_failed(cache, "dm_cache_remove_mapping", r);
			policy_force_mapping(cache->policy, mg->new_oblock,
					     mg->old_oblock);
			if (mg->promote)
				cell_defer(cache, mg->new_ocell, true);
			free_io_migration(mg);
			return;
		}
	} else {
		r = dm_cache_insert_mapping(cache->cmd, mg->cblock, mg->new_oblock);
		if (r) {
			DMERR_LIMIT("%s: promotion failed; couldn't update on disk metadata",
				    cache_device_name(cache));
			metadata_operation_failed(cache, "dm_cache_insert_mapping", r);
			policy_remove_mapping(cache->policy, mg->new_oblock);
			free_io_migration(mg);
			return;
		}
	}

	spin_lock_irqsave(&cache->lock, flags);
	list_add_tail(&mg->list, &cache->need_commit_migrations);
	cache->commit_requested = true;
	spin_unlock_irqrestore(&cache->lock, flags);
}

static void migration_success_post_commit(struct dm_cache_migration *mg)
{
	unsigned long flags;
	struct cache *cache = mg->cache;

	if (mg->writeback) {
		DMWARN_LIMIT("%s: writeback unexpectedly triggered commit",
			     cache_device_name(cache));
		return;

	} else if (mg->demote) {
		cell_defer(cache, mg->old_ocell, mg->promote ? false : true);

		if (mg->promote) {
			mg->demote = false;

			spin_lock_irqsave(&cache->lock, flags);
			list_add_tail(&mg->list, &cache->quiesced_migrations);
			spin_unlock_irqrestore(&cache->lock, flags);

		} else {
			if (mg->invalidate)
				policy_remove_mapping(cache->policy, mg->old_oblock);
			free_io_migration(mg);
		}

	} else {
		if (mg->requeue_holder) {
			clear_dirty(cache, mg->new_oblock, mg->cblock);
			cell_defer(cache, mg->new_ocell, true);
		} else {
			/*
			 * The block was promoted via an overwrite, so it's dirty.
			 */
			set_dirty(cache, mg->new_oblock, mg->cblock);
			bio_endio(mg->new_ocell->holder);
			cell_defer(cache, mg->new_ocell, false);
		}
		free_io_migration(mg);
	}
}

static void copy_complete(int read_err, unsigned long write_err, void *context)
{
	unsigned long flags;
	struct dm_cache_migration *mg = (struct dm_cache_migration *) context;
	struct cache *cache = mg->cache;

	if (read_err || write_err)
		mg->err = true;

	spin_lock_irqsave(&cache->lock, flags);
	list_add_tail(&mg->list, &cache->completed_migrations);
	spin_unlock_irqrestore(&cache->lock, flags);

	wake_worker(cache);
}

static void issue_copy(struct dm_cache_migration *mg)
{
	int r;
	struct dm_io_region o_region, c_region;
	struct cache *cache = mg->cache;
	sector_t cblock = from_cblock(mg->cblock);

	o_region.bdev = cache->origin_dev->bdev;
	o_region.count = cache->sectors_per_block;

	c_region.bdev = cache->cache_dev->bdev;
	c_region.sector = cblock * cache->sectors_per_block;
	c_region.count = cache->sectors_per_block;

	if (mg->writeback || mg->demote) {
		/* demote */
		o_region.sector = from_oblock(mg->old_oblock) * cache->sectors_per_block;
		r = dm_kcopyd_copy(cache->copier, &c_region, 1, &o_region, 0, copy_complete, mg);
	} else {
		/* promote */
		o_region.sector = from_oblock(mg->new_oblock) * cache->sectors_per_block;
		r = dm_kcopyd_copy(cache->copier, &o_region, 1, &c_region, 0, copy_complete, mg);
	}

	if (r < 0) {
		DMERR_LIMIT("%s: issuing migration failed", cache_device_name(cache));
		migration_failure(mg);
	}
}

static void overwrite_endio(struct bio *bio)
{
	struct dm_cache_migration *mg = bio->bi_private;
	struct cache *cache = mg->cache;
	size_t pb_data_size = get_per_bio_data_size(cache);
	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
	unsigned long flags;

	dm_unhook_bio(&pb->hook_info, bio);

	if (bio->bi_error)
		mg->err = true;

	mg->requeue_holder = false;

	spin_lock_irqsave(&cache->lock, flags);
	list_add_tail(&mg->list, &cache->completed_migrations);
	spin_unlock_irqrestore(&cache->lock, flags);

	wake_worker(cache);
}

static void issue_overwrite(struct dm_cache_migration *mg, struct bio *bio)
{
	size_t pb_data_size = get_per_bio_data_size(mg->cache);
	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);

	dm_hook_bio(&pb->hook_info, bio, overwrite_endio, mg);
	remap_to_cache_dirty(mg->cache, bio, mg->new_oblock, mg->cblock);

	/*
	 * No need to inc_ds() here, since the cell will be held for the
	 * duration of the io.
	 */
	accounted_request(mg->cache, bio);
}

static bool bio_writes_complete_block(struct cache *cache, struct bio *bio)
{
	return (bio_data_dir(bio) == WRITE) &&
		(bio->bi_iter.bi_size == (cache->sectors_per_block << SECTOR_SHIFT));
}

static void avoid_copy(struct dm_cache_migration *mg)
{
	atomic_inc(&mg->cache->stats.copies_avoided);
	migration_success_pre_commit(mg);
}

static void calc_discard_block_range(struct cache *cache, struct bio *bio,
				     dm_dblock_t *b, dm_dblock_t *e)
{
	sector_t sb = bio->bi_iter.bi_sector;
	sector_t se = bio_end_sector(bio);

	*b = to_dblock(dm_sector_div_up(sb, cache->discard_block_size));

	if (se - sb < cache->discard_block_size)
		*e = *b;
	else
		*e = to_dblock(block_div(se, cache->discard_block_size));
}
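
/*
 * Worked example with illustrative numbers: for discard_block_size = 512
 * sectors, a discard bio covering sectors [1000, 2000) gives b = 2 and
 * e = 3, ie. only discard block 2 (sectors [1024, 1536)) gets marked;
 * blocks only partially covered at either end are skipped.
 */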

static void issue_discard(struct dm_cache_migration *mg)
{
	dm_dblock_t b, e;
	struct bio *bio = mg->new_ocell->holder;
	struct cache *cache = mg->cache;

	calc_discard_block_range(cache, bio, &b, &e);
	while (b != e) {
		set_discard(cache, b);
		b = to_dblock(from_dblock(b) + 1);
	}

	bio_endio(bio);
	cell_defer(cache, mg->new_ocell, false);
	free_migration(mg);
	wake_worker(cache);
}

static void issue_copy_or_discard(struct dm_cache_migration *mg)
{
	bool avoid;
	struct cache *cache = mg->cache;

	if (mg->discard) {
		issue_discard(mg);
		return;
	}

	if (mg->writeback || mg->demote)
		avoid = !is_dirty(cache, mg->cblock) ||
			is_discarded_oblock(cache, mg->old_oblock);
	else {
		struct bio *bio = mg->new_ocell->holder;

		avoid = is_discarded_oblock(cache, mg->new_oblock);

		if (writeback_mode(&cache->features) &&
		    !avoid && bio_writes_complete_block(cache, bio)) {
			issue_overwrite(mg, bio);
			return;
		}
	}

	avoid ? avoid_copy(mg) : issue_copy(mg);
}

static void complete_migration(struct dm_cache_migration *mg)
{
	if (mg->err)
		migration_failure(mg);
	else
		migration_success_pre_commit(mg);
}

static void process_migrations(struct cache *cache, struct list_head *head,
			       void (*fn)(struct dm_cache_migration *))
{
	unsigned long flags;
	struct list_head list;
	struct dm_cache_migration *mg, *tmp;

	INIT_LIST_HEAD(&list);
	spin_lock_irqsave(&cache->lock, flags);
	list_splice_init(head, &list);
	spin_unlock_irqrestore(&cache->lock, flags);

	list_for_each_entry_safe(mg, tmp, &list, list)
		fn(mg);
}

static void __queue_quiesced_migration(struct dm_cache_migration *mg)
{
	list_add_tail(&mg->list, &mg->cache->quiesced_migrations);
}

static void queue_quiesced_migration(struct dm_cache_migration *mg)
{
	unsigned long flags;
	struct cache *cache = mg->cache;

	spin_lock_irqsave(&cache->lock, flags);
	__queue_quiesced_migration(mg);
	spin_unlock_irqrestore(&cache->lock, flags);

	wake_worker(cache);
}

static void queue_quiesced_migrations(struct cache *cache, struct list_head *work)
{
	unsigned long flags;
	struct dm_cache_migration *mg, *tmp;

	spin_lock_irqsave(&cache->lock, flags);
	list_for_each_entry_safe(mg, tmp, work, list)
		__queue_quiesced_migration(mg);
	spin_unlock_irqrestore(&cache->lock, flags);

	wake_worker(cache);
}

static void check_for_quiesced_migrations(struct cache *cache,
					  struct per_bio_data *pb)
{
	struct list_head work;

	if (!pb->all_io_entry)
		return;

	INIT_LIST_HEAD(&work);
	dm_deferred_entry_dec(pb->all_io_entry, &work);

	if (!list_empty(&work))
		queue_quiesced_migrations(cache, &work);
}

static void quiesce_migration(struct dm_cache_migration *mg)
{
	if (!dm_deferred_set_add_work(mg->cache->all_io_ds, &mg->list))
		queue_quiesced_migration(mg);
}

static void promote(struct cache *cache, struct prealloc *structs,
		    dm_oblock_t oblock, dm_cblock_t cblock,
		    struct dm_bio_prison_cell *cell)
{
	struct dm_cache_migration *mg = prealloc_get_migration(structs);

	mg->err = false;
	mg->discard = false;
	mg->writeback = false;
	mg->demote = false;
	mg->promote = true;
	mg->requeue_holder = true;
	mg->invalidate = false;
	mg->cache = cache;
	mg->new_oblock = oblock;
	mg->cblock = cblock;
	mg->old_ocell = NULL;
	mg->new_ocell = cell;
	mg->start_jiffies = jiffies;

	inc_io_migrations(cache);
	quiesce_migration(mg);
}

static void writeback(struct cache *cache, struct prealloc *structs,
		      dm_oblock_t oblock, dm_cblock_t cblock,
		      struct dm_bio_prison_cell *cell)
{
	struct dm_cache_migration *mg = prealloc_get_migration(structs);

	mg->err = false;
	mg->discard = false;
	mg->writeback = true;
	mg->demote = false;
	mg->promote = false;
	mg->requeue_holder = true;
	mg->invalidate = false;
	mg->cache = cache;
	mg->old_oblock = oblock;
	mg->cblock = cblock;
	mg->old_ocell = cell;
	mg->new_ocell = NULL;
	mg->start_jiffies = jiffies;

	inc_io_migrations(cache);
	quiesce_migration(mg);
}

static void demote_then_promote(struct cache *cache, struct prealloc *structs,
				dm_oblock_t old_oblock, dm_oblock_t new_oblock,
				dm_cblock_t cblock,
				struct dm_bio_prison_cell *old_ocell,
				struct dm_bio_prison_cell *new_ocell)
{
	struct dm_cache_migration *mg = prealloc_get_migration(structs);

	mg->err = false;
	mg->discard = false;
	mg->writeback = false;
	mg->demote = true;
	mg->promote = true;
	mg->requeue_holder = true;
	mg->invalidate = false;
	mg->cache = cache;
	mg->old_oblock = old_oblock;
	mg->new_oblock = new_oblock;
	mg->cblock = cblock;
	mg->old_ocell = old_ocell;
	mg->new_ocell = new_ocell;
	mg->start_jiffies = jiffies;

	inc_io_migrations(cache);
	quiesce_migration(mg);
}

/*
 * Invalidate a cache entry.  No writeback occurs; any dirty data is
 * discarded.
 */
static void invalidate(struct cache *cache, struct prealloc *structs,
		       dm_oblock_t oblock, dm_cblock_t cblock,
		       struct dm_bio_prison_cell *cell)
{
	struct dm_cache_migration *mg = prealloc_get_migration(structs);

	mg->err = false;
	mg->discard = false;
	mg->writeback = false;
	mg->demote = true;
	mg->promote = false;
	mg->requeue_holder = true;
	mg->invalidate = true;
	mg->cache = cache;
	mg->old_oblock = oblock;
	mg->cblock = cblock;
	mg->old_ocell = cell;
	mg->new_ocell = NULL;
	mg->start_jiffies = jiffies;

	inc_io_migrations(cache);
	quiesce_migration(mg);
}

static void discard(struct cache *cache, struct prealloc *structs,
		    struct dm_bio_prison_cell *cell)
{
	struct dm_cache_migration *mg = prealloc_get_migration(structs);

	mg->err = false;
	mg->discard = true;
	mg->writeback = false;
	mg->demote = false;
	mg->promote = false;
	mg->requeue_holder = false;
	mg->invalidate = false;
	mg->cache = cache;
	mg->old_ocell = NULL;
	mg->new_ocell = cell;
	mg->start_jiffies = jiffies;

	quiesce_migration(mg);
}

/*----------------------------------------------------------------
 * bio processing
 *--------------------------------------------------------------*/
static void defer_bio(struct cache *cache, struct bio *bio)
{
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);
	bio_list_add(&cache->deferred_bios, bio);
	spin_unlock_irqrestore(&cache->lock, flags);

	wake_worker(cache);
}

static void process_flush_bio(struct cache *cache, struct bio *bio)
{
	size_t pb_data_size = get_per_bio_data_size(cache);
	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);

	BUG_ON(bio->bi_iter.bi_size);
	if (!pb->req_nr)
		remap_to_origin(cache, bio);
	else
		remap_to_cache(cache, bio, 0);

	/*
	 * REQ_FLUSH is not directed at any particular block so we don't
	 * need to inc_ds().  REQ_FUA's are split into a write + REQ_FLUSH
	 * by dm core.
	 */
	issue(cache, bio);
}

static void process_discard_bio(struct cache *cache, struct prealloc *structs,
				struct bio *bio)
{
	int r;
	dm_dblock_t b, e;
	struct dm_bio_prison_cell *cell_prealloc, *new_ocell;

	calc_discard_block_range(cache, bio, &b, &e);
	if (b == e) {
		bio_endio(bio);
		return;
	}

	cell_prealloc = prealloc_get_cell(structs);
	r = bio_detain_range(cache, dblock_to_oblock(cache, b), dblock_to_oblock(cache, e), bio, cell_prealloc,
			     (cell_free_fn) prealloc_put_cell,
			     structs, &new_ocell);
	if (r > 0)
		return;

	discard(cache, structs, new_ocell);
}

static bool spare_migration_bandwidth(struct cache *cache)
{
	sector_t current_volume = (atomic_read(&cache->nr_io_migrations) + 1) *
		cache->sectors_per_block;
	return current_volume < cache->migration_threshold;
}
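
/*
 * Worked example with illustrative numbers: with sectors_per_block = 1024
 * and migration_threshold = 2048, (nr_io_migrations + 1) * 1024 < 2048
 * only holds while nr_io_migrations is 0, so at most one background copy
 * would be in flight at a time.
 */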

static void inc_hit_counter(struct cache *cache, struct bio *bio)
{
	atomic_inc(bio_data_dir(bio) == READ ?
		   &cache->stats.read_hit : &cache->stats.write_hit);
}

static void inc_miss_counter(struct cache *cache, struct bio *bio)
{
	atomic_inc(bio_data_dir(bio) == READ ?
		   &cache->stats.read_miss : &cache->stats.write_miss);
}

/*----------------------------------------------------------------*/

struct inc_detail {
	struct cache *cache;
	struct bio_list bios_for_issue;
	struct bio_list unhandled_bios;
	bool any_writes;
};

static void inc_fn(void *context, struct dm_bio_prison_cell *cell)
{
	struct bio *bio;
	struct inc_detail *detail = context;
	struct cache *cache = detail->cache;

	inc_ds(cache, cell->holder, cell);
	if (bio_data_dir(cell->holder) == WRITE)
		detail->any_writes = true;

	while ((bio = bio_list_pop(&cell->bios))) {
		if (discard_or_flush(bio)) {
			bio_list_add(&detail->unhandled_bios, bio);
			continue;
		}

		if (bio_data_dir(bio) == WRITE)
			detail->any_writes = true;

		bio_list_add(&detail->bios_for_issue, bio);
		inc_ds(cache, bio, cell);
	}
}

static void remap_cell_to_origin_clear_discard(struct cache *cache,
					       struct dm_bio_prison_cell *cell,
					       dm_oblock_t oblock, bool issue_holder)
{
	struct bio *bio;
	unsigned long flags;
	struct inc_detail detail;

	detail.cache = cache;
	bio_list_init(&detail.bios_for_issue);
	bio_list_init(&detail.unhandled_bios);
	detail.any_writes = false;

	spin_lock_irqsave(&cache->lock, flags);
	dm_cell_visit_release(cache->prison, inc_fn, &detail, cell);
	bio_list_merge(&cache->deferred_bios, &detail.unhandled_bios);
	spin_unlock_irqrestore(&cache->lock, flags);

	remap_to_origin(cache, cell->holder);
	if (issue_holder)
		issue(cache, cell->holder);
	else
		accounted_begin(cache, cell->holder);

	if (detail.any_writes)
		clear_discard(cache, oblock_to_dblock(cache, oblock));

	while ((bio = bio_list_pop(&detail.bios_for_issue))) {
		remap_to_origin(cache, bio);
		issue(cache, bio);
	}

	free_prison_cell(cache, cell);
}

static void remap_cell_to_cache_dirty(struct cache *cache, struct dm_bio_prison_cell *cell,
				      dm_oblock_t oblock, dm_cblock_t cblock, bool issue_holder)
{
	struct bio *bio;
	unsigned long flags;
	struct inc_detail detail;

	detail.cache = cache;
	bio_list_init(&detail.bios_for_issue);
	bio_list_init(&detail.unhandled_bios);
	detail.any_writes = false;

	spin_lock_irqsave(&cache->lock, flags);
	dm_cell_visit_release(cache->prison, inc_fn, &detail, cell);
	bio_list_merge(&cache->deferred_bios, &detail.unhandled_bios);
	spin_unlock_irqrestore(&cache->lock, flags);

	remap_to_cache(cache, cell->holder, cblock);
	if (issue_holder)
		issue(cache, cell->holder);
	else
		accounted_begin(cache, cell->holder);

	if (detail.any_writes) {
		set_dirty(cache, oblock, cblock);
		clear_discard(cache, oblock_to_dblock(cache, oblock));
	}

	while ((bio = bio_list_pop(&detail.bios_for_issue))) {
		remap_to_cache(cache, bio, cblock);
		issue(cache, bio);
	}

	free_prison_cell(cache, cell);
}

/*----------------------------------------------------------------*/

struct old_oblock_lock {
	struct policy_locker locker;
	struct cache *cache;
	struct prealloc *structs;
	struct dm_bio_prison_cell *cell;
};

static int null_locker(struct policy_locker *locker, dm_oblock_t b)
{
	/* This should never be called */
	BUG();
	return 0;
}

static int cell_locker(struct policy_locker *locker, dm_oblock_t b)
{
	struct old_oblock_lock *l = container_of(locker, struct old_oblock_lock, locker);
	struct dm_bio_prison_cell *cell_prealloc = prealloc_get_cell(l->structs);

	return bio_detain(l->cache, b, NULL, cell_prealloc,
			  (cell_free_fn) prealloc_put_cell,
			  l->structs, &l->cell);
}

static void process_cell(struct cache *cache, struct prealloc *structs,
			 struct dm_bio_prison_cell *new_ocell)
{
	int r;
	bool release_cell = true;
	struct bio *bio = new_ocell->holder;
	dm_oblock_t block = get_bio_block(cache, bio);
	struct policy_result lookup_result;
	bool passthrough = passthrough_mode(&cache->features);
	bool fast_promotion, can_migrate;
	struct old_oblock_lock ool;

	fast_promotion = is_discarded_oblock(cache, block) || bio_writes_complete_block(cache, bio);
	can_migrate = !passthrough && (fast_promotion || spare_migration_bandwidth(cache));

	ool.locker.fn = cell_locker;
	ool.cache = cache;
	ool.structs = structs;
	ool.cell = NULL;
	r = policy_map(cache->policy, block, true, can_migrate, fast_promotion,
		       bio, &ool.locker, &lookup_result);

	if (r == -EWOULDBLOCK)
		/* migration has been denied */
		lookup_result.op = POLICY_MISS;

	switch (lookup_result.op) {
	case POLICY_HIT:
		if (passthrough) {
			inc_miss_counter(cache, bio);

			/*
			 * Passthrough always maps to the origin,
			 * invalidating any cache blocks that are written
			 * to.
			 */

			if (bio_data_dir(bio) == WRITE) {
				atomic_inc(&cache->stats.demotion);
				invalidate(cache, structs, block, lookup_result.cblock, new_ocell);
				release_cell = false;

			} else {
				/* FIXME: factor out issue_origin() */
				remap_to_origin_clear_discard(cache, bio, block);
				inc_and_issue(cache, bio, new_ocell);
			}
		} else {
			inc_hit_counter(cache, bio);

			if (bio_data_dir(bio) == WRITE &&
			    writethrough_mode(&cache->features) &&
			    !is_dirty(cache, lookup_result.cblock)) {
				remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
				inc_and_issue(cache, bio, new_ocell);

			} else {
				remap_cell_to_cache_dirty(cache, new_ocell, block, lookup_result.cblock, true);
				release_cell = false;
			}
		}

		break;

	case POLICY_MISS:
		inc_miss_counter(cache, bio);
		remap_cell_to_origin_clear_discard(cache, new_ocell, block, true);
		release_cell = false;
		break;

	case POLICY_NEW:
		atomic_inc(&cache->stats.promotion);
		promote(cache, structs, block, lookup_result.cblock, new_ocell);
		release_cell = false;
		break;

	case POLICY_REPLACE:
		atomic_inc(&cache->stats.demotion);
		atomic_inc(&cache->stats.promotion);
		demote_then_promote(cache, structs, lookup_result.old_oblock,
				    block, lookup_result.cblock,
				    ool.cell, new_ocell);
		release_cell = false;
		break;

	default:
		DMERR_LIMIT("%s: %s: erroring bio, unknown policy op: %u",
			    cache_device_name(cache), __func__,
			    (unsigned) lookup_result.op);
		bio_io_error(bio);
	}

	if (release_cell)
		cell_defer(cache, new_ocell, false);
}

static void process_bio(struct cache *cache, struct prealloc *structs,
			struct bio *bio)
{
	int r;
	dm_oblock_t block = get_bio_block(cache, bio);
	struct dm_bio_prison_cell *cell_prealloc, *new_ocell;

	/*
	 * Check to see if that block is currently migrating.
	 */
	cell_prealloc = prealloc_get_cell(structs);
	r = bio_detain(cache, block, bio, cell_prealloc,
		       (cell_free_fn) prealloc_put_cell,
		       structs, &new_ocell);
	if (r > 0)
		return;

	process_cell(cache, structs, new_ocell);
}

static int need_commit_due_to_time(struct cache *cache)
{
	/* the first test catches jiffies wrapping */
	return jiffies < cache->last_commit_jiffies ||
	       jiffies > cache->last_commit_jiffies + COMMIT_PERIOD;
}

/*
 * A non-zero return indicates read_only or fail_io mode.
 */
static int commit(struct cache *cache, bool clean_shutdown)
{
	int r;

	if (get_cache_mode(cache) >= CM_READ_ONLY)
		return -EINVAL;

	atomic_inc(&cache->stats.commit_count);
	r = dm_cache_commit(cache->cmd, clean_shutdown);
	if (r)
		metadata_operation_failed(cache, "dm_cache_commit", r);

	return r;
}

static int commit_if_needed(struct cache *cache)
{
	int r = 0;

	if ((cache->commit_requested || need_commit_due_to_time(cache)) &&
	    dm_cache_changed_this_transaction(cache->cmd)) {
		r = commit(cache, false);
		cache->commit_requested = false;
		cache->last_commit_jiffies = jiffies;
	}

	return r;
}

static void process_deferred_bios(struct cache *cache)
{
	bool prealloc_used = false;
	unsigned long flags;
	struct bio_list bios;
	struct bio *bio;
	struct prealloc structs;

	memset(&structs, 0, sizeof(structs));
	bio_list_init(&bios);

	spin_lock_irqsave(&cache->lock, flags);
	bio_list_merge(&bios, &cache->deferred_bios);
	bio_list_init(&cache->deferred_bios);
	spin_unlock_irqrestore(&cache->lock, flags);

	while (!bio_list_empty(&bios)) {
		/*
		 * If we've got no free migration structs, and processing
		 * this bio might require one, we pause until there are some
		 * prepared mappings to process.
		 */
		prealloc_used = true;
		if (prealloc_data_structs(cache, &structs)) {
			spin_lock_irqsave(&cache->lock, flags);
			bio_list_merge(&cache->deferred_bios, &bios);
			spin_unlock_irqrestore(&cache->lock, flags);
			break;
		}

		bio = bio_list_pop(&bios);

		if (bio->bi_rw & REQ_FLUSH)
			process_flush_bio(cache, bio);
		else if (bio->bi_rw & REQ_DISCARD)
			process_discard_bio(cache, &structs, bio);
		else
			process_bio(cache, &structs, bio);
	}

	if (prealloc_used)
		prealloc_free_structs(cache, &structs);
}

static void process_deferred_cells(struct cache *cache)
{
	bool prealloc_used = false;
	unsigned long flags;
	struct dm_bio_prison_cell *cell, *tmp;
	struct list_head cells;
	struct prealloc structs;

	memset(&structs, 0, sizeof(structs));

	INIT_LIST_HEAD(&cells);

	spin_lock_irqsave(&cache->lock, flags);
	list_splice_init(&cache->deferred_cells, &cells);
	spin_unlock_irqrestore(&cache->lock, flags);

	list_for_each_entry_safe(cell, tmp, &cells, user_list) {
		/*
		 * If we've got no free migration structs, and processing
		 * this bio might require one, we pause until there are some
		 * prepared mappings to process.
		 */
		prealloc_used = true;
		if (prealloc_data_structs(cache, &structs)) {
			spin_lock_irqsave(&cache->lock, flags);
			list_splice(&cells, &cache->deferred_cells);
			spin_unlock_irqrestore(&cache->lock, flags);
			break;
		}

		process_cell(cache, &structs, cell);
	}

	if (prealloc_used)
		prealloc_free_structs(cache, &structs);
}

static void process_deferred_flush_bios(struct cache *cache, bool submit_bios)
{
	unsigned long flags;
	struct bio_list bios;
	struct bio *bio;

	bio_list_init(&bios);

	spin_lock_irqsave(&cache->lock, flags);
	bio_list_merge(&bios, &cache->deferred_flush_bios);
	bio_list_init(&cache->deferred_flush_bios);
	spin_unlock_irqrestore(&cache->lock, flags);

	/*
	 * These bios have already been through inc_ds()
	 */
	while ((bio = bio_list_pop(&bios)))
		submit_bios ? accounted_request(cache, bio) : bio_io_error(bio);
}

static void process_deferred_writethrough_bios(struct cache *cache)
{
	unsigned long flags;
	struct bio_list bios;
	struct bio *bio;

	bio_list_init(&bios);

	spin_lock_irqsave(&cache->lock, flags);
	bio_list_merge(&bios, &cache->deferred_writethrough_bios);
	bio_list_init(&cache->deferred_writethrough_bios);
	spin_unlock_irqrestore(&cache->lock, flags);

	/*
	 * These bios have already been through inc_ds()
	 */
	while ((bio = bio_list_pop(&bios)))
		accounted_request(cache, bio);
}

static void writeback_some_dirty_blocks(struct cache *cache)
{
	bool prealloc_used = false;
	dm_oblock_t oblock;
	dm_cblock_t cblock;
	struct prealloc structs;
	struct dm_bio_prison_cell *old_ocell;
	bool busy = !iot_idle_for(&cache->origin_tracker, HZ);

	memset(&structs, 0, sizeof(structs));

	while (spare_migration_bandwidth(cache)) {
		if (policy_writeback_work(cache->policy, &oblock, &cblock, busy))
			break;	/* no work to do */

		prealloc_used = true;
		if (prealloc_data_structs(cache, &structs) ||
		    get_cell(cache, oblock, &structs, &old_ocell)) {
			policy_set_dirty(cache->policy, oblock);
			break;
		}

		writeback(cache, &structs, oblock, cblock, old_ocell);
	}

	if (prealloc_used)
		prealloc_free_structs(cache, &structs);
}

/*----------------------------------------------------------------
 * Invalidations.
 * Dropping something from the cache *without* writing back.
 *--------------------------------------------------------------*/

static void process_invalidation_request(struct cache *cache, struct invalidation_request *req)
{
	int r = 0;
	uint64_t begin = from_cblock(req->cblocks->begin);
	uint64_t end = from_cblock(req->cblocks->end);

	while (begin != end) {
		r = policy_remove_cblock(cache->policy, to_cblock(begin));
		if (!r) {
			r = dm_cache_remove_mapping(cache->cmd, to_cblock(begin));
			if (r) {
				metadata_operation_failed(cache, "dm_cache_remove_mapping", r);
				break;
			}

		} else if (r == -ENODATA) {
			/* harmless, already unmapped */
			r = 0;

		} else {
			DMERR("%s: policy_remove_cblock failed", cache_device_name(cache));
			break;
		}

		begin++;
	}

	cache->commit_requested = true;

	req->err = r;
	atomic_set(&req->complete, 1);

	wake_up(&req->result_wait);
}

static void process_invalidation_requests(struct cache *cache)
{
	struct list_head list;
	struct invalidation_request *req, *tmp;

	INIT_LIST_HEAD(&list);
	spin_lock(&cache->invalidation_lock);
	list_splice_init(&cache->invalidation_requests, &list);
	spin_unlock(&cache->invalidation_lock);

	list_for_each_entry_safe (req, tmp, &list, list)
		process_invalidation_request(cache, req);
}

/*----------------------------------------------------------------
 * Main worker loop
 *--------------------------------------------------------------*/

static bool is_quiescing(struct cache *cache)
{
	return atomic_read(&cache->quiescing);
}

static void ack_quiescing(struct cache *cache)
{
	if (is_quiescing(cache)) {
		atomic_inc(&cache->quiescing_ack);
		wake_up(&cache->quiescing_wait);
	}
}

static void wait_for_quiescing_ack(struct cache *cache)
{
	wait_event(cache->quiescing_wait, atomic_read(&cache->quiescing_ack));
}

static void start_quiescing(struct cache *cache)
{
	atomic_inc(&cache->quiescing);
	wait_for_quiescing_ack(cache);
}

static void stop_quiescing(struct cache *cache)
{
	atomic_set(&cache->quiescing, 0);
	atomic_set(&cache->quiescing_ack, 0);
}

static void wait_for_migrations(struct cache *cache)
{
	wait_event(cache->migration_wait, !atomic_read(&cache->nr_allocated_migrations));
}

static void stop_worker(struct cache *cache)
{
	cancel_delayed_work(&cache->waker);
	flush_workqueue(cache->wq);
}

static void requeue_deferred_cells(struct cache *cache)
{
	unsigned long flags;
	struct list_head cells;
	struct dm_bio_prison_cell *cell, *tmp;

	INIT_LIST_HEAD(&cells);
	spin_lock_irqsave(&cache->lock, flags);
	list_splice_init(&cache->deferred_cells, &cells);
	spin_unlock_irqrestore(&cache->lock, flags);

	list_for_each_entry_safe(cell, tmp, &cells, user_list)
		cell_requeue(cache, cell);
}

static void requeue_deferred_bios(struct cache *cache)
{
	struct bio *bio;
	struct bio_list bios;

	bio_list_init(&bios);
	bio_list_merge(&bios, &cache->deferred_bios);
	bio_list_init(&cache->deferred_bios);

	while ((bio = bio_list_pop(&bios))) {
		bio->bi_error = DM_ENDIO_REQUEUE;
		bio_endio(bio);
	}
}
2226
2227static int more_work(struct cache *cache)
2228{
2229 if (is_quiescing(cache))
2230 return !list_empty(&cache->quiesced_migrations) ||
2231 !list_empty(&cache->completed_migrations) ||
2232 !list_empty(&cache->need_commit_migrations);
2233 else
2234 return !bio_list_empty(&cache->deferred_bios) ||
2235 !list_empty(&cache->deferred_cells) ||
2236 !bio_list_empty(&cache->deferred_flush_bios) ||
2237 !bio_list_empty(&cache->deferred_writethrough_bios) ||
2238 !list_empty(&cache->quiesced_migrations) ||
2239 !list_empty(&cache->completed_migrations) ||
2240 !list_empty(&cache->need_commit_migrations) ||
2241 cache->invalidate;
2242}
2243
2244static void do_worker(struct work_struct *ws)
2245{
2246 struct cache *cache = container_of(ws, struct cache, worker);
2247
2248 do {
2249 if (!is_quiescing(cache)) {
2250 writeback_some_dirty_blocks(cache);
2251 process_deferred_writethrough_bios(cache);
2252 process_deferred_bios(cache);
2253 process_deferred_cells(cache);
2254 process_invalidation_requests(cache);
2255 }
2256
2257 process_migrations(cache, &cache->quiesced_migrations, issue_copy_or_discard);
2258 process_migrations(cache, &cache->completed_migrations, complete_migration);
2259
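		/*
		 * If the commit fails, the deferred flush bios are errored
		 * rather than issued, and migrations waiting on the commit
		 * are failed; otherwise both are allowed to complete.
		 */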
2260 if (commit_if_needed(cache)) {
2261 process_deferred_flush_bios(cache, false);
2262 process_migrations(cache, &cache->need_commit_migrations, migration_failure);
2263 } else {
2264 process_deferred_flush_bios(cache, true);
2265 process_migrations(cache, &cache->need_commit_migrations,
2266 migration_success_post_commit);
2267 }
2268
2269 ack_quiescing(cache);
2270
2271 } while (more_work(cache));
2272}
2273
2274
/*
 * We want to commit periodically so that not too much
 * unwritten metadata builds up.
 */
2278static void do_waker(struct work_struct *ws)
2279{
2280 struct cache *cache = container_of(to_delayed_work(ws), struct cache, waker);
2281 policy_tick(cache->policy, true);
2282 wake_worker(cache);
2283 queue_delayed_work(cache->wq, &cache->waker, COMMIT_PERIOD);
2284}
2285
2286
/*----------------------------------------------------------------*/
2288static int is_congested(struct dm_dev *dev, int bdi_bits)
2289{
2290 struct request_queue *q = bdev_get_queue(dev->bdev);
2291 return bdi_congested(&q->backing_dev_info, bdi_bits);
2292}
2293
2294static int cache_is_congested(struct dm_target_callbacks *cb, int bdi_bits)
2295{
2296 struct cache *cache = container_of(cb, struct cache, callbacks);
2297
2298 return is_congested(cache->origin_dev, bdi_bits) ||
2299 is_congested(cache->cache_dev, bdi_bits);
2300}
2301
2302
/*----------------------------------------------------------------
 * Target methods
 *--------------------------------------------------------------*/

/*
 * This function gets called on the error paths of the constructor, so we
 * have to cope with a partially initialised struct.
 */
2310static void destroy(struct cache *cache)
2311{
2312 unsigned i;
2313
2314 mempool_destroy(cache->migration_pool);
2315
2316 if (cache->all_io_ds)
2317 dm_deferred_set_destroy(cache->all_io_ds);
2318
2319 if (cache->prison)
2320 dm_bio_prison_destroy(cache->prison);
2321
2322 if (cache->wq)
2323 destroy_workqueue(cache->wq);
2324
2325 if (cache->dirty_bitset)
2326 free_bitset(cache->dirty_bitset);
2327
2328 if (cache->discard_bitset)
2329 free_bitset(cache->discard_bitset);
2330
2331 if (cache->copier)
2332 dm_kcopyd_client_destroy(cache->copier);
2333
2334 if (cache->cmd)
2335 dm_cache_metadata_close(cache->cmd);
2336
2337 if (cache->metadata_dev)
2338 dm_put_device(cache->ti, cache->metadata_dev);
2339
2340 if (cache->origin_dev)
2341 dm_put_device(cache->ti, cache->origin_dev);
2342
2343 if (cache->cache_dev)
2344 dm_put_device(cache->ti, cache->cache_dev);
2345
2346 if (cache->policy)
2347 dm_cache_policy_destroy(cache->policy);
2348
	for (i = 0; i < cache->nr_ctr_args; i++)
2350 kfree(cache->ctr_args[i]);
2351 kfree(cache->ctr_args);
2352
2353 kfree(cache);
2354}
2355
2356static void cache_dtr(struct dm_target *ti)
2357{
2358 struct cache *cache = ti->private;
2359
2360 destroy(cache);
2361}
2362
2363static sector_t get_dev_size(struct dm_dev *dev)
2364{
2365 return i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT;
2366}
2367
/*
 * Construct a cache device mapping:
 *
 * cache <metadata dev> <cache dev> <origin dev> <block size>
 *       <#feature args> [<feature arg>]*
 *       <policy> <#policy args> [<policy arg>]*
 *
 * metadata dev    : fast device holding the persistent metadata
 * cache dev       : fast device holding cached data blocks
 * origin dev      : slow device holding the origin's data blocks
 * block size      : cache unit size in sectors
 *
 * #feature args   : number of feature arguments passed
 * feature args    : writethrough, writeback or passthrough
 *                   (writeback is the default)
 *
 * policy          : the replacement policy to use
 * #policy args    : an even number of policy arguments corresponding
 *                   to key/value pairs passed to the policy
 * policy args     : key/value pairs passed to the policy,
 *                   e.g. 'migration_threshold 2048'
 *
 * An illustrative table line (device paths and policy name are examples
 * only):
 *
 *   cache /dev/fast-meta /dev/fast /dev/slow 512 0 default 0
 */
2399struct cache_args {
2400 struct dm_target *ti;
2401
2402 struct dm_dev *metadata_dev;
2403
2404 struct dm_dev *cache_dev;
2405 sector_t cache_sectors;
2406
2407 struct dm_dev *origin_dev;
2408 sector_t origin_sectors;
2409
2410 uint32_t block_size;
2411
2412 const char *policy_name;
2413 int policy_argc;
2414 const char **policy_argv;
2415
2416 struct cache_features features;
2417};
2418
2419static void destroy_cache_args(struct cache_args *ca)
2420{
2421 if (ca->metadata_dev)
2422 dm_put_device(ca->ti, ca->metadata_dev);
2423
2424 if (ca->cache_dev)
2425 dm_put_device(ca->ti, ca->cache_dev);
2426
2427 if (ca->origin_dev)
2428 dm_put_device(ca->ti, ca->origin_dev);
2429
2430 kfree(ca);
2431}
2432
2433static bool at_least_one_arg(struct dm_arg_set *as, char **error)
2434{
2435 if (!as->argc) {
2436 *error = "Insufficient args";
2437 return false;
2438 }
2439
2440 return true;
2441}
2442
2443static int parse_metadata_dev(struct cache_args *ca, struct dm_arg_set *as,
2444 char **error)
2445{
2446 int r;
2447 sector_t metadata_dev_size;
2448 char b[BDEVNAME_SIZE];
2449
2450 if (!at_least_one_arg(as, error))
2451 return -EINVAL;
2452
2453 r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
2454 &ca->metadata_dev);
2455 if (r) {
2456 *error = "Error opening metadata device";
2457 return r;
2458 }
2459
2460 metadata_dev_size = get_dev_size(ca->metadata_dev);
2461 if (metadata_dev_size > DM_CACHE_METADATA_MAX_SECTORS_WARNING)
2462 DMWARN("Metadata device %s is larger than %u sectors: excess space will not be used.",
2463 bdevname(ca->metadata_dev->bdev, b), THIN_METADATA_MAX_SECTORS);
2464
2465 return 0;
2466}
2467
2468static int parse_cache_dev(struct cache_args *ca, struct dm_arg_set *as,
2469 char **error)
2470{
2471 int r;
2472
2473 if (!at_least_one_arg(as, error))
2474 return -EINVAL;
2475
2476 r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
2477 &ca->cache_dev);
2478 if (r) {
2479 *error = "Error opening cache device";
2480 return r;
2481 }
2482 ca->cache_sectors = get_dev_size(ca->cache_dev);
2483
2484 return 0;
2485}
2486
2487static int parse_origin_dev(struct cache_args *ca, struct dm_arg_set *as,
2488 char **error)
2489{
2490 int r;
2491
2492 if (!at_least_one_arg(as, error))
2493 return -EINVAL;
2494
2495 r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
2496 &ca->origin_dev);
2497 if (r) {
2498 *error = "Error opening origin device";
2499 return r;
2500 }
2501
2502 ca->origin_sectors = get_dev_size(ca->origin_dev);
2503 if (ca->ti->len > ca->origin_sectors) {
2504 *error = "Device size larger than cached device";
2505 return -EINVAL;
2506 }
2507
2508 return 0;
2509}
2510
2511static int parse_block_size(struct cache_args *ca, struct dm_arg_set *as,
2512 char **error)
2513{
2514 unsigned long block_size;
2515
2516 if (!at_least_one_arg(as, error))
2517 return -EINVAL;
2518
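	/*
	 * The block size must be a multiple of
	 * DATA_DEV_BLOCK_SIZE_MIN_SECTORS and lie between
	 * DATA_DEV_BLOCK_SIZE_MIN_SECTORS and
	 * DATA_DEV_BLOCK_SIZE_MAX_SECTORS inclusive.
	 */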
2519 if (kstrtoul(dm_shift_arg(as), 10, &block_size) || !block_size ||
2520 block_size < DATA_DEV_BLOCK_SIZE_MIN_SECTORS ||
2521 block_size > DATA_DEV_BLOCK_SIZE_MAX_SECTORS ||
2522 block_size & (DATA_DEV_BLOCK_SIZE_MIN_SECTORS - 1)) {
2523 *error = "Invalid data block size";
2524 return -EINVAL;
2525 }
2526
2527 if (block_size > ca->cache_sectors) {
2528 *error = "Data block size is larger than the cache device";
2529 return -EINVAL;
2530 }
2531
2532 ca->block_size = block_size;
2533
2534 return 0;
2535}
2536
2537static void init_features(struct cache_features *cf)
2538{
2539 cf->mode = CM_WRITE;
2540 cf->io_mode = CM_IO_WRITEBACK;
2541}
2542
2543static int parse_features(struct cache_args *ca, struct dm_arg_set *as,
2544 char **error)
2545{
2546 static struct dm_arg _args[] = {
2547 {0, 1, "Invalid number of cache feature arguments"},
2548 };
2549
2550 int r;
2551 unsigned argc;
2552 const char *arg;
2553 struct cache_features *cf = &ca->features;
2554
2555 init_features(cf);
2556
2557 r = dm_read_arg_group(_args, as, &argc, error);
2558 if (r)
2559 return -EINVAL;
2560
2561 while (argc--) {
2562 arg = dm_shift_arg(as);
2563
2564 if (!strcasecmp(arg, "writeback"))
2565 cf->io_mode = CM_IO_WRITEBACK;
2566
2567 else if (!strcasecmp(arg, "writethrough"))
2568 cf->io_mode = CM_IO_WRITETHROUGH;
2569
2570 else if (!strcasecmp(arg, "passthrough"))
2571 cf->io_mode = CM_IO_PASSTHROUGH;
2572
2573 else {
2574 *error = "Unrecognised cache feature requested";
2575 return -EINVAL;
2576 }
2577 }
2578
2579 return 0;
2580}
2581
2582static int parse_policy(struct cache_args *ca, struct dm_arg_set *as,
2583 char **error)
2584{
2585 static struct dm_arg _args[] = {
2586 {0, 1024, "Invalid number of policy arguments"},
2587 };
2588
2589 int r;
2590
2591 if (!at_least_one_arg(as, error))
2592 return -EINVAL;
2593
2594 ca->policy_name = dm_shift_arg(as);
2595
2596 r = dm_read_arg_group(_args, as, &ca->policy_argc, error);
2597 if (r)
2598 return -EINVAL;
2599
2600 ca->policy_argv = (const char **)as->argv;
2601 dm_consume_args(as, ca->policy_argc);
2602
2603 return 0;
2604}
2605
2606static int parse_cache_args(struct cache_args *ca, int argc, char **argv,
2607 char **error)
2608{
2609 int r;
2610 struct dm_arg_set as;
2611
2612 as.argc = argc;
2613 as.argv = argv;
2614
2615 r = parse_metadata_dev(ca, &as, error);
2616 if (r)
2617 return r;
2618
2619 r = parse_cache_dev(ca, &as, error);
2620 if (r)
2621 return r;
2622
2623 r = parse_origin_dev(ca, &as, error);
2624 if (r)
2625 return r;
2626
2627 r = parse_block_size(ca, &as, error);
2628 if (r)
2629 return r;
2630
2631 r = parse_features(ca, &as, error);
2632 if (r)
2633 return r;
2634
2635 r = parse_policy(ca, &as, error);
2636 if (r)
2637 return r;
2638
2639 return 0;
2640}
2641
2642
2643
2644static struct kmem_cache *migration_cache;
2645
2646#define NOT_CORE_OPTION 1
2647
2648static int process_config_option(struct cache *cache, const char *key, const char *value)
2649{
2650 unsigned long tmp;
2651
2652 if (!strcasecmp(key, "migration_threshold")) {
2653 if (kstrtoul(value, 10, &tmp))
2654 return -EINVAL;
2655
2656 cache->migration_threshold = tmp;
2657 return 0;
2658 }
2659
2660 return NOT_CORE_OPTION;
2661}
2662
2663static int set_config_value(struct cache *cache, const char *key, const char *value)
2664{
2665 int r = process_config_option(cache, key, value);
2666
2667 if (r == NOT_CORE_OPTION)
2668 r = policy_set_config_value(cache->policy, key, value);
2669
2670 if (r)
2671 DMWARN("bad config value for %s: %s", key, value);
2672
2673 return r;
2674}
2675
2676static int set_config_values(struct cache *cache, int argc, const char **argv)
2677{
2678 int r = 0;
2679
2680 if (argc & 1) {
2681 DMWARN("Odd number of policy arguments given but they should be <key> <value> pairs.");
2682 return -EINVAL;
2683 }
2684
2685 while (argc) {
2686 r = set_config_value(cache, argv[0], argv[1]);
2687 if (r)
2688 break;
2689
2690 argc -= 2;
2691 argv += 2;
2692 }
2693
2694 return r;
2695}
2696
2697static int create_cache_policy(struct cache *cache, struct cache_args *ca,
2698 char **error)
2699{
2700 struct dm_cache_policy *p = dm_cache_policy_create(ca->policy_name,
2701 cache->cache_size,
2702 cache->origin_sectors,
2703 cache->sectors_per_block);
2704 if (IS_ERR(p)) {
2705 *error = "Error creating cache's policy";
2706 return PTR_ERR(p);
2707 }
2708 cache->policy = p;
2709
2710 return 0;
2711}
2712
2713
/*
 * The discard block size is a multiple of the cache block size, chosen so
 * that there are no more than 2^14 (MAX_DISCARD_BLOCKS) discard blocks
 * across the origin.
 */
2717#define MAX_DISCARD_BLOCKS (1 << 14)
2718
2719static bool too_many_discard_blocks(sector_t discard_block_size,
2720 sector_t origin_size)
2721{
2722 (void) sector_div(origin_size, discard_block_size);
2723
2724 return origin_size > MAX_DISCARD_BLOCKS;
2725}
2726
2727static sector_t calculate_discard_block_size(sector_t cache_block_size,
2728 sector_t origin_size)
2729{
2730 sector_t discard_block_size = cache_block_size;
2731
2732 if (origin_size)
2733 while (too_many_discard_blocks(discard_block_size, origin_size))
2734 discard_block_size *= 2;
2735
2736 return discard_block_size;
2737}
2738
2739static void set_cache_size(struct cache *cache, dm_cblock_t size)
2740{
2741 dm_block_t nr_blocks = from_cblock(size);
2742
2743 if (nr_blocks > (1 << 20) && cache->cache_size != size)
2744 DMWARN_LIMIT("You have created a cache device with a lot of individual cache blocks (%llu)\n"
2745 "All these mappings can consume a lot of kernel memory, and take some time to read/write.\n"
2746 "Please consider increasing the cache block size to reduce the overall cache block count.",
2747 (unsigned long long) nr_blocks);
2748
2749 cache->cache_size = size;
2750}
2751
2752#define DEFAULT_MIGRATION_THRESHOLD 2048
2753
2754static int cache_create(struct cache_args *ca, struct cache **result)
2755{
2756 int r = 0;
2757 char **error = &ca->ti->error;
2758 struct cache *cache;
2759 struct dm_target *ti = ca->ti;
2760 dm_block_t origin_blocks;
2761 struct dm_cache_metadata *cmd;
2762 bool may_format = ca->features.mode == CM_WRITE;
2763
2764 cache = kzalloc(sizeof(*cache), GFP_KERNEL);
2765 if (!cache)
2766 return -ENOMEM;
2767
2768 cache->ti = ca->ti;
2769 ti->private = cache;
2770 ti->num_flush_bios = 2;
2771 ti->flush_supported = true;
2772
2773 ti->num_discard_bios = 1;
2774 ti->discards_supported = true;
2775 ti->discard_zeroes_data_unsupported = true;
2776 ti->split_discard_bios = false;
2777
2778 cache->features = ca->features;
2779 ti->per_io_data_size = get_per_bio_data_size(cache);
2780
2781 cache->callbacks.congested_fn = cache_is_congested;
2782 dm_table_add_target_callbacks(ti->table, &cache->callbacks);
2783
2784 cache->metadata_dev = ca->metadata_dev;
2785 cache->origin_dev = ca->origin_dev;
2786 cache->cache_dev = ca->cache_dev;
2787
2788 ca->metadata_dev = ca->origin_dev = ca->cache_dev = NULL;
2789
2790
2791 origin_blocks = cache->origin_sectors = ca->origin_sectors;
2792 origin_blocks = block_div(origin_blocks, ca->block_size);
2793 cache->origin_blocks = to_oblock(origin_blocks);
2794
2795 cache->sectors_per_block = ca->block_size;
2796 if (dm_set_target_max_io_len(ti, cache->sectors_per_block)) {
2797 r = -EINVAL;
2798 goto bad;
2799 }
2800
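	/*
	 * A power of two block size makes block arithmetic cheap: we can
	 * use shifts instead of divisions.  sectors_per_block_shift == -1
	 * marks the slower, division-based path.
	 */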
2801 if (ca->block_size & (ca->block_size - 1)) {
2802 dm_block_t cache_size = ca->cache_sectors;
2803
2804 cache->sectors_per_block_shift = -1;
2805 cache_size = block_div(cache_size, ca->block_size);
2806 set_cache_size(cache, to_cblock(cache_size));
2807 } else {
2808 cache->sectors_per_block_shift = __ffs(ca->block_size);
2809 set_cache_size(cache, to_cblock(ca->cache_sectors >> cache->sectors_per_block_shift));
2810 }
2811
2812 r = create_cache_policy(cache, ca, error);
2813 if (r)
2814 goto bad;
2815
2816 cache->policy_nr_args = ca->policy_argc;
2817 cache->migration_threshold = DEFAULT_MIGRATION_THRESHOLD;
2818
2819 r = set_config_values(cache, ca->policy_argc, ca->policy_argv);
2820 if (r) {
2821 *error = "Error setting cache policy's config values";
2822 goto bad;
2823 }
2824
2825 cmd = dm_cache_metadata_open(cache->metadata_dev->bdev,
2826 ca->block_size, may_format,
2827 dm_cache_policy_get_hint_size(cache->policy));
2828 if (IS_ERR(cmd)) {
2829 *error = "Error creating metadata object";
2830 r = PTR_ERR(cmd);
2831 goto bad;
2832 }
2833 cache->cmd = cmd;
2834 set_cache_mode(cache, CM_WRITE);
2835 if (get_cache_mode(cache) != CM_WRITE) {
2836 *error = "Unable to get write access to metadata, please check/repair metadata.";
2837 r = -EINVAL;
2838 goto bad;
2839 }
2840
2841 if (passthrough_mode(&cache->features)) {
2842 bool all_clean;
2843
2844 r = dm_cache_metadata_all_clean(cache->cmd, &all_clean);
2845 if (r) {
2846 *error = "dm_cache_metadata_all_clean() failed";
2847 goto bad;
2848 }
2849
2850 if (!all_clean) {
2851 *error = "Cannot enter passthrough mode unless all blocks are clean";
2852 r = -EINVAL;
2853 goto bad;
2854 }
2855 }
2856
2857 spin_lock_init(&cache->lock);
2858 INIT_LIST_HEAD(&cache->deferred_cells);
2859 bio_list_init(&cache->deferred_bios);
2860 bio_list_init(&cache->deferred_flush_bios);
2861 bio_list_init(&cache->deferred_writethrough_bios);
2862 INIT_LIST_HEAD(&cache->quiesced_migrations);
2863 INIT_LIST_HEAD(&cache->completed_migrations);
2864 INIT_LIST_HEAD(&cache->need_commit_migrations);
2865 atomic_set(&cache->nr_allocated_migrations, 0);
2866 atomic_set(&cache->nr_io_migrations, 0);
2867 init_waitqueue_head(&cache->migration_wait);
2868
2869 init_waitqueue_head(&cache->quiescing_wait);
2870 atomic_set(&cache->quiescing, 0);
2871 atomic_set(&cache->quiescing_ack, 0);
2872
2873 r = -ENOMEM;
2874 atomic_set(&cache->nr_dirty, 0);
2875 cache->dirty_bitset = alloc_bitset(from_cblock(cache->cache_size));
2876 if (!cache->dirty_bitset) {
2877 *error = "could not allocate dirty bitset";
2878 goto bad;
2879 }
2880 clear_bitset(cache->dirty_bitset, from_cblock(cache->cache_size));
2881
2882 cache->discard_block_size =
2883 calculate_discard_block_size(cache->sectors_per_block,
2884 cache->origin_sectors);
2885 cache->discard_nr_blocks = to_dblock(dm_sector_div_up(cache->origin_sectors,
2886 cache->discard_block_size));
2887 cache->discard_bitset = alloc_bitset(from_dblock(cache->discard_nr_blocks));
2888 if (!cache->discard_bitset) {
2889 *error = "could not allocate discard bitset";
2890 goto bad;
2891 }
2892 clear_bitset(cache->discard_bitset, from_dblock(cache->discard_nr_blocks));
2893
2894 cache->copier = dm_kcopyd_client_create(&dm_kcopyd_throttle);
2895 if (IS_ERR(cache->copier)) {
2896 *error = "could not create kcopyd client";
2897 r = PTR_ERR(cache->copier);
2898 goto bad;
2899 }
2900
2901 cache->wq = alloc_ordered_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM);
2902 if (!cache->wq) {
2903 *error = "could not create workqueue for metadata object";
2904 goto bad;
2905 }
2906 INIT_WORK(&cache->worker, do_worker);
2907 INIT_DELAYED_WORK(&cache->waker, do_waker);
2908 cache->last_commit_jiffies = jiffies;
2909
2910 cache->prison = dm_bio_prison_create();
2911 if (!cache->prison) {
2912 *error = "could not create bio prison";
2913 goto bad;
2914 }
2915
2916 cache->all_io_ds = dm_deferred_set_create();
2917 if (!cache->all_io_ds) {
2918 *error = "could not create all_io deferred set";
2919 goto bad;
2920 }
2921
2922 cache->migration_pool = mempool_create_slab_pool(MIGRATION_POOL_SIZE,
2923 migration_cache);
2924 if (!cache->migration_pool) {
2925 *error = "Error creating cache's migration mempool";
2926 goto bad;
2927 }
2928
2929 cache->need_tick_bio = true;
2930 cache->sized = false;
2931 cache->invalidate = false;
2932 cache->commit_requested = false;
2933 cache->loaded_mappings = false;
2934 cache->loaded_discards = false;
2935
2936 load_stats(cache);
2937
2938 atomic_set(&cache->stats.demotion, 0);
2939 atomic_set(&cache->stats.promotion, 0);
2940 atomic_set(&cache->stats.copies_avoided, 0);
2941 atomic_set(&cache->stats.cache_cell_clash, 0);
2942 atomic_set(&cache->stats.commit_count, 0);
2943 atomic_set(&cache->stats.discard_count, 0);
2944
2945 spin_lock_init(&cache->invalidation_lock);
2946 INIT_LIST_HEAD(&cache->invalidation_requests);
2947
2948 iot_init(&cache->origin_tracker);
2949
2950 *result = cache;
2951 return 0;
2952
2953bad:
2954 destroy(cache);
2955 return r;
2956}
2957
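/*
 * We keep a copy of the ctr arguments so that cache_status() can
 * reproduce the table line.
 */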
2958static int copy_ctr_args(struct cache *cache, int argc, const char **argv)
2959{
2960 unsigned i;
2961 const char **copy;
2962
2963 copy = kcalloc(argc, sizeof(*copy), GFP_KERNEL);
2964 if (!copy)
2965 return -ENOMEM;
2966 for (i = 0; i < argc; i++) {
2967 copy[i] = kstrdup(argv[i], GFP_KERNEL);
2968 if (!copy[i]) {
2969 while (i--)
2970 kfree(copy[i]);
2971 kfree(copy);
2972 return -ENOMEM;
2973 }
2974 }
2975
2976 cache->nr_ctr_args = argc;
2977 cache->ctr_args = copy;
2978
2979 return 0;
2980}
2981
2982static int cache_ctr(struct dm_target *ti, unsigned argc, char **argv)
2983{
2984 int r = -EINVAL;
2985 struct cache_args *ca;
2986 struct cache *cache = NULL;
2987
2988 ca = kzalloc(sizeof(*ca), GFP_KERNEL);
2989 if (!ca) {
2990 ti->error = "Error allocating memory for cache";
2991 return -ENOMEM;
2992 }
2993 ca->ti = ti;
2994
2995 r = parse_cache_args(ca, argc, argv, &ti->error);
2996 if (r)
2997 goto out;
2998
2999 r = cache_create(ca, &cache);
3000 if (r)
3001 goto out;
3002
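	/*
	 * The first three args are the device paths; cache_status()
	 * regenerates those from the bdevs themselves, so only the
	 * remaining args need to be kept.
	 */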
3003 r = copy_ctr_args(cache, argc - 3, (const char **)argv + 3);
3004 if (r) {
3005 destroy(cache);
3006 goto out;
3007 }
3008
3009 ti->private = cache;
3010
3011out:
3012 destroy_cache_args(ca);
3013 return r;
3014}
3015
3016
/*----------------------------------------------------------------*/
3018static int cache_map(struct dm_target *ti, struct bio *bio)
3019{
3020 struct cache *cache = ti->private;
3021
3022 int r;
3023 struct dm_bio_prison_cell *cell = NULL;
3024 dm_oblock_t block = get_bio_block(cache, bio);
3025 size_t pb_data_size = get_per_bio_data_size(cache);
3026 bool can_migrate = false;
3027 bool fast_promotion;
3028 struct policy_result lookup_result;
3029 struct per_bio_data *pb = init_per_bio_data(bio, pb_data_size);
3030 struct old_oblock_lock ool;
3031
3032 ool.locker.fn = null_locker;
3033
3034 if (unlikely(from_oblock(block) >= from_oblock(cache->origin_blocks))) {
		/*
		 * This can only occur if the io goes to a partial block at
		 * the end of the origin device.  We don't cache these.
		 * Just remap to the origin and carry on.
		 */
3040 remap_to_origin(cache, bio);
3041 accounted_begin(cache, bio);
3042 return DM_MAPIO_REMAPPED;
3043 }
3044
3045 if (discard_or_flush(bio)) {
3046 defer_bio(cache, bio);
3047 return DM_MAPIO_SUBMITTED;
3048 }
3049
	/*
	 * Check to see if that block is currently migrating.
	 */
3053 cell = alloc_prison_cell(cache);
3054 if (!cell) {
3055 defer_bio(cache, bio);
3056 return DM_MAPIO_SUBMITTED;
3057 }
3058
3059 r = bio_detain(cache, block, bio, cell,
3060 (cell_free_fn) free_prison_cell,
3061 cache, &cell);
3062 if (r) {
3063 if (r < 0)
3064 defer_bio(cache, bio);
3065
3066 return DM_MAPIO_SUBMITTED;
3067 }
3068
3069 fast_promotion = is_discarded_oblock(cache, block) || bio_writes_complete_block(cache, bio);
3070
3071 r = policy_map(cache->policy, block, false, can_migrate, fast_promotion,
3072 bio, &ool.locker, &lookup_result);
3073 if (r == -EWOULDBLOCK) {
3074 cell_defer(cache, cell, true);
3075 return DM_MAPIO_SUBMITTED;
3076
3077 } else if (r) {
3078 DMERR_LIMIT("%s: Unexpected return from cache replacement policy: %d",
3079 cache_device_name(cache), r);
3080 cell_defer(cache, cell, false);
3081 bio_io_error(bio);
3082 return DM_MAPIO_SUBMITTED;
3083 }
3084
3085 r = DM_MAPIO_REMAPPED;
3086 switch (lookup_result.op) {
3087 case POLICY_HIT:
3088 if (passthrough_mode(&cache->features)) {
3089 if (bio_data_dir(bio) == WRITE) {
				/*
				 * We need to invalidate this block, so
				 * defer for the worker thread.
				 */
3094 cell_defer(cache, cell, true);
3095 r = DM_MAPIO_SUBMITTED;
3096
3097 } else {
3098 inc_miss_counter(cache, bio);
3099 remap_to_origin_clear_discard(cache, bio, block);
3100 accounted_begin(cache, bio);
3101 inc_ds(cache, bio, cell);
3102
3103
3104 cell_defer(cache, cell, false);
3105 }
3106
3107 } else {
3108 inc_hit_counter(cache, bio);
3109 if (bio_data_dir(bio) == WRITE && writethrough_mode(&cache->features) &&
3110 !is_dirty(cache, lookup_result.cblock)) {
3111 remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
3112 accounted_begin(cache, bio);
3113 inc_ds(cache, bio, cell);
3114 cell_defer(cache, cell, false);
3115
3116 } else
3117 remap_cell_to_cache_dirty(cache, cell, block, lookup_result.cblock, false);
3118 }
3119 break;
3120
3121 case POLICY_MISS:
3122 inc_miss_counter(cache, bio);
3123 if (pb->req_nr != 0) {
			/*
			 * This is a duplicate writethrough io that is no
			 * longer needed because the block has been demoted.
			 */
3128 bio_endio(bio);
3129
3130 cell_defer(cache, cell, false);
3131 r = DM_MAPIO_SUBMITTED;
3132
3133 } else
3134 remap_cell_to_origin_clear_discard(cache, cell, block, false);
3135 break;
3136
3137 default:
3138 DMERR_LIMIT("%s: %s: erroring bio: unknown policy op: %u",
3139 cache_device_name(cache), __func__,
3140 (unsigned) lookup_result.op);
3141 cell_defer(cache, cell, false);
3142 bio_io_error(bio);
3143 r = DM_MAPIO_SUBMITTED;
3144 }
3145
3146 return r;
3147}
3148
3149static int cache_end_io(struct dm_target *ti, struct bio *bio, int error)
3150{
3151 struct cache *cache = ti->private;
3152 unsigned long flags;
3153 size_t pb_data_size = get_per_bio_data_size(cache);
3154 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
3155
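	/*
	 * If this bio carried the tick flag, advance the policy's clock
	 * and rearm need_tick_bio so a subsequent bio gets tagged.
	 */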
3156 if (pb->tick) {
3157 policy_tick(cache->policy, false);
3158
3159 spin_lock_irqsave(&cache->lock, flags);
3160 cache->need_tick_bio = true;
3161 spin_unlock_irqrestore(&cache->lock, flags);
3162 }
3163
3164 check_for_quiesced_migrations(cache, pb);
3165 accounted_complete(cache, bio);
3166
3167 return 0;
3168}
3169
3170static int write_dirty_bitset(struct cache *cache)
3171{
	int r;
	unsigned i;
3173
3174 if (get_cache_mode(cache) >= CM_READ_ONLY)
3175 return -EINVAL;
3176
3177 for (i = 0; i < from_cblock(cache->cache_size); i++) {
3178 r = dm_cache_set_dirty(cache->cmd, to_cblock(i),
3179 is_dirty(cache, to_cblock(i)));
3180 if (r) {
3181 metadata_operation_failed(cache, "dm_cache_set_dirty", r);
3182 return r;
3183 }
3184 }
3185
3186 return 0;
3187}
3188
3189static int write_discard_bitset(struct cache *cache)
3190{
	int r;
	unsigned i;
3192
3193 if (get_cache_mode(cache) >= CM_READ_ONLY)
3194 return -EINVAL;
3195
3196 r = dm_cache_discard_bitset_resize(cache->cmd, cache->discard_block_size,
3197 cache->discard_nr_blocks);
3198 if (r) {
3199 DMERR("%s: could not resize on-disk discard bitset", cache_device_name(cache));
3200 metadata_operation_failed(cache, "dm_cache_discard_bitset_resize", r);
3201 return r;
3202 }
3203
3204 for (i = 0; i < from_dblock(cache->discard_nr_blocks); i++) {
3205 r = dm_cache_set_discard(cache->cmd, to_dblock(i),
3206 is_discarded(cache, to_dblock(i)));
3207 if (r) {
3208 metadata_operation_failed(cache, "dm_cache_set_discard", r);
3209 return r;
3210 }
3211 }
3212
3213 return 0;
3214}
3215
3216static int write_hints(struct cache *cache)
3217{
3218 int r;
3219
3220 if (get_cache_mode(cache) >= CM_READ_ONLY)
3221 return -EINVAL;
3222
3223 r = dm_cache_write_hints(cache->cmd, cache->policy);
3224 if (r) {
3225 metadata_operation_failed(cache, "dm_cache_write_hints", r);
3226 return r;
3227 }
3228
3229 return 0;
3230}
3231
3232
3233
3234
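/*
 * Write the dirty bitset, discard bitset and policy hints back to the
 * metadata device and commit.  Returns true only if everything was
 * persisted cleanly.
 */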
3235static bool sync_metadata(struct cache *cache)
3236{
3237 int r1, r2, r3, r4;
3238
3239 r1 = write_dirty_bitset(cache);
3240 if (r1)
3241 DMERR("%s: could not write dirty bitset", cache_device_name(cache));
3242
3243 r2 = write_discard_bitset(cache);
3244 if (r2)
3245 DMERR("%s: could not write discard bitset", cache_device_name(cache));
3246
3247 save_stats(cache);
3248
3249 r3 = write_hints(cache);
3250 if (r3)
3251 DMERR("%s: could not write hints", cache_device_name(cache));
3252
	/*
	 * If writing the above metadata failed, we still commit, but don't
	 * set the clean shutdown flag.  This effectively forces every dirty
	 * bit to be set on reload.
	 */
3258 r4 = commit(cache, !r1 && !r2 && !r3);
3259 if (r4)
3260 DMERR("%s: could not write cache metadata", cache_device_name(cache));
3261
3262 return !r1 && !r2 && !r3 && !r4;
3263}
3264
3265static void cache_postsuspend(struct dm_target *ti)
3266{
3267 struct cache *cache = ti->private;
3268
3269 start_quiescing(cache);
3270 wait_for_migrations(cache);
3271 stop_worker(cache);
3272 requeue_deferred_bios(cache);
3273 requeue_deferred_cells(cache);
3274 stop_quiescing(cache);
3275
3276 if (get_cache_mode(cache) == CM_WRITE)
3277 (void) sync_metadata(cache);
3278}
3279
3280static int load_mapping(void *context, dm_oblock_t oblock, dm_cblock_t cblock,
3281 bool dirty, uint32_t hint, bool hint_valid)
3282{
3283 int r;
3284 struct cache *cache = context;
3285
3286 r = policy_load_mapping(cache->policy, oblock, cblock, hint, hint_valid);
3287 if (r)
3288 return r;
3289
3290 if (dirty)
3291 set_dirty(cache, oblock, cblock);
3292 else
3293 clear_dirty(cache, oblock, cblock);
3294
3295 return 0;
3296}
3297
3298
/*
 * The discard block size in the on disk metadata is not necessarily the
 * same as the one we're currently using.  So we have to be careful to
 * only set the discarded attribute if we know it covers a complete block
 * of the new size.
 */
3304struct discard_load_info {
3305 struct cache *cache;
3306
	/*
	 * These blocks are sized using the on disk dblock size, rather
	 * than the current one.
	 */
3311 dm_block_t block_size;
3312 dm_block_t discard_begin, discard_end;
3313};
3314
3315static void discard_load_info_init(struct cache *cache,
3316 struct discard_load_info *li)
3317{
3318 li->cache = cache;
3319 li->discard_begin = li->discard_end = 0;
3320}
3321
3322static void set_discard_range(struct discard_load_info *li)
3323{
3324 sector_t b, e;
3325
3326 if (li->discard_begin == li->discard_end)
3327 return;
3328
	/*
	 * Convert to sectors.
	 */
3332 b = li->discard_begin * li->block_size;
3333 e = li->discard_end * li->block_size;
3334
	/*
	 * Then convert back to the current discard block size, rounding
	 * inwards so that only fully covered dblocks get marked.
	 */
3338 b = dm_sector_div_up(b, li->cache->discard_block_size);
3339 sector_div(e, li->cache->discard_block_size);
3340
	/*
	 * The origin may have shrunk, so we need to check we're still in
	 * bounds.
	 */
3345 if (e > from_dblock(li->cache->discard_nr_blocks))
3346 e = from_dblock(li->cache->discard_nr_blocks);
3347
3348 for (; b < e; b++)
3349 set_discard(li->cache, to_dblock(b));
3350}
3351
3352static int load_discard(void *context, sector_t discard_block_size,
3353 dm_dblock_t dblock, bool discard)
3354{
3355 struct discard_load_info *li = context;
3356
3357 li->block_size = discard_block_size;
3358
3359 if (discard) {
3360 if (from_dblock(dblock) == li->discard_end)
			/*
			 * We're already in a discard range, just extend it.
			 */
3364 li->discard_end = li->discard_end + 1ULL;
3365
3366 else {
			/*
			 * Emit the old range and start a new one.
			 */
3370 set_discard_range(li);
3371 li->discard_begin = from_dblock(dblock);
3372 li->discard_end = li->discard_begin + 1ULL;
3373 }
3374 } else {
3375 set_discard_range(li);
3376 li->discard_begin = li->discard_end = 0;
3377 }
3378
3379 return 0;
3380}
3381
3382static dm_cblock_t get_cache_dev_size(struct cache *cache)
3383{
3384 sector_t size = get_dev_size(cache->cache_dev);
3385 (void) sector_div(size, cache->sectors_per_block);
3386 return to_cblock(size);
3387}
3388
3389static bool can_resize(struct cache *cache, dm_cblock_t new_size)
3390{
3391 if (from_cblock(new_size) > from_cblock(cache->cache_size))
3392 return true;
3393
	/*
	 * We can't drop a dirty block when shrinking the cache, so check
	 * each block that the resize would remove.
	 */
	while (from_cblock(new_size) < from_cblock(cache->cache_size)) {
		if (is_dirty(cache, new_size)) {
			DMERR("%s: unable to shrink cache; cache block %llu is dirty",
			      cache_device_name(cache),
			      (unsigned long long) from_cblock(new_size));
			return false;
		}
		new_size = to_cblock(from_cblock(new_size) + 1);
	}
3406
3407 return true;
3408}
3409
3410static int resize_cache_dev(struct cache *cache, dm_cblock_t new_size)
3411{
3412 int r;
3413
3414 r = dm_cache_resize(cache->cmd, new_size);
3415 if (r) {
3416 DMERR("%s: could not resize cache metadata", cache_device_name(cache));
3417 metadata_operation_failed(cache, "dm_cache_resize", r);
3418 return r;
3419 }
3420
3421 set_cache_size(cache, new_size);
3422
3423 return 0;
3424}
3425
3426static int cache_preresume(struct dm_target *ti)
3427{
3428 int r = 0;
3429 struct cache *cache = ti->private;
3430 dm_cblock_t csize = get_cache_dev_size(cache);
3431
	/*
	 * Check to see if the cache has resized.
	 */
3435 if (!cache->sized) {
3436 r = resize_cache_dev(cache, csize);
3437 if (r)
3438 return r;
3439
3440 cache->sized = true;
3441
3442 } else if (csize != cache->cache_size) {
3443 if (!can_resize(cache, csize))
3444 return -EINVAL;
3445
3446 r = resize_cache_dev(cache, csize);
3447 if (r)
3448 return r;
3449 }
3450
3451 if (!cache->loaded_mappings) {
3452 r = dm_cache_load_mappings(cache->cmd, cache->policy,
3453 load_mapping, cache);
3454 if (r) {
3455 DMERR("%s: could not load cache mappings", cache_device_name(cache));
3456 metadata_operation_failed(cache, "dm_cache_load_mappings", r);
3457 return r;
3458 }
3459
3460 cache->loaded_mappings = true;
3461 }
3462
3463 if (!cache->loaded_discards) {
3464 struct discard_load_info li;
3465
		/*
		 * The discard bitset could have been resized, or the
		 * discard block size changed.  To be safe we start by
		 * setting every dblock to not discarded.
		 */
3471 clear_bitset(cache->discard_bitset, from_dblock(cache->discard_nr_blocks));
3472
3473 discard_load_info_init(cache, &li);
3474 r = dm_cache_load_discards(cache->cmd, load_discard, &li);
3475 if (r) {
3476 DMERR("%s: could not load origin discards", cache_device_name(cache));
3477 metadata_operation_failed(cache, "dm_cache_load_discards", r);
3478 return r;
3479 }
3480 set_discard_range(&li);
3481
3482 cache->loaded_discards = true;
3483 }
3484
3485 return r;
3486}
3487
3488static void cache_resume(struct dm_target *ti)
3489{
3490 struct cache *cache = ti->private;
3491
3492 cache->need_tick_bio = true;
3493 do_waker(&cache->waker.work);
3494}
3495
/*
 * Status format:
 *
 * <metadata block size> <#used metadata blocks>/<#total metadata blocks>
 * <cache block size> <#used cache blocks>/<#total cache blocks>
 * <#read hits> <#read misses> <#write hits> <#write misses>
 * <#demotions> <#promotions> <#dirty>
 * <#features> <features>*
 * <#core args> <core args>
 * <policy name> <#policy args> <policy args>*
 * <cache metadata mode> (ro|rw)
 * <needs_check>
 */
3507static void cache_status(struct dm_target *ti, status_type_t type,
3508 unsigned status_flags, char *result, unsigned maxlen)
3509{
3510 int r = 0;
3511 unsigned i;
3512 ssize_t sz = 0;
3513 dm_block_t nr_free_blocks_metadata = 0;
3514 dm_block_t nr_blocks_metadata = 0;
3515 char buf[BDEVNAME_SIZE];
3516 struct cache *cache = ti->private;
3517 dm_cblock_t residency;
3518 bool needs_check;
3519
3520 switch (type) {
3521 case STATUSTYPE_INFO:
3522 if (get_cache_mode(cache) == CM_FAIL) {
3523 DMEMIT("Fail");
3524 break;
3525 }
3526
		/* Commit to ensure statistics aren't out-of-date */
3528 if (!(status_flags & DM_STATUS_NOFLUSH_FLAG) && !dm_suspended(ti))
3529 (void) commit(cache, false);
3530
3531 r = dm_cache_get_free_metadata_block_count(cache->cmd, &nr_free_blocks_metadata);
3532 if (r) {
3533 DMERR("%s: dm_cache_get_free_metadata_block_count returned %d",
3534 cache_device_name(cache), r);
3535 goto err;
3536 }
3537
3538 r = dm_cache_get_metadata_dev_size(cache->cmd, &nr_blocks_metadata);
3539 if (r) {
3540 DMERR("%s: dm_cache_get_metadata_dev_size returned %d",
3541 cache_device_name(cache), r);
3542 goto err;
3543 }
3544
3545 residency = policy_residency(cache->policy);
3546
3547 DMEMIT("%u %llu/%llu %u %llu/%llu %u %u %u %u %u %u %lu ",
3548 (unsigned)DM_CACHE_METADATA_BLOCK_SIZE,
3549 (unsigned long long)(nr_blocks_metadata - nr_free_blocks_metadata),
3550 (unsigned long long)nr_blocks_metadata,
3551 cache->sectors_per_block,
3552 (unsigned long long) from_cblock(residency),
3553 (unsigned long long) from_cblock(cache->cache_size),
3554 (unsigned) atomic_read(&cache->stats.read_hit),
3555 (unsigned) atomic_read(&cache->stats.read_miss),
3556 (unsigned) atomic_read(&cache->stats.write_hit),
3557 (unsigned) atomic_read(&cache->stats.write_miss),
3558 (unsigned) atomic_read(&cache->stats.demotion),
3559 (unsigned) atomic_read(&cache->stats.promotion),
3560 (unsigned long) atomic_read(&cache->nr_dirty));
3561
3562 if (writethrough_mode(&cache->features))
3563 DMEMIT("1 writethrough ");
3564
3565 else if (passthrough_mode(&cache->features))
3566 DMEMIT("1 passthrough ");
3567
3568 else if (writeback_mode(&cache->features))
3569 DMEMIT("1 writeback ");
3570
3571 else {
3572 DMERR("%s: internal error: unknown io mode: %d",
3573 cache_device_name(cache), (int) cache->features.io_mode);
3574 goto err;
3575 }
3576
3577 DMEMIT("2 migration_threshold %llu ", (unsigned long long) cache->migration_threshold);
3578
3579 DMEMIT("%s ", dm_cache_policy_get_name(cache->policy));
3580 if (sz < maxlen) {
3581 r = policy_emit_config_values(cache->policy, result, maxlen, &sz);
3582 if (r)
3583 DMERR("%s: policy_emit_config_values returned %d",
3584 cache_device_name(cache), r);
3585 }
3586
3587 if (get_cache_mode(cache) == CM_READ_ONLY)
3588 DMEMIT("ro ");
3589 else
3590 DMEMIT("rw ");
3591
3592 r = dm_cache_metadata_needs_check(cache->cmd, &needs_check);
3593
3594 if (r || needs_check)
3595 DMEMIT("needs_check ");
3596 else
3597 DMEMIT("- ");
3598
3599 break;
3600
3601 case STATUSTYPE_TABLE:
3602 format_dev_t(buf, cache->metadata_dev->bdev->bd_dev);
3603 DMEMIT("%s ", buf);
3604 format_dev_t(buf, cache->cache_dev->bdev->bd_dev);
3605 DMEMIT("%s ", buf);
3606 format_dev_t(buf, cache->origin_dev->bdev->bd_dev);
3607 DMEMIT("%s", buf);
3608
3609 for (i = 0; i < cache->nr_ctr_args - 1; i++)
3610 DMEMIT(" %s", cache->ctr_args[i]);
3611 if (cache->nr_ctr_args)
3612 DMEMIT(" %s", cache->ctr_args[cache->nr_ctr_args - 1]);
3613 }
3614
3615 return;
3616
3617err:
3618 DMEMIT("Error");
3619}
3620
3621
/*
 * A cache block range can take two forms:
 *
 * i) A single cblock, eg. '3456'
 * ii) A begin and end cblock with a dash between, eg. '123-234'
 */
3627static int parse_cblock_range(struct cache *cache, const char *str,
3628 struct cblock_range *result)
3629{
3630 char dummy;
3631 uint64_t b, e;
3632 int r;
3633
	/*
	 * Try and parse form (ii) first.
	 */
3637 r = sscanf(str, "%llu-%llu%c", &b, &e, &dummy);
3638 if (r < 0)
3639 return r;
3640
3641 if (r == 2) {
3642 result->begin = to_cblock(b);
3643 result->end = to_cblock(e);
3644 return 0;
3645 }
3646
	/*
	 * That didn't work, try form (i).
	 */
3650 r = sscanf(str, "%llu%c", &b, &dummy);
3651 if (r < 0)
3652 return r;
3653
3654 if (r == 1) {
3655 result->begin = to_cblock(b);
3656 result->end = to_cblock(from_cblock(result->begin) + 1u);
3657 return 0;
3658 }
3659
3660 DMERR("%s: invalid cblock range '%s'", cache_device_name(cache), str);
3661 return -EINVAL;
3662}
3663
3664static int validate_cblock_range(struct cache *cache, struct cblock_range *range)
3665{
3666 uint64_t b = from_cblock(range->begin);
3667 uint64_t e = from_cblock(range->end);
3668 uint64_t n = from_cblock(cache->cache_size);
3669
3670 if (b >= n) {
3671 DMERR("%s: begin cblock out of range: %llu >= %llu",
3672 cache_device_name(cache), b, n);
3673 return -EINVAL;
3674 }
3675
3676 if (e > n) {
3677 DMERR("%s: end cblock out of range: %llu > %llu",
3678 cache_device_name(cache), e, n);
3679 return -EINVAL;
3680 }
3681
3682 if (b >= e) {
3683 DMERR("%s: invalid cblock range: %llu >= %llu",
3684 cache_device_name(cache), b, e);
3685 return -EINVAL;
3686 }
3687
3688 return 0;
3689}
3690
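/*
 * Queue the request for the worker thread and wait for it to signal
 * completion.
 */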
3691static int request_invalidation(struct cache *cache, struct cblock_range *range)
3692{
3693 struct invalidation_request req;
3694
3695 INIT_LIST_HEAD(&req.list);
3696 req.cblocks = range;
3697 atomic_set(&req.complete, 0);
3698 req.err = 0;
3699 init_waitqueue_head(&req.result_wait);
3700
3701 spin_lock(&cache->invalidation_lock);
3702 list_add(&req.list, &cache->invalidation_requests);
3703 spin_unlock(&cache->invalidation_lock);
3704 wake_worker(cache);
3705
3706 wait_event(req.result_wait, atomic_read(&req.complete));
3707 return req.err;
3708}
3709
3710static int process_invalidate_cblocks_message(struct cache *cache, unsigned count,
3711 const char **cblock_ranges)
3712{
3713 int r = 0;
3714 unsigned i;
3715 struct cblock_range range;
3716
3717 if (!passthrough_mode(&cache->features)) {
3718 DMERR("%s: cache has to be in passthrough mode for invalidation",
3719 cache_device_name(cache));
3720 return -EPERM;
3721 }
3722
3723 for (i = 0; i < count; i++) {
3724 r = parse_cblock_range(cache, cblock_ranges[i], &range);
3725 if (r)
3726 break;
3727
3728 r = validate_cblock_range(cache, &range);
3729 if (r)
3730 break;
3731
		/*
		 * Pass the range over to the worker and wake it.
		 */
3735 r = request_invalidation(cache, &range);
3736 if (r)
3737 break;
3738 }
3739
3740 return r;
3741}
3742
/*
 * Supports
 *	"<key> <value>"
 * and
 *	"invalidate_cblocks [(<begin>)|(<begin>-<end>)]*"
 *
 * The key migration_threshold is handled by the cache target core;
 * anything else is passed to the policy.
 */
3751static int cache_message(struct dm_target *ti, unsigned argc, char **argv)
3752{
3753 struct cache *cache = ti->private;
3754
3755 if (!argc)
3756 return -EINVAL;
3757
3758 if (get_cache_mode(cache) >= CM_READ_ONLY) {
3759 DMERR("%s: unable to service cache target messages in READ_ONLY or FAIL mode",
3760 cache_device_name(cache));
3761 return -EOPNOTSUPP;
3762 }
3763
3764 if (!strcasecmp(argv[0], "invalidate_cblocks"))
3765 return process_invalidate_cblocks_message(cache, argc - 1, (const char **) argv + 1);
3766
3767 if (argc != 2)
3768 return -EINVAL;
3769
3770 return set_config_value(cache, argv[0], argv[1]);
3771}
3772
3773static int cache_iterate_devices(struct dm_target *ti,
3774 iterate_devices_callout_fn fn, void *data)
3775{
3776 int r = 0;
3777 struct cache *cache = ti->private;
3778
3779 r = fn(ti, cache->cache_dev, 0, get_dev_size(cache->cache_dev), data);
3780 if (!r)
3781 r = fn(ti, cache->origin_dev, 0, ti->len, data);
3782
3783 return r;
3784}
3785
3786static void set_discard_limits(struct cache *cache, struct queue_limits *limits)
3787{
	/*
	 * Cap each discard at 1024 discard blocks, and never larger than
	 * the origin.
	 */
3791 limits->max_discard_sectors = min_t(sector_t, cache->discard_block_size * 1024,
3792 cache->origin_sectors);
3793 limits->discard_granularity = cache->discard_block_size << SECTOR_SHIFT;
3794}
3795
3796static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
3797{
3798 struct cache *cache = ti->private;
3799 uint64_t io_opt_sectors = limits->io_opt >> SECTOR_SHIFT;
3800
	/*
	 * If the system-determined stacked limits are compatible with the
	 * cache's blocksize (io_opt is a factor) do not override them.
	 */
3805 if (io_opt_sectors < cache->sectors_per_block ||
3806 do_div(io_opt_sectors, cache->sectors_per_block)) {
3807 blk_limits_io_min(limits, cache->sectors_per_block << SECTOR_SHIFT);
3808 blk_limits_io_opt(limits, cache->sectors_per_block << SECTOR_SHIFT);
3809 }
3810 set_discard_limits(cache, limits);
3811}
3812
3813
/*----------------------------------------------------------------*/
3815static struct target_type cache_target = {
3816 .name = "cache",
3817 .version = {1, 9, 0},
3818 .module = THIS_MODULE,
3819 .ctr = cache_ctr,
3820 .dtr = cache_dtr,
3821 .map = cache_map,
3822 .end_io = cache_end_io,
3823 .postsuspend = cache_postsuspend,
3824 .preresume = cache_preresume,
3825 .resume = cache_resume,
3826 .status = cache_status,
3827 .message = cache_message,
3828 .iterate_devices = cache_iterate_devices,
3829 .io_hints = cache_io_hints,
3830};
3831
3832static int __init dm_cache_init(void)
3833{
3834 int r;
3835
3836 r = dm_register_target(&cache_target);
3837 if (r) {
3838 DMERR("cache target registration failed: %d", r);
3839 return r;
3840 }
3841
3842 migration_cache = KMEM_CACHE(dm_cache_migration, 0);
3843 if (!migration_cache) {
3844 dm_unregister_target(&cache_target);
3845 return -ENOMEM;
3846 }
3847
3848 return 0;
3849}
3850
3851static void __exit dm_cache_exit(void)
3852{
3853 dm_unregister_target(&cache_target);
3854 kmem_cache_destroy(migration_cache);
3855}
3856
3857module_init(dm_cache_init);
3858module_exit(dm_cache_exit);
3859
3860MODULE_DESCRIPTION(DM_NAME " cache target");
3861MODULE_AUTHOR("Joe Thornber <ejt@redhat.com>");
3862MODULE_LICENSE("GPL");
3863