/*
 * Copyright (C) 2012 Red Hat. All rights reserved.
 *
 * This file is released under the GPL.
 */

7#include "dm.h"
8#include "dm-bio-prison.h"
9#include "dm-bio-record.h"
10#include "dm-cache-metadata.h"
11
12#include <linux/dm-io.h>
13#include <linux/dm-kcopyd.h>
14#include <linux/jiffies.h>
15#include <linux/init.h>
16#include <linux/mempool.h>
17#include <linux/module.h>
18#include <linux/slab.h>
19#include <linux/vmalloc.h>
20
21#define DM_MSG_PREFIX "cache"
22
23DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(cache_copy_throttle,
24 "A percentage of time allocated for copying to and/or from cache");
25
/*----------------------------------------------------------------*/

28#define IOT_RESOLUTION 4
29
30struct io_tracker {
31 spinlock_t lock;
32
	/*
	 * Sectors of in-flight IO.
	 */
36 sector_t in_flight;

	/*
	 * The time, in jiffies, when this device became idle (if it is
	 * indeed idle).
	 */
42 unsigned long idle_time;
43 unsigned long last_update_time;
44};
45
46static void iot_init(struct io_tracker *iot)
47{
48 spin_lock_init(&iot->lock);
49 iot->in_flight = 0ul;
50 iot->idle_time = 0ul;
51 iot->last_update_time = jiffies;
52}
53
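/*
 * Returns true if there is no IO in flight and the tracker has been idle
 * for at least 'jifs' jiffies.  The double-underscore variant expects the
 * caller to hold iot->lock.
 */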
54static bool __iot_idle_for(struct io_tracker *iot, unsigned long jifs)
55{
56 if (iot->in_flight)
57 return false;
58
59 return time_after(jiffies, iot->idle_time + jifs);
60}
61
62static bool iot_idle_for(struct io_tracker *iot, unsigned long jifs)
63{
64 bool r;
65 unsigned long flags;
66
67 spin_lock_irqsave(&iot->lock, flags);
68 r = __iot_idle_for(iot, jifs);
69 spin_unlock_irqrestore(&iot->lock, flags);
70
71 return r;
72}
73
74static void iot_io_begin(struct io_tracker *iot, sector_t len)
75{
76 unsigned long flags;
77
78 spin_lock_irqsave(&iot->lock, flags);
79 iot->in_flight += len;
80 spin_unlock_irqrestore(&iot->lock, flags);
81}
82
83static void __iot_io_end(struct io_tracker *iot, sector_t len)
84{
85 iot->in_flight -= len;
86 if (!iot->in_flight)
87 iot->idle_time = jiffies;
88}
89
90static void iot_io_end(struct io_tracker *iot, sector_t len)
91{
92 unsigned long flags;
93
94 spin_lock_irqsave(&iot->lock, flags);
95 __iot_io_end(iot, len);
96 spin_unlock_irqrestore(&iot->lock, flags);
97}
98
/*----------------------------------------------------------------*/

/*
 * Glossary:
 *
 * oblock: index of an origin block
 * cblock: index of a cache block
 * promotion: movement of a block from origin to cache
 * demotion: movement of a block from cache to origin
 * migration: movement of a block between the origin and cache device,
 *	      either direction
 */

/*----------------------------------------------------------------*/

/*
 * There are a couple of places where we let a bio run, but want to do some
 * work before calling its endio function.  We do this by temporarily
 * changing the endio fn.
 */
119struct dm_hook_info {
120 bio_end_io_t *bi_end_io;
121};
122
123static void dm_hook_bio(struct dm_hook_info *h, struct bio *bio,
124 bio_end_io_t *bi_end_io, void *bi_private)
125{
126 h->bi_end_io = bio->bi_end_io;
127
128 bio->bi_end_io = bi_end_io;
129 bio->bi_private = bi_private;
130}
131
132static void dm_unhook_bio(struct dm_hook_info *h, struct bio *bio)
133{
134 bio->bi_end_io = h->bi_end_io;
135}
136
/*----------------------------------------------------------------*/

139#define MIGRATION_POOL_SIZE 128
140#define COMMIT_PERIOD HZ
141#define MIGRATION_COUNT_WINDOW 10
142
/*
 * The block size of the device holding cache data must be
 * between 32KB and 1GB.
 */
147#define DATA_DEV_BLOCK_SIZE_MIN_SECTORS (32 * 1024 >> SECTOR_SHIFT)
148#define DATA_DEV_BLOCK_SIZE_MAX_SECTORS (1024 * 1024 * 1024 >> SECTOR_SHIFT)
149
150enum cache_metadata_mode {
151 CM_WRITE,
152 CM_READ_ONLY,
153 CM_FAIL
154};
155
156enum cache_io_mode {
	/*
	 * Data is written to cached blocks only.  These blocks are marked
	 * dirty.  If you lose the cache device you will lose data.
	 * Potential performance increase for both reads and writes.
	 */
162 CM_IO_WRITEBACK,

	/*
	 * Data is written to both cache and origin.  Blocks are never
	 * dirty.  Potential performance benefit for reads only.
	 */
168 CM_IO_WRITETHROUGH,

	/*
	 * A degraded mode useful for various cache coherency situations
	 * (eg, rolling back snapshots).  Reads and writes always go to the
	 * origin.  If a write goes to a cached oblock, then the cache
	 * block is invalidated.
	 */
176 CM_IO_PASSTHROUGH
177};
178
179struct cache_features {
180 enum cache_metadata_mode mode;
181 enum cache_io_mode io_mode;
182};
183
184struct cache_stats {
185 atomic_t read_hit;
186 atomic_t read_miss;
187 atomic_t write_hit;
188 atomic_t write_miss;
189 atomic_t demotion;
190 atomic_t promotion;
191 atomic_t copies_avoided;
192 atomic_t cache_cell_clash;
193 atomic_t commit_count;
194 atomic_t discard_count;
195};
196
/*
 * The range of cblocks covered is [begin, end), ie. inclusive of begin,
 * but not end.
 */
201struct cblock_range {
202 dm_cblock_t begin;
203 dm_cblock_t end;
204};
205
206struct invalidation_request {
207 struct list_head list;
208 struct cblock_range *cblocks;
209
210 atomic_t complete;
211 int err;
212
213 wait_queue_head_t result_wait;
214};
215
216struct cache {
217 struct dm_target *ti;
218 struct dm_target_callbacks callbacks;
219
220 struct dm_cache_metadata *cmd;
221
	/*
	 * Metadata is written to this device.
	 */
225 struct dm_dev *metadata_dev;
226
	/*
	 * The slower of the two data devices.  Typically a spindle.
	 */
230 struct dm_dev *origin_dev;
231
	/*
	 * The faster of the two data devices.  Typically an SSD.
	 */
235 struct dm_dev *cache_dev;
236
	/*
	 * Size of the origin device in _complete_ blocks and native sectors.
	 */
240 dm_oblock_t origin_blocks;
241 sector_t origin_sectors;
242
	/*
	 * Size of the cache device in blocks.
	 */
246 dm_cblock_t cache_size;
247
	/*
	 * Fields for converting from sectors to blocks.
	 */
251 uint32_t sectors_per_block;
252 int sectors_per_block_shift;
253
254 spinlock_t lock;
255 struct list_head deferred_cells;
256 struct bio_list deferred_bios;
257 struct bio_list deferred_flush_bios;
258 struct bio_list deferred_writethrough_bios;
259 struct list_head quiesced_migrations;
260 struct list_head completed_migrations;
261 struct list_head need_commit_migrations;
262 sector_t migration_threshold;
263 wait_queue_head_t migration_wait;
264 atomic_t nr_allocated_migrations;

	/*
	 * The number of in flight migrations that are performing
	 * background io. eg, promotion, writeback.
	 */
270 atomic_t nr_io_migrations;
271
272 wait_queue_head_t quiescing_wait;
273 atomic_t quiescing;
274 atomic_t quiescing_ack;
275
	/*
	 * cache_size entries, dirty if set
	 */
279 atomic_t nr_dirty;
280 unsigned long *dirty_bitset;
281
	/*
	 * discard_nr_blocks entries, discarded if set.
	 */
285 dm_dblock_t discard_nr_blocks;
286 unsigned long *discard_bitset;
287 uint32_t discard_block_size;
288
	/*
	 * Rather than reconstructing the table line for the status we just
	 * save it and regurgitate.
	 */
293 unsigned nr_ctr_args;
294 const char **ctr_args;
295
296 struct dm_kcopyd_client *copier;
297 struct workqueue_struct *wq;
298 struct work_struct worker;
299
300 struct delayed_work waker;
301 unsigned long last_commit_jiffies;
302
303 struct dm_bio_prison *prison;
304 struct dm_deferred_set *all_io_ds;
305
306 mempool_t *migration_pool;
307
308 struct dm_cache_policy *policy;
309 unsigned policy_nr_args;
310
311 bool need_tick_bio:1;
312 bool sized:1;
313 bool invalidate:1;
314 bool commit_requested:1;
315 bool loaded_mappings:1;
316 bool loaded_discards:1;
317
	/*
	 * Cache features such as write-through.
	 */
321 struct cache_features features;
322
323 struct cache_stats stats;
324
	/*
	 * Invalidation fields.
	 */
328 spinlock_t invalidation_lock;
329 struct list_head invalidation_requests;
330
331 struct io_tracker origin_tracker;
332};
333
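/*
 * Per bio private data, accessed via dm_per_bio_data().  The size used
 * depends on the IO mode; see get_per_bio_data_size() below.
 */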
334struct per_bio_data {
335 bool tick:1;
336 unsigned req_nr:2;
337 struct dm_deferred_entry *all_io_entry;
338 struct dm_hook_info hook_info;
339 sector_t len;

	/*
	 * writethrough fields.  These MUST remain at the end of this
	 * structure HOWEVER the 'cache' member MUST be the first of them;
	 * PB_DATA_SIZE_WB is defined as offsetof(struct per_bio_data, cache).
	 */
346 struct cache *cache;
347 dm_cblock_t cblock;
348 struct dm_bio_details bio_details;
349};
350
351struct dm_cache_migration {
352 struct list_head list;
353 struct cache *cache;
354
355 unsigned long start_jiffies;
356 dm_oblock_t old_oblock;
357 dm_oblock_t new_oblock;
358 dm_cblock_t cblock;
359
360 bool err:1;
361 bool discard:1;
362 bool writeback:1;
363 bool demote:1;
364 bool promote:1;
365 bool requeue_holder:1;
366 bool invalidate:1;
367
368 struct dm_bio_prison_cell *old_ocell;
369 struct dm_bio_prison_cell *new_ocell;
370};
371
/*
 * Processing a bio in the worker thread may require these memory
 * allocations.  We prealloc them up front to avoid allocating (and
 * handling failure) deep inside the bio processing code.
 */
377struct prealloc {
378 struct dm_cache_migration *mg;
379 struct dm_bio_prison_cell *cell1;
380 struct dm_bio_prison_cell *cell2;
381};
382
383static enum cache_metadata_mode get_cache_mode(struct cache *cache);
384
385static void wake_worker(struct cache *cache)
386{
387 queue_work(cache->wq, &cache->worker);
388}
389
/*----------------------------------------------------------------*/

392static struct dm_bio_prison_cell *alloc_prison_cell(struct cache *cache)
393{
	/* May fail under memory pressure (GFP_NOWAIT); callers handle NULL. */
395 return dm_bio_prison_alloc_cell(cache->prison, GFP_NOWAIT);
396}
397
398static void free_prison_cell(struct cache *cache, struct dm_bio_prison_cell *cell)
399{
400 dm_bio_prison_free_cell(cache->prison, cell);
401}
402
403static struct dm_cache_migration *alloc_migration(struct cache *cache)
404{
405 struct dm_cache_migration *mg;
406
407 mg = mempool_alloc(cache->migration_pool, GFP_NOWAIT);
408 if (mg) {
409 mg->cache = cache;
410 atomic_inc(&mg->cache->nr_allocated_migrations);
411 }
412
413 return mg;
414}
415
416static void free_migration(struct dm_cache_migration *mg)
417{
418 struct cache *cache = mg->cache;
419
420 if (atomic_dec_and_test(&cache->nr_allocated_migrations))
421 wake_up(&cache->migration_wait);
422
423 mempool_free(mg, cache->migration_pool);
424}
425
426static int prealloc_data_structs(struct cache *cache, struct prealloc *p)
427{
428 if (!p->mg) {
429 p->mg = alloc_migration(cache);
430 if (!p->mg)
431 return -ENOMEM;
432 }
433
434 if (!p->cell1) {
435 p->cell1 = alloc_prison_cell(cache);
436 if (!p->cell1)
437 return -ENOMEM;
438 }
439
440 if (!p->cell2) {
441 p->cell2 = alloc_prison_cell(cache);
442 if (!p->cell2)
443 return -ENOMEM;
444 }
445
446 return 0;
447}
448
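/*
 * Returns any preallocated structures that weren't consumed back to their
 * pools.  Safe to call with a partially filled 'struct prealloc'.
 */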
449static void prealloc_free_structs(struct cache *cache, struct prealloc *p)
450{
451 if (p->cell2)
452 free_prison_cell(cache, p->cell2);
453
454 if (p->cell1)
455 free_prison_cell(cache, p->cell1);
456
457 if (p->mg)
458 free_migration(p->mg);
459}
460
461static struct dm_cache_migration *prealloc_get_migration(struct prealloc *p)
462{
463 struct dm_cache_migration *mg = p->mg;
464
465 BUG_ON(!mg);
466 p->mg = NULL;
467
468 return mg;
469}
470
/*
 * You must have a cell within the prealloc struct to return.  If not this
 * function will BUG() rather than returning NULL.
 */
475static struct dm_bio_prison_cell *prealloc_get_cell(struct prealloc *p)
476{
477 struct dm_bio_prison_cell *r = NULL;
478
479 if (p->cell1) {
480 r = p->cell1;
481 p->cell1 = NULL;
482
483 } else if (p->cell2) {
484 r = p->cell2;
485 p->cell2 = NULL;
486 } else
487 BUG();
488
489 return r;
490}
491
/*
 * You can't have more than two cells in a prealloc struct.  BUG() will be
 * called if you try and overfill.
 */
496static void prealloc_put_cell(struct prealloc *p, struct dm_bio_prison_cell *cell)
497{
498 if (!p->cell2)
499 p->cell2 = cell;
500
501 else if (!p->cell1)
502 p->cell1 = cell;
503
504 else
505 BUG();
506}
507
/*----------------------------------------------------------------*/

510static void build_key(dm_oblock_t begin, dm_oblock_t end, struct dm_cell_key *key)
511{
512 key->virtual = 0;
513 key->dev = 0;
514 key->block_begin = from_oblock(begin);
515 key->block_end = from_oblock(end);
516}
517
/*
 * The caller hands in a preallocated cell, and a free function for it.
 * The cell will be freed if there's an error, or if it wasn't used because
 * a cell with that key already exists.
 */
523typedef void (*cell_free_fn)(void *context, struct dm_bio_prison_cell *cell);
524
525static int bio_detain_range(struct cache *cache, dm_oblock_t oblock_begin, dm_oblock_t oblock_end,
526 struct bio *bio, struct dm_bio_prison_cell *cell_prealloc,
527 cell_free_fn free_fn, void *free_context,
528 struct dm_bio_prison_cell **cell_result)
529{
530 int r;
531 struct dm_cell_key key;
532
533 build_key(oblock_begin, oblock_end, &key);
534 r = dm_bio_detain(cache->prison, &key, bio, cell_prealloc, cell_result);
535 if (r)
536 free_fn(free_context, cell_prealloc);
537
538 return r;
539}
540
541static int bio_detain(struct cache *cache, dm_oblock_t oblock,
542 struct bio *bio, struct dm_bio_prison_cell *cell_prealloc,
543 cell_free_fn free_fn, void *free_context,
544 struct dm_bio_prison_cell **cell_result)
545{
546 dm_oblock_t end = to_oblock(from_oblock(oblock) + 1ULL);
547 return bio_detain_range(cache, oblock, end, bio,
548 cell_prealloc, free_fn, free_context, cell_result);
549}
550
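/*
 * Like bio_detain(), but there is no bio to add to the cell; it simply
 * takes the lock on a single origin block using a cell from 'structs'.
 */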
551static int get_cell(struct cache *cache,
552 dm_oblock_t oblock,
553 struct prealloc *structs,
554 struct dm_bio_prison_cell **cell_result)
555{
556 int r;
557 struct dm_cell_key key;
558 struct dm_bio_prison_cell *cell_prealloc;
559
560 cell_prealloc = prealloc_get_cell(structs);
561
562 build_key(oblock, to_oblock(from_oblock(oblock) + 1ULL), &key);
563 r = dm_get_cell(cache->prison, &key, cell_prealloc, cell_result);
564 if (r)
565 prealloc_put_cell(structs, cell_prealloc);
566
567 return r;
568}
569
/*----------------------------------------------------------------*/

572static bool is_dirty(struct cache *cache, dm_cblock_t b)
573{
574 return test_bit(from_cblock(b), cache->dirty_bitset);
575}
576
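/*
 * The dirty bitset and the policy are kept in sync; nr_dirty counts the
 * dirty cache blocks and a table event is raised when it drops to zero.
 */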
577static void set_dirty(struct cache *cache, dm_oblock_t oblock, dm_cblock_t cblock)
578{
579 if (!test_and_set_bit(from_cblock(cblock), cache->dirty_bitset)) {
580 atomic_inc(&cache->nr_dirty);
581 policy_set_dirty(cache->policy, oblock);
582 }
583}
584
585static void clear_dirty(struct cache *cache, dm_oblock_t oblock, dm_cblock_t cblock)
586{
587 if (test_and_clear_bit(from_cblock(cblock), cache->dirty_bitset)) {
588 policy_clear_dirty(cache->policy, oblock);
589 if (atomic_dec_return(&cache->nr_dirty) == 0)
590 dm_table_event(cache->ti->table);
591 }
592}
593
/*----------------------------------------------------------------*/

596static bool block_size_is_power_of_two(struct cache *cache)
597{
598 return cache->sectors_per_block_shift >= 0;
599}

/* gcc on ARM generates spurious references to __udivdi3 and __umoddi3 */
602#if defined(CONFIG_ARM) && __GNUC__ == 4 && __GNUC_MINOR__ <= 6
603__always_inline
604#endif
605static dm_block_t block_div(dm_block_t b, uint32_t n)
606{
607 do_div(b, n);
608
609 return b;
610}
611
612static dm_block_t oblocks_per_dblock(struct cache *cache)
613{
614 dm_block_t oblocks = cache->discard_block_size;
615
616 if (block_size_is_power_of_two(cache))
617 oblocks >>= cache->sectors_per_block_shift;
618 else
619 oblocks = block_div(oblocks, cache->sectors_per_block);
620
621 return oblocks;
622}
623
624static dm_dblock_t oblock_to_dblock(struct cache *cache, dm_oblock_t oblock)
625{
626 return to_dblock(block_div(from_oblock(oblock),
627 oblocks_per_dblock(cache)));
628}
629
630static dm_oblock_t dblock_to_oblock(struct cache *cache, dm_dblock_t dblock)
631{
632 return to_oblock(from_dblock(dblock) * oblocks_per_dblock(cache));
633}
634
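/*
 * The discard bitset is indexed by discard block (dblock); each dblock
 * covers oblocks_per_dblock() origin blocks.
 */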
635static void set_discard(struct cache *cache, dm_dblock_t b)
636{
637 unsigned long flags;
638
639 BUG_ON(from_dblock(b) >= from_dblock(cache->discard_nr_blocks));
640 atomic_inc(&cache->stats.discard_count);
641
642 spin_lock_irqsave(&cache->lock, flags);
643 set_bit(from_dblock(b), cache->discard_bitset);
644 spin_unlock_irqrestore(&cache->lock, flags);
645}
646
647static void clear_discard(struct cache *cache, dm_dblock_t b)
648{
649 unsigned long flags;
650
651 spin_lock_irqsave(&cache->lock, flags);
652 clear_bit(from_dblock(b), cache->discard_bitset);
653 spin_unlock_irqrestore(&cache->lock, flags);
654}
655
656static bool is_discarded(struct cache *cache, dm_dblock_t b)
657{
658 int r;
659 unsigned long flags;
660
661 spin_lock_irqsave(&cache->lock, flags);
662 r = test_bit(from_dblock(b), cache->discard_bitset);
663 spin_unlock_irqrestore(&cache->lock, flags);
664
665 return r;
666}
667
668static bool is_discarded_oblock(struct cache *cache, dm_oblock_t b)
669{
670 int r;
671 unsigned long flags;
672
673 spin_lock_irqsave(&cache->lock, flags);
674 r = test_bit(from_dblock(oblock_to_dblock(cache, b)),
675 cache->discard_bitset);
676 spin_unlock_irqrestore(&cache->lock, flags);
677
678 return r;
679}
680
/*----------------------------------------------------------------*/

683static void load_stats(struct cache *cache)
684{
685 struct dm_cache_statistics stats;
686
687 dm_cache_metadata_get_stats(cache->cmd, &stats);
688 atomic_set(&cache->stats.read_hit, stats.read_hits);
689 atomic_set(&cache->stats.read_miss, stats.read_misses);
690 atomic_set(&cache->stats.write_hit, stats.write_hits);
691 atomic_set(&cache->stats.write_miss, stats.write_misses);
692}
693
694static void save_stats(struct cache *cache)
695{
696 struct dm_cache_statistics stats;
697
698 if (get_cache_mode(cache) >= CM_READ_ONLY)
699 return;
700
701 stats.read_hits = atomic_read(&cache->stats.read_hit);
702 stats.read_misses = atomic_read(&cache->stats.read_miss);
703 stats.write_hits = atomic_read(&cache->stats.write_hit);
704 stats.write_misses = atomic_read(&cache->stats.write_miss);
705
706 dm_cache_metadata_set_stats(cache->cmd, &stats);
707}
708
/*----------------------------------------------------------------
 * Per bio data
 *--------------------------------------------------------------*/

/*
 * Only writethrough mode needs the full per_bio_data; every other mode
 * uses the shorter variant that stops at the 'cache' member.
 */
716#define PB_DATA_SIZE_WB (offsetof(struct per_bio_data, cache))
717#define PB_DATA_SIZE_WT (sizeof(struct per_bio_data))
718
719static bool writethrough_mode(struct cache_features *f)
720{
721 return f->io_mode == CM_IO_WRITETHROUGH;
722}
723
724static bool writeback_mode(struct cache_features *f)
725{
726 return f->io_mode == CM_IO_WRITEBACK;
727}
728
729static bool passthrough_mode(struct cache_features *f)
730{
731 return f->io_mode == CM_IO_PASSTHROUGH;
732}
733
734static size_t get_per_bio_data_size(struct cache *cache)
735{
736 return writethrough_mode(&cache->features) ? PB_DATA_SIZE_WT : PB_DATA_SIZE_WB;
737}
738
739static struct per_bio_data *get_per_bio_data(struct bio *bio, size_t data_size)
740{
741 struct per_bio_data *pb = dm_per_bio_data(bio, data_size);
742 BUG_ON(!pb);
743 return pb;
744}
745
746static struct per_bio_data *init_per_bio_data(struct bio *bio, size_t data_size)
747{
748 struct per_bio_data *pb = get_per_bio_data(bio, data_size);
749
750 pb->tick = false;
751 pb->req_nr = dm_bio_get_target_bio_nr(bio);
752 pb->all_io_entry = NULL;
753 pb->len = 0;
754
755 return pb;
756}
757
/*----------------------------------------------------------------
 * Remapping
 *--------------------------------------------------------------*/
761static void remap_to_origin(struct cache *cache, struct bio *bio)
762{
763 bio->bi_bdev = cache->origin_dev->bdev;
764}
765
766static void remap_to_cache(struct cache *cache, struct bio *bio,
767 dm_cblock_t cblock)
768{
769 sector_t bi_sector = bio->bi_iter.bi_sector;
770 sector_t block = from_cblock(cblock);
771
772 bio->bi_bdev = cache->cache_dev->bdev;
773 if (!block_size_is_power_of_two(cache))
774 bio->bi_iter.bi_sector =
775 (block * cache->sectors_per_block) +
776 sector_div(bi_sector, cache->sectors_per_block);
777 else
778 bio->bi_iter.bi_sector =
779 (block << cache->sectors_per_block_shift) |
780 (bi_sector & (cache->sectors_per_block - 1));
781}
782
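/*
 * The policy is informed of the passage of time via 'tick' bios: when
 * cache->need_tick_bio is set, the next suitable bio gets pb->tick, which
 * is acted on later when that bio completes.
 */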
783static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio)
784{
785 unsigned long flags;
786 size_t pb_data_size = get_per_bio_data_size(cache);
787 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
788
789 spin_lock_irqsave(&cache->lock, flags);
790 if (cache->need_tick_bio &&
791 !(bio->bi_rw & (REQ_FUA | REQ_FLUSH | REQ_DISCARD))) {
792 pb->tick = true;
793 cache->need_tick_bio = false;
794 }
795 spin_unlock_irqrestore(&cache->lock, flags);
796}
797
798static void remap_to_origin_clear_discard(struct cache *cache, struct bio *bio,
799 dm_oblock_t oblock)
800{
801 check_if_tick_bio_needed(cache, bio);
802 remap_to_origin(cache, bio);
803 if (bio_data_dir(bio) == WRITE)
804 clear_discard(cache, oblock_to_dblock(cache, oblock));
805}
806
807static void remap_to_cache_dirty(struct cache *cache, struct bio *bio,
808 dm_oblock_t oblock, dm_cblock_t cblock)
809{
810 check_if_tick_bio_needed(cache, bio);
811 remap_to_cache(cache, bio, cblock);
812 if (bio_data_dir(bio) == WRITE) {
813 set_dirty(cache, oblock, cblock);
814 clear_discard(cache, oblock_to_dblock(cache, oblock));
815 }
816}
817
818static dm_oblock_t get_bio_block(struct cache *cache, struct bio *bio)
819{
820 sector_t block_nr = bio->bi_iter.bi_sector;
821
822 if (!block_size_is_power_of_two(cache))
823 (void) sector_div(block_nr, cache->sectors_per_block);
824 else
825 block_nr >>= cache->sectors_per_block_shift;
826
827 return to_oblock(block_nr);
828}
829
830static int bio_triggers_commit(struct cache *cache, struct bio *bio)
831{
832 return bio->bi_rw & (REQ_FLUSH | REQ_FUA);
833}
834
/*
 * You must increment the deferred set whilst the prison cell is held.  To
 * encourage this, we ask for 'cell' to be passed in.
 */
839static void inc_ds(struct cache *cache, struct bio *bio,
840 struct dm_bio_prison_cell *cell)
841{
842 size_t pb_data_size = get_per_bio_data_size(cache);
843 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
844
845 BUG_ON(!cell);
846 BUG_ON(pb->all_io_entry);
847
848 pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
849}
850
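/*
 * IO to the origin device is accounted in origin_tracker so that
 * writeback_some_dirty_blocks() can tell whether the origin is idle.
 */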
851static bool accountable_bio(struct cache *cache, struct bio *bio)
852{
853 return ((bio->bi_bdev == cache->origin_dev->bdev) &&
854 !(bio->bi_rw & REQ_DISCARD));
855}
856
857static void accounted_begin(struct cache *cache, struct bio *bio)
858{
859 size_t pb_data_size = get_per_bio_data_size(cache);
860 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
861
862 if (accountable_bio(cache, bio)) {
863 pb->len = bio_sectors(bio);
864 iot_io_begin(&cache->origin_tracker, pb->len);
865 }
866}
867
868static void accounted_complete(struct cache *cache, struct bio *bio)
869{
870 size_t pb_data_size = get_per_bio_data_size(cache);
871 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
872
873 iot_io_end(&cache->origin_tracker, pb->len);
874}
875
876static void accounted_request(struct cache *cache, struct bio *bio)
877{
878 accounted_begin(cache, bio);
879 generic_make_request(bio);
880}
881
882static void issue(struct cache *cache, struct bio *bio)
883{
884 unsigned long flags;
885
886 if (!bio_triggers_commit(cache, bio)) {
887 accounted_request(cache, bio);
888 return;
889 }

	/*
	 * Batch together any bios that trigger commits and then issue a
	 * single commit for them in do_worker().
	 */
895 spin_lock_irqsave(&cache->lock, flags);
896 cache->commit_requested = true;
897 bio_list_add(&cache->deferred_flush_bios, bio);
898 spin_unlock_irqrestore(&cache->lock, flags);
899}
900
901static void inc_and_issue(struct cache *cache, struct bio *bio, struct dm_bio_prison_cell *cell)
902{
903 inc_ds(cache, bio, cell);
904 issue(cache, bio);
905}
906
907static void defer_writethrough_bio(struct cache *cache, struct bio *bio)
908{
909 unsigned long flags;
910
911 spin_lock_irqsave(&cache->lock, flags);
912 bio_list_add(&cache->deferred_writethrough_bios, bio);
913 spin_unlock_irqrestore(&cache->lock, flags);
914
915 wake_worker(cache);
916}
917
918static void writethrough_endio(struct bio *bio)
919{
920 struct per_bio_data *pb = get_per_bio_data(bio, PB_DATA_SIZE_WT);
921
922 dm_unhook_bio(&pb->hook_info, bio);
923
924 if (bio->bi_error) {
925 bio_endio(bio);
926 return;
927 }
928
929 dm_bio_restore(&pb->bio_details, bio);
930 remap_to_cache(pb->cache, bio, pb->cblock);

	/*
	 * We can't issue this bio directly, since we're in interrupt
	 * context.  So it gets put on a list for the worker thread to
	 * deal with.
	 */
937 defer_writethrough_bio(pb->cache, bio);
938}
939

/*
 * When running in writethrough mode we need to send writes to clean blocks
 * to both the cache and the origin devices.  Currently this is done in
 * series: the write goes to the origin first, and writethrough_endio()
 * then re-issues it to the cache.
 */
946static void remap_to_origin_then_cache(struct cache *cache, struct bio *bio,
947 dm_oblock_t oblock, dm_cblock_t cblock)
948{
949 struct per_bio_data *pb = get_per_bio_data(bio, PB_DATA_SIZE_WT);
950
951 pb->cache = cache;
952 pb->cblock = cblock;
953 dm_hook_bio(&pb->hook_info, bio, writethrough_endio, NULL);
954 dm_bio_record(&pb->bio_details, bio);
955
956 remap_to_origin_clear_discard(pb->cache, bio, oblock);
957}
958
/*----------------------------------------------------------------
 * Failure modes
 *--------------------------------------------------------------*/
962static enum cache_metadata_mode get_cache_mode(struct cache *cache)
963{
964 return cache->features.mode;
965}
966
967static const char *cache_device_name(struct cache *cache)
968{
969 return dm_device_name(dm_table_get_md(cache->ti->table));
970}
971
972static void notify_mode_switch(struct cache *cache, enum cache_metadata_mode mode)
973{
974 const char *descs[] = {
975 "write",
976 "read-only",
977 "fail"
978 };
979
980 dm_table_event(cache->ti->table);
981 DMINFO("%s: switching cache to %s mode",
982 cache_device_name(cache), descs[(int)mode]);
983}
984
985static void set_cache_mode(struct cache *cache, enum cache_metadata_mode new_mode)
986{
987 bool needs_check = dm_cache_metadata_needs_check(cache->cmd);
988 enum cache_metadata_mode old_mode = get_cache_mode(cache);
989
990 if (new_mode == CM_WRITE && needs_check) {
991 DMERR("%s: unable to switch cache to write mode until repaired.",
992 cache_device_name(cache));
993 if (old_mode != new_mode)
994 new_mode = old_mode;
995 else
996 new_mode = CM_READ_ONLY;
997 }
998
	/* Never move out of fail mode */
1000 if (old_mode == CM_FAIL)
1001 new_mode = CM_FAIL;
1002
1003 switch (new_mode) {
1004 case CM_FAIL:
1005 case CM_READ_ONLY:
1006 dm_cache_metadata_set_read_only(cache->cmd);
1007 break;
1008
1009 case CM_WRITE:
1010 dm_cache_metadata_set_read_write(cache->cmd);
1011 break;
1012 }
1013
1014 cache->features.mode = new_mode;
1015
1016 if (new_mode != old_mode)
1017 notify_mode_switch(cache, new_mode);
1018}
1019
1020static void abort_transaction(struct cache *cache)
1021{
1022 const char *dev_name = cache_device_name(cache);
1023
1024 if (get_cache_mode(cache) >= CM_READ_ONLY)
1025 return;
1026
1027 if (dm_cache_metadata_set_needs_check(cache->cmd)) {
1028 DMERR("%s: failed to set 'needs_check' flag in metadata", dev_name);
1029 set_cache_mode(cache, CM_FAIL);
1030 }
1031
1032 DMERR_LIMIT("%s: aborting current metadata transaction", dev_name);
1033 if (dm_cache_metadata_abort(cache->cmd)) {
1034 DMERR("%s: failed to abort metadata transaction", dev_name);
1035 set_cache_mode(cache, CM_FAIL);
1036 }
1037}
1038
1039static void metadata_operation_failed(struct cache *cache, const char *op, int r)
1040{
1041 DMERR_LIMIT("%s: metadata operation '%s' failed: error = %d",
1042 cache_device_name(cache), op, r);
1043 abort_transaction(cache);
1044 set_cache_mode(cache, CM_READ_ONLY);
1045}
1046
/*----------------------------------------------------------------
 * Migration processing
 *
 * Migration covers moving data from the origin device to the cache, or
 * vice versa.
 *--------------------------------------------------------------*/
1053static void inc_io_migrations(struct cache *cache)
1054{
1055 atomic_inc(&cache->nr_io_migrations);
1056}
1057
1058static void dec_io_migrations(struct cache *cache)
1059{
1060 atomic_dec(&cache->nr_io_migrations);
1061}
1062
1063static bool discard_or_flush(struct bio *bio)
1064{
1065 return bio->bi_rw & (REQ_FLUSH | REQ_FUA | REQ_DISCARD);
1066}
1067
1068static void __cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell)
1069{
1070 if (discard_or_flush(cell->holder)) {
		/*
		 * We have to handle these bios individually.
		 */
1074 dm_cell_release(cache->prison, cell, &cache->deferred_bios);
1075 free_prison_cell(cache, cell);
1076 } else
1077 list_add_tail(&cell->user_list, &cache->deferred_cells);
1078}
1079
1080static void cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell, bool holder)
1081{
1082 unsigned long flags;
1083
1084 if (!holder && dm_cell_promote_or_release(cache->prison, cell)) {
		/*
		 * There was no prisoner to promote to holder, the
		 * cell has been released.
		 */
1089 free_prison_cell(cache, cell);
1090 return;
1091 }
1092
1093 spin_lock_irqsave(&cache->lock, flags);
1094 __cell_defer(cache, cell);
1095 spin_unlock_irqrestore(&cache->lock, flags);
1096
1097 wake_worker(cache);
1098}
1099
1100static void cell_error_with_code(struct cache *cache, struct dm_bio_prison_cell *cell, int err)
1101{
1102 dm_cell_error(cache->prison, cell, err);
1103 free_prison_cell(cache, cell);
1104}
1105
1106static void cell_requeue(struct cache *cache, struct dm_bio_prison_cell *cell)
1107{
1108 cell_error_with_code(cache, cell, DM_ENDIO_REQUEUE);
1109}
1110
1111static void free_io_migration(struct dm_cache_migration *mg)
1112{
1113 struct cache *cache = mg->cache;
1114
1115 dec_io_migrations(cache);
1116 free_migration(mg);
1117 wake_worker(cache);
1118}
1119
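/*
 * A migration's copy failed: undo the policy state and release any cells
 * so the deferred bios get another chance.
 */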
1120static void migration_failure(struct dm_cache_migration *mg)
1121{
1122 struct cache *cache = mg->cache;
1123 const char *dev_name = cache_device_name(cache);
1124
1125 if (mg->writeback) {
1126 DMERR_LIMIT("%s: writeback failed; couldn't copy block", dev_name);
1127 set_dirty(cache, mg->old_oblock, mg->cblock);
1128 cell_defer(cache, mg->old_ocell, false);
1129
1130 } else if (mg->demote) {
1131 DMERR_LIMIT("%s: demotion failed; couldn't copy block", dev_name);
1132 policy_force_mapping(cache->policy, mg->new_oblock, mg->old_oblock);
1133
1134 cell_defer(cache, mg->old_ocell, mg->promote ? false : true);
1135 if (mg->promote)
1136 cell_defer(cache, mg->new_ocell, true);
1137 } else {
1138 DMERR_LIMIT("%s: promotion failed; couldn't copy block", dev_name);
1139 policy_remove_mapping(cache->policy, mg->new_oblock);
1140 cell_defer(cache, mg->new_ocell, true);
1141 }
1142
1143 free_io_migration(mg);
1144}
1145
1146static void migration_success_pre_commit(struct dm_cache_migration *mg)
1147{
1148 int r;
1149 unsigned long flags;
1150 struct cache *cache = mg->cache;
1151
1152 if (mg->writeback) {
1153 clear_dirty(cache, mg->old_oblock, mg->cblock);
1154 cell_defer(cache, mg->old_ocell, false);
1155 free_io_migration(mg);
1156 return;
1157
1158 } else if (mg->demote) {
1159 r = dm_cache_remove_mapping(cache->cmd, mg->cblock);
1160 if (r) {
1161 DMERR_LIMIT("%s: demotion failed; couldn't update on disk metadata",
1162 cache_device_name(cache));
1163 metadata_operation_failed(cache, "dm_cache_remove_mapping", r);
1164 policy_force_mapping(cache->policy, mg->new_oblock,
1165 mg->old_oblock);
1166 if (mg->promote)
1167 cell_defer(cache, mg->new_ocell, true);
1168 free_io_migration(mg);
1169 return;
1170 }
1171 } else {
1172 r = dm_cache_insert_mapping(cache->cmd, mg->cblock, mg->new_oblock);
1173 if (r) {
1174 DMERR_LIMIT("%s: promotion failed; couldn't update on disk metadata",
1175 cache_device_name(cache));
1176 metadata_operation_failed(cache, "dm_cache_insert_mapping", r);
1177 policy_remove_mapping(cache->policy, mg->new_oblock);
1178 free_io_migration(mg);
1179 return;
1180 }
1181 }
1182
1183 spin_lock_irqsave(&cache->lock, flags);
1184 list_add_tail(&mg->list, &cache->need_commit_migrations);
1185 cache->commit_requested = true;
1186 spin_unlock_irqrestore(&cache->lock, flags);
1187}
1188
1189static void migration_success_post_commit(struct dm_cache_migration *mg)
1190{
1191 unsigned long flags;
1192 struct cache *cache = mg->cache;
1193
1194 if (mg->writeback) {
1195 DMWARN_LIMIT("%s: writeback unexpectedly triggered commit",
1196 cache_device_name(cache));
1197 return;
1198
1199 } else if (mg->demote) {
1200 cell_defer(cache, mg->old_ocell, mg->promote ? false : true);
1201
1202 if (mg->promote) {
1203 mg->demote = false;
1204
1205 spin_lock_irqsave(&cache->lock, flags);
1206 list_add_tail(&mg->list, &cache->quiesced_migrations);
1207 spin_unlock_irqrestore(&cache->lock, flags);
1208
1209 } else {
1210 if (mg->invalidate)
1211 policy_remove_mapping(cache->policy, mg->old_oblock);
1212 free_io_migration(mg);
1213 }
1214
1215 } else {
1216 if (mg->requeue_holder) {
1217 clear_dirty(cache, mg->new_oblock, mg->cblock);
1218 cell_defer(cache, mg->new_ocell, true);
1219 } else {
			/*
			 * The block was promoted via an overwrite, so it's dirty.
			 */
1223 set_dirty(cache, mg->new_oblock, mg->cblock);
1224 bio_endio(mg->new_ocell->holder);
1225 cell_defer(cache, mg->new_ocell, false);
1226 }
1227 free_io_migration(mg);
1228 }
1229}
1230
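/*
 * Completion callback for dm_kcopyd_copy(); just records any error and
 * hands the migration to the worker thread via completed_migrations.
 */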
1231static void copy_complete(int read_err, unsigned long write_err, void *context)
1232{
1233 unsigned long flags;
1234 struct dm_cache_migration *mg = (struct dm_cache_migration *) context;
1235 struct cache *cache = mg->cache;
1236
1237 if (read_err || write_err)
1238 mg->err = true;
1239
1240 spin_lock_irqsave(&cache->lock, flags);
1241 list_add_tail(&mg->list, &cache->completed_migrations);
1242 spin_unlock_irqrestore(&cache->lock, flags);
1243
1244 wake_worker(cache);
1245}
1246
1247static void issue_copy(struct dm_cache_migration *mg)
1248{
1249 int r;
1250 struct dm_io_region o_region, c_region;
1251 struct cache *cache = mg->cache;
1252 sector_t cblock = from_cblock(mg->cblock);
1253
1254 o_region.bdev = cache->origin_dev->bdev;
1255 o_region.count = cache->sectors_per_block;
1256
1257 c_region.bdev = cache->cache_dev->bdev;
1258 c_region.sector = cblock * cache->sectors_per_block;
1259 c_region.count = cache->sectors_per_block;
1260
1261 if (mg->writeback || mg->demote) {
		/* demote */
1263 o_region.sector = from_oblock(mg->old_oblock) * cache->sectors_per_block;
1264 r = dm_kcopyd_copy(cache->copier, &c_region, 1, &o_region, 0, copy_complete, mg);
1265 } else {
		/* promote */
1267 o_region.sector = from_oblock(mg->new_oblock) * cache->sectors_per_block;
1268 r = dm_kcopyd_copy(cache->copier, &o_region, 1, &c_region, 0, copy_complete, mg);
1269 }
1270
1271 if (r < 0) {
1272 DMERR_LIMIT("%s: issuing migration failed", cache_device_name(cache));
1273 migration_failure(mg);
1274 }
1275}
1276
1277static void overwrite_endio(struct bio *bio)
1278{
1279 struct dm_cache_migration *mg = bio->bi_private;
1280 struct cache *cache = mg->cache;
1281 size_t pb_data_size = get_per_bio_data_size(cache);
1282 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
1283 unsigned long flags;
1284
1285 dm_unhook_bio(&pb->hook_info, bio);
1286
1287 if (bio->bi_error)
1288 mg->err = true;
1289
1290 mg->requeue_holder = false;
1291
1292 spin_lock_irqsave(&cache->lock, flags);
1293 list_add_tail(&mg->list, &cache->completed_migrations);
1294 spin_unlock_irqrestore(&cache->lock, flags);
1295
1296 wake_worker(cache);
1297}
1298
1299static void issue_overwrite(struct dm_cache_migration *mg, struct bio *bio)
1300{
1301 size_t pb_data_size = get_per_bio_data_size(mg->cache);
1302 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
1303
1304 dm_hook_bio(&pb->hook_info, bio, overwrite_endio, mg);
1305 remap_to_cache_dirty(mg->cache, bio, mg->new_oblock, mg->cblock);

	/*
	 * No need to inc_ds() here, since the cell will be held for the
	 * duration of the io.
	 */
1311 accounted_request(mg->cache, bio);
1312}
1313
1314static bool bio_writes_complete_block(struct cache *cache, struct bio *bio)
1315{
1316 return (bio_data_dir(bio) == WRITE) &&
1317 (bio->bi_iter.bi_size == (cache->sectors_per_block << SECTOR_SHIFT));
1318}
1319
1320static void avoid_copy(struct dm_cache_migration *mg)
1321{
1322 atomic_inc(&mg->cache->stats.copies_avoided);
1323 migration_success_pre_commit(mg);
1324}
1325
1326static void calc_discard_block_range(struct cache *cache, struct bio *bio,
1327 dm_dblock_t *b, dm_dblock_t *e)
1328{
1329 sector_t sb = bio->bi_iter.bi_sector;
1330 sector_t se = bio_end_sector(bio);
1331
1332 *b = to_dblock(dm_sector_div_up(sb, cache->discard_block_size));
1333
1334 if (se - sb < cache->discard_block_size)
1335 *e = *b;
1336 else
1337 *e = to_dblock(block_div(se, cache->discard_block_size));
1338}
1339
1340static void issue_discard(struct dm_cache_migration *mg)
1341{
1342 dm_dblock_t b, e;
1343 struct bio *bio = mg->new_ocell->holder;
1344 struct cache *cache = mg->cache;
1345
1346 calc_discard_block_range(cache, bio, &b, &e);
1347 while (b != e) {
1348 set_discard(cache, b);
1349 b = to_dblock(from_dblock(b) + 1);
1350 }
1351
1352 bio_endio(bio);
1353 cell_defer(cache, mg->new_ocell, false);
1354 free_migration(mg);
1355 wake_worker(cache);
1356}
1357
1358static void issue_copy_or_discard(struct dm_cache_migration *mg)
1359{
1360 bool avoid;
1361 struct cache *cache = mg->cache;
1362
1363 if (mg->discard) {
1364 issue_discard(mg);
1365 return;
1366 }
1367
1368 if (mg->writeback || mg->demote)
1369 avoid = !is_dirty(cache, mg->cblock) ||
1370 is_discarded_oblock(cache, mg->old_oblock);
1371 else {
1372 struct bio *bio = mg->new_ocell->holder;
1373
1374 avoid = is_discarded_oblock(cache, mg->new_oblock);
1375
1376 if (writeback_mode(&cache->features) &&
1377 !avoid && bio_writes_complete_block(cache, bio)) {
1378 issue_overwrite(mg, bio);
1379 return;
1380 }
1381 }
1382
1383 avoid ? avoid_copy(mg) : issue_copy(mg);
1384}
1385
1386static void complete_migration(struct dm_cache_migration *mg)
1387{
1388 if (mg->err)
1389 migration_failure(mg);
1390 else
1391 migration_success_pre_commit(mg);
1392}
1393
1394static void process_migrations(struct cache *cache, struct list_head *head,
1395 void (*fn)(struct dm_cache_migration *))
1396{
1397 unsigned long flags;
1398 struct list_head list;
1399 struct dm_cache_migration *mg, *tmp;
1400
1401 INIT_LIST_HEAD(&list);
1402 spin_lock_irqsave(&cache->lock, flags);
1403 list_splice_init(head, &list);
1404 spin_unlock_irqrestore(&cache->lock, flags);
1405
1406 list_for_each_entry_safe(mg, tmp, &list, list)
1407 fn(mg);
1408}
1409
1410static void __queue_quiesced_migration(struct dm_cache_migration *mg)
1411{
1412 list_add_tail(&mg->list, &mg->cache->quiesced_migrations);
1413}
1414
1415static void queue_quiesced_migration(struct dm_cache_migration *mg)
1416{
1417 unsigned long flags;
1418 struct cache *cache = mg->cache;
1419
1420 spin_lock_irqsave(&cache->lock, flags);
1421 __queue_quiesced_migration(mg);
1422 spin_unlock_irqrestore(&cache->lock, flags);
1423
1424 wake_worker(cache);
1425}
1426
1427static void queue_quiesced_migrations(struct cache *cache, struct list_head *work)
1428{
1429 unsigned long flags;
1430 struct dm_cache_migration *mg, *tmp;
1431
1432 spin_lock_irqsave(&cache->lock, flags);
1433 list_for_each_entry_safe(mg, tmp, work, list)
1434 __queue_quiesced_migration(mg);
1435 spin_unlock_irqrestore(&cache->lock, flags);
1436
1437 wake_worker(cache);
1438}
1439
1440static void check_for_quiesced_migrations(struct cache *cache,
1441 struct per_bio_data *pb)
1442{
1443 struct list_head work;
1444
1445 if (!pb->all_io_entry)
1446 return;
1447
1448 INIT_LIST_HEAD(&work);
1449 dm_deferred_entry_dec(pb->all_io_entry, &work);
1450
1451 if (!list_empty(&work))
1452 queue_quiesced_migrations(cache, &work);
1453}
1454
1455static void quiesce_migration(struct dm_cache_migration *mg)
1456{
1457 if (!dm_deferred_set_add_work(mg->cache->all_io_ds, &mg->list))
1458 queue_quiesced_migration(mg);
1459}
1460
1461static void promote(struct cache *cache, struct prealloc *structs,
1462 dm_oblock_t oblock, dm_cblock_t cblock,
1463 struct dm_bio_prison_cell *cell)
1464{
1465 struct dm_cache_migration *mg = prealloc_get_migration(structs);
1466
1467 mg->err = false;
1468 mg->discard = false;
1469 mg->writeback = false;
1470 mg->demote = false;
1471 mg->promote = true;
1472 mg->requeue_holder = true;
1473 mg->invalidate = false;
1474 mg->cache = cache;
1475 mg->new_oblock = oblock;
1476 mg->cblock = cblock;
1477 mg->old_ocell = NULL;
1478 mg->new_ocell = cell;
1479 mg->start_jiffies = jiffies;
1480
1481 inc_io_migrations(cache);
1482 quiesce_migration(mg);
1483}
1484
1485static void writeback(struct cache *cache, struct prealloc *structs,
1486 dm_oblock_t oblock, dm_cblock_t cblock,
1487 struct dm_bio_prison_cell *cell)
1488{
1489 struct dm_cache_migration *mg = prealloc_get_migration(structs);
1490
1491 mg->err = false;
1492 mg->discard = false;
1493 mg->writeback = true;
1494 mg->demote = false;
1495 mg->promote = false;
1496 mg->requeue_holder = true;
1497 mg->invalidate = false;
1498 mg->cache = cache;
1499 mg->old_oblock = oblock;
1500 mg->cblock = cblock;
1501 mg->old_ocell = cell;
1502 mg->new_ocell = NULL;
1503 mg->start_jiffies = jiffies;
1504
1505 inc_io_migrations(cache);
1506 quiesce_migration(mg);
1507}
1508
1509static void demote_then_promote(struct cache *cache, struct prealloc *structs,
1510 dm_oblock_t old_oblock, dm_oblock_t new_oblock,
1511 dm_cblock_t cblock,
1512 struct dm_bio_prison_cell *old_ocell,
1513 struct dm_bio_prison_cell *new_ocell)
1514{
1515 struct dm_cache_migration *mg = prealloc_get_migration(structs);
1516
1517 mg->err = false;
1518 mg->discard = false;
1519 mg->writeback = false;
1520 mg->demote = true;
1521 mg->promote = true;
1522 mg->requeue_holder = true;
1523 mg->invalidate = false;
1524 mg->cache = cache;
1525 mg->old_oblock = old_oblock;
1526 mg->new_oblock = new_oblock;
1527 mg->cblock = cblock;
1528 mg->old_ocell = old_ocell;
1529 mg->new_ocell = new_ocell;
1530 mg->start_jiffies = jiffies;
1531
1532 inc_io_migrations(cache);
1533 quiesce_migration(mg);
1534}
1535
/*
 * Demote the cache entry at 'cblock' and remove its mapping, so the
 * origin block becomes uncached again.
 */
1540static void invalidate(struct cache *cache, struct prealloc *structs,
1541 dm_oblock_t oblock, dm_cblock_t cblock,
1542 struct dm_bio_prison_cell *cell)
1543{
1544 struct dm_cache_migration *mg = prealloc_get_migration(structs);
1545
1546 mg->err = false;
1547 mg->discard = false;
1548 mg->writeback = false;
1549 mg->demote = true;
1550 mg->promote = false;
1551 mg->requeue_holder = true;
1552 mg->invalidate = true;
1553 mg->cache = cache;
1554 mg->old_oblock = oblock;
1555 mg->cblock = cblock;
1556 mg->old_ocell = cell;
1557 mg->new_ocell = NULL;
1558 mg->start_jiffies = jiffies;
1559
1560 inc_io_migrations(cache);
1561 quiesce_migration(mg);
1562}
1563
1564static void discard(struct cache *cache, struct prealloc *structs,
1565 struct dm_bio_prison_cell *cell)
1566{
1567 struct dm_cache_migration *mg = prealloc_get_migration(structs);
1568
1569 mg->err = false;
1570 mg->discard = true;
1571 mg->writeback = false;
1572 mg->demote = false;
1573 mg->promote = false;
1574 mg->requeue_holder = false;
1575 mg->invalidate = false;
1576 mg->cache = cache;
1577 mg->old_ocell = NULL;
1578 mg->new_ocell = cell;
1579 mg->start_jiffies = jiffies;
1580
1581 quiesce_migration(mg);
1582}
1583
/*----------------------------------------------------------------
 * bio processing
 *--------------------------------------------------------------*/
1587static void defer_bio(struct cache *cache, struct bio *bio)
1588{
1589 unsigned long flags;
1590
1591 spin_lock_irqsave(&cache->lock, flags);
1592 bio_list_add(&cache->deferred_bios, bio);
1593 spin_unlock_irqrestore(&cache->lock, flags);
1594
1595 wake_worker(cache);
1596}
1597
1598static void process_flush_bio(struct cache *cache, struct bio *bio)
1599{
1600 size_t pb_data_size = get_per_bio_data_size(cache);
1601 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
1602
1603 BUG_ON(bio->bi_iter.bi_size);
1604 if (!pb->req_nr)
1605 remap_to_origin(cache, bio);
1606 else
1607 remap_to_cache(cache, bio, 0);
1608

	/*
	 * REQ_FLUSH is not directed at any particular block, so we don't
	 * need to inc_ds().  issue() will defer it until the next commit.
	 */
1614 issue(cache, bio);
1615}
1616
1617static void process_discard_bio(struct cache *cache, struct prealloc *structs,
1618 struct bio *bio)
1619{
1620 int r;
1621 dm_dblock_t b, e;
1622 struct dm_bio_prison_cell *cell_prealloc, *new_ocell;
1623
1624 calc_discard_block_range(cache, bio, &b, &e);
1625 if (b == e) {
1626 bio_endio(bio);
1627 return;
1628 }
1629
1630 cell_prealloc = prealloc_get_cell(structs);
1631 r = bio_detain_range(cache, dblock_to_oblock(cache, b), dblock_to_oblock(cache, e), bio, cell_prealloc,
1632 (cell_free_fn) prealloc_put_cell,
1633 structs, &new_ocell);
1634 if (r > 0)
1635 return;
1636
1637 discard(cache, structs, new_ocell);
1638}
1639
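/*
 * Returns true if the volume of IO a new migration would add stays below
 * the user configurable migration_threshold (expressed in sectors).
 */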
1640static bool spare_migration_bandwidth(struct cache *cache)
1641{
1642 sector_t current_volume = (atomic_read(&cache->nr_io_migrations) + 1) *
1643 cache->sectors_per_block;
1644 return current_volume < cache->migration_threshold;
1645}
1646
1647static void inc_hit_counter(struct cache *cache, struct bio *bio)
1648{
1649 atomic_inc(bio_data_dir(bio) == READ ?
1650 &cache->stats.read_hit : &cache->stats.write_hit);
1651}
1652
1653static void inc_miss_counter(struct cache *cache, struct bio *bio)
1654{
1655 atomic_inc(bio_data_dir(bio) == READ ?
1656 &cache->stats.read_miss : &cache->stats.write_miss);
1657}

/*----------------------------------------------------------------*/

1661struct inc_detail {
1662 struct cache *cache;
1663 struct bio_list bios_for_issue;
1664 struct bio_list unhandled_bios;
1665 bool any_writes;
1666};
1667
1668static void inc_fn(void *context, struct dm_bio_prison_cell *cell)
1669{
1670 struct bio *bio;
1671 struct inc_detail *detail = context;
1672 struct cache *cache = detail->cache;
1673
1674 inc_ds(cache, cell->holder, cell);
1675 if (bio_data_dir(cell->holder) == WRITE)
1676 detail->any_writes = true;
1677
1678 while ((bio = bio_list_pop(&cell->bios))) {
1679 if (discard_or_flush(bio)) {
1680 bio_list_add(&detail->unhandled_bios, bio);
1681 continue;
1682 }
1683
1684 if (bio_data_dir(bio) == WRITE)
1685 detail->any_writes = true;
1686
1687 bio_list_add(&detail->bios_for_issue, bio);
1688 inc_ds(cache, bio, cell);
1689 }
1690}
1691
1692
1693static void remap_cell_to_origin_clear_discard(struct cache *cache,
1694 struct dm_bio_prison_cell *cell,
1695 dm_oblock_t oblock, bool issue_holder)
1696{
1697 struct bio *bio;
1698 unsigned long flags;
1699 struct inc_detail detail;
1700
1701 detail.cache = cache;
1702 bio_list_init(&detail.bios_for_issue);
1703 bio_list_init(&detail.unhandled_bios);
1704 detail.any_writes = false;
1705
1706 spin_lock_irqsave(&cache->lock, flags);
1707 dm_cell_visit_release(cache->prison, inc_fn, &detail, cell);
1708 bio_list_merge(&cache->deferred_bios, &detail.unhandled_bios);
1709 spin_unlock_irqrestore(&cache->lock, flags);
1710
1711 remap_to_origin(cache, cell->holder);
1712 if (issue_holder)
1713 issue(cache, cell->holder);
1714 else
1715 accounted_begin(cache, cell->holder);
1716
1717 if (detail.any_writes)
1718 clear_discard(cache, oblock_to_dblock(cache, oblock));
1719
1720 while ((bio = bio_list_pop(&detail.bios_for_issue))) {
1721 remap_to_origin(cache, bio);
1722 issue(cache, bio);
1723 }
1724
1725 free_prison_cell(cache, cell);
1726}
1727
1728static void remap_cell_to_cache_dirty(struct cache *cache, struct dm_bio_prison_cell *cell,
1729 dm_oblock_t oblock, dm_cblock_t cblock, bool issue_holder)
1730{
1731 struct bio *bio;
1732 unsigned long flags;
1733 struct inc_detail detail;
1734
1735 detail.cache = cache;
1736 bio_list_init(&detail.bios_for_issue);
1737 bio_list_init(&detail.unhandled_bios);
1738 detail.any_writes = false;
1739
1740 spin_lock_irqsave(&cache->lock, flags);
1741 dm_cell_visit_release(cache->prison, inc_fn, &detail, cell);
1742 bio_list_merge(&cache->deferred_bios, &detail.unhandled_bios);
1743 spin_unlock_irqrestore(&cache->lock, flags);
1744
1745 remap_to_cache(cache, cell->holder, cblock);
1746 if (issue_holder)
1747 issue(cache, cell->holder);
1748 else
1749 accounted_begin(cache, cell->holder);
1750
1751 if (detail.any_writes) {
1752 set_dirty(cache, oblock, cblock);
1753 clear_discard(cache, oblock_to_dblock(cache, oblock));
1754 }
1755
1756 while ((bio = bio_list_pop(&detail.bios_for_issue))) {
1757 remap_to_cache(cache, bio, cblock);
1758 issue(cache, bio);
1759 }
1760
1761 free_prison_cell(cache, cell);
1762}
1763
/*----------------------------------------------------------------*/

1766struct old_oblock_lock {
1767 struct policy_locker locker;
1768 struct cache *cache;
1769 struct prealloc *structs;
1770 struct dm_bio_prison_cell *cell;
1771};
1772
1773static int null_locker(struct policy_locker *locker, dm_oblock_t b)
1774{
	/* This should never be called; no demotion is possible here. */
1776 BUG();
1777 return 0;
1778}
1779
1780static int cell_locker(struct policy_locker *locker, dm_oblock_t b)
1781{
1782 struct old_oblock_lock *l = container_of(locker, struct old_oblock_lock, locker);
1783 struct dm_bio_prison_cell *cell_prealloc = prealloc_get_cell(l->structs);
1784
1785 return bio_detain(l->cache, b, NULL, cell_prealloc,
1786 (cell_free_fn) prealloc_put_cell,
1787 l->structs, &l->cell);
1788}
1789
1790static void process_cell(struct cache *cache, struct prealloc *structs,
1791 struct dm_bio_prison_cell *new_ocell)
1792{
1793 int r;
1794 bool release_cell = true;
1795 struct bio *bio = new_ocell->holder;
1796 dm_oblock_t block = get_bio_block(cache, bio);
1797 struct policy_result lookup_result;
1798 bool passthrough = passthrough_mode(&cache->features);
1799 bool fast_promotion, can_migrate;
1800 struct old_oblock_lock ool;
1801
1802 fast_promotion = is_discarded_oblock(cache, block) || bio_writes_complete_block(cache, bio);
1803 can_migrate = !passthrough && (fast_promotion || spare_migration_bandwidth(cache));
1804
1805 ool.locker.fn = cell_locker;
1806 ool.cache = cache;
1807 ool.structs = structs;
1808 ool.cell = NULL;
1809 r = policy_map(cache->policy, block, true, can_migrate, fast_promotion,
1810 bio, &ool.locker, &lookup_result);
1811
1812 if (r == -EWOULDBLOCK)
		/* migration has been denied */
1814 lookup_result.op = POLICY_MISS;
1815
1816 switch (lookup_result.op) {
1817 case POLICY_HIT:
1818 if (passthrough) {
1819 inc_miss_counter(cache, bio);

			/*
			 * Passthrough always maps to the origin,
			 * invalidating any cache blocks that are written
			 * to.
			 */

1827 if (bio_data_dir(bio) == WRITE) {
1828 atomic_inc(&cache->stats.demotion);
1829 invalidate(cache, structs, block, lookup_result.cblock, new_ocell);
1830 release_cell = false;
1831
1832 } else {
1833
1834 remap_to_origin_clear_discard(cache, bio, block);
1835 inc_and_issue(cache, bio, new_ocell);
1836 }
1837 } else {
1838 inc_hit_counter(cache, bio);
1839
1840 if (bio_data_dir(bio) == WRITE &&
1841 writethrough_mode(&cache->features) &&
1842 !is_dirty(cache, lookup_result.cblock)) {
1843 remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
1844 inc_and_issue(cache, bio, new_ocell);
1845
1846 } else {
1847 remap_cell_to_cache_dirty(cache, new_ocell, block, lookup_result.cblock, true);
1848 release_cell = false;
1849 }
1850 }
1851
1852 break;
1853
1854 case POLICY_MISS:
1855 inc_miss_counter(cache, bio);
1856 remap_cell_to_origin_clear_discard(cache, new_ocell, block, true);
1857 release_cell = false;
1858 break;
1859
1860 case POLICY_NEW:
1861 atomic_inc(&cache->stats.promotion);
1862 promote(cache, structs, block, lookup_result.cblock, new_ocell);
1863 release_cell = false;
1864 break;
1865
1866 case POLICY_REPLACE:
1867 atomic_inc(&cache->stats.demotion);
1868 atomic_inc(&cache->stats.promotion);
1869 demote_then_promote(cache, structs, lookup_result.old_oblock,
1870 block, lookup_result.cblock,
1871 ool.cell, new_ocell);
1872 release_cell = false;
1873 break;
1874
1875 default:
1876 DMERR_LIMIT("%s: %s: erroring bio, unknown policy op: %u",
1877 cache_device_name(cache), __func__,
1878 (unsigned) lookup_result.op);
1879 bio_io_error(bio);
1880 }
1881
1882 if (release_cell)
1883 cell_defer(cache, new_ocell, false);
1884}
1885
1886static void process_bio(struct cache *cache, struct prealloc *structs,
1887 struct bio *bio)
1888{
1889 int r;
1890 dm_oblock_t block = get_bio_block(cache, bio);
1891 struct dm_bio_prison_cell *cell_prealloc, *new_ocell;
1892
	/*
	 * Check to see if that block is currently migrating.
	 */
1896 cell_prealloc = prealloc_get_cell(structs);
1897 r = bio_detain(cache, block, bio, cell_prealloc,
1898 (cell_free_fn) prealloc_put_cell,
1899 structs, &new_ocell);
1900 if (r > 0)
1901 return;
1902
1903 process_cell(cache, structs, new_ocell);
1904}
1905
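/*
 * Commit at least every COMMIT_PERIOD; the first test also copes with
 * jiffies wrapping.
 */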
1906static int need_commit_due_to_time(struct cache *cache)
1907{
1908 return jiffies < cache->last_commit_jiffies ||
1909 jiffies > cache->last_commit_jiffies + COMMIT_PERIOD;
1910}
1911
/*
 * A non-zero return indicates read_only or fail_io mode.
 */
1915static int commit(struct cache *cache, bool clean_shutdown)
1916{
1917 int r;
1918
1919 if (get_cache_mode(cache) >= CM_READ_ONLY)
1920 return -EINVAL;
1921
1922 atomic_inc(&cache->stats.commit_count);
1923 r = dm_cache_commit(cache->cmd, clean_shutdown);
1924 if (r)
1925 metadata_operation_failed(cache, "dm_cache_commit", r);
1926
1927 return r;
1928}
1929
1930static int commit_if_needed(struct cache *cache)
1931{
1932 int r = 0;
1933
1934 if ((cache->commit_requested || need_commit_due_to_time(cache)) &&
1935 dm_cache_changed_this_transaction(cache->cmd)) {
1936 r = commit(cache, false);
1937 cache->commit_requested = false;
1938 cache->last_commit_jiffies = jiffies;
1939 }
1940
1941 return r;
1942}
1943
1944static void process_deferred_bios(struct cache *cache)
1945{
1946 bool prealloc_used = false;
1947 unsigned long flags;
1948 struct bio_list bios;
1949 struct bio *bio;
1950 struct prealloc structs;
1951
1952 memset(&structs, 0, sizeof(structs));
1953 bio_list_init(&bios);
1954
1955 spin_lock_irqsave(&cache->lock, flags);
1956 bio_list_merge(&bios, &cache->deferred_bios);
1957 bio_list_init(&cache->deferred_bios);
1958 spin_unlock_irqrestore(&cache->lock, flags);
1959
1960 while (!bio_list_empty(&bios)) {
		/*
		 * If we've got no free migration structs, and processing
		 * this bio might require one, we pause until there are some
		 * prepared mappings to process.
		 */
1966 prealloc_used = true;
1967 if (prealloc_data_structs(cache, &structs)) {
1968 spin_lock_irqsave(&cache->lock, flags);
1969 bio_list_merge(&cache->deferred_bios, &bios);
1970 spin_unlock_irqrestore(&cache->lock, flags);
1971 break;
1972 }
1973
1974 bio = bio_list_pop(&bios);
1975
1976 if (bio->bi_rw & REQ_FLUSH)
1977 process_flush_bio(cache, bio);
1978 else if (bio->bi_rw & REQ_DISCARD)
1979 process_discard_bio(cache, &structs, bio);
1980 else
1981 process_bio(cache, &structs, bio);
1982 }
1983
1984 if (prealloc_used)
1985 prealloc_free_structs(cache, &structs);
1986}
1987
1988static void process_deferred_cells(struct cache *cache)
1989{
1990 bool prealloc_used = false;
1991 unsigned long flags;
1992 struct dm_bio_prison_cell *cell, *tmp;
1993 struct list_head cells;
1994 struct prealloc structs;
1995
1996 memset(&structs, 0, sizeof(structs));
1997
1998 INIT_LIST_HEAD(&cells);
1999
2000 spin_lock_irqsave(&cache->lock, flags);
2001 list_splice_init(&cache->deferred_cells, &cells);
2002 spin_unlock_irqrestore(&cache->lock, flags);
2003
2004 list_for_each_entry_safe(cell, tmp, &cells, user_list) {
		/*
		 * If we've got no free migration structs, and processing
		 * this cell might require one, we pause until there are some
		 * prepared mappings to process.
		 */
2010 prealloc_used = true;
2011 if (prealloc_data_structs(cache, &structs)) {
2012 spin_lock_irqsave(&cache->lock, flags);
2013 list_splice(&cells, &cache->deferred_cells);
2014 spin_unlock_irqrestore(&cache->lock, flags);
2015 break;
2016 }
2017
2018 process_cell(cache, &structs, cell);
2019 }
2020
2021 if (prealloc_used)
2022 prealloc_free_structs(cache, &structs);
2023}
2024
2025static void process_deferred_flush_bios(struct cache *cache, bool submit_bios)
2026{
2027 unsigned long flags;
2028 struct bio_list bios;
2029 struct bio *bio;
2030
2031 bio_list_init(&bios);
2032
2033 spin_lock_irqsave(&cache->lock, flags);
2034 bio_list_merge(&bios, &cache->deferred_flush_bios);
2035 bio_list_init(&cache->deferred_flush_bios);
2036 spin_unlock_irqrestore(&cache->lock, flags);
2037
	/*
	 * These bios have already been through inc_ds()
	 */
2041 while ((bio = bio_list_pop(&bios)))
2042 submit_bios ? accounted_request(cache, bio) : bio_io_error(bio);
2043}
2044
2045static void process_deferred_writethrough_bios(struct cache *cache)
2046{
2047 unsigned long flags;
2048 struct bio_list bios;
2049 struct bio *bio;
2050
2051 bio_list_init(&bios);
2052
2053 spin_lock_irqsave(&cache->lock, flags);
2054 bio_list_merge(&bios, &cache->deferred_writethrough_bios);
2055 bio_list_init(&cache->deferred_writethrough_bios);
2056 spin_unlock_irqrestore(&cache->lock, flags);
2057
	/*
	 * These bios have already been through inc_ds()
	 */
2061 while ((bio = bio_list_pop(&bios)))
2062 accounted_request(cache, bio);
2063}
2064
2065static void writeback_some_dirty_blocks(struct cache *cache)
2066{
2067 bool prealloc_used = false;
2068 dm_oblock_t oblock;
2069 dm_cblock_t cblock;
2070 struct prealloc structs;
2071 struct dm_bio_prison_cell *old_ocell;
2072 bool busy = !iot_idle_for(&cache->origin_tracker, HZ);
2073
2074 memset(&structs, 0, sizeof(structs));
2075
2076 while (spare_migration_bandwidth(cache)) {
2077 if (policy_writeback_work(cache->policy, &oblock, &cblock, busy))
2078 break;
2079
2080 prealloc_used = true;
2081 if (prealloc_data_structs(cache, &structs) ||
2082 get_cell(cache, oblock, &structs, &old_ocell)) {
2083 policy_set_dirty(cache->policy, oblock);
2084 break;
2085 }
2086
2087 writeback(cache, &structs, oblock, cblock, old_ocell);
2088 }
2089
2090 if (prealloc_used)
2091 prealloc_free_structs(cache, &structs);
2092}
2093
/*----------------------------------------------------------------
 * Invalidations.
 * Dropping something from the cache *without* writing back.
 *--------------------------------------------------------------*/

2099static void process_invalidation_request(struct cache *cache, struct invalidation_request *req)
2100{
2101 int r = 0;
2102 uint64_t begin = from_cblock(req->cblocks->begin);
2103 uint64_t end = from_cblock(req->cblocks->end);
2104
2105 while (begin != end) {
2106 r = policy_remove_cblock(cache->policy, to_cblock(begin));
2107 if (!r) {
2108 r = dm_cache_remove_mapping(cache->cmd, to_cblock(begin));
2109 if (r) {
2110 metadata_operation_failed(cache, "dm_cache_remove_mapping", r);
2111 break;
2112 }
2113
2114 } else if (r == -ENODATA) {
			/* harmless, already unmapped */
2116 r = 0;
2117
2118 } else {
2119 DMERR("%s: policy_remove_cblock failed", cache_device_name(cache));
2120 break;
2121 }
2122
2123 begin++;
2124 }
2125
2126 cache->commit_requested = true;
2127
2128 req->err = r;
2129 atomic_set(&req->complete, 1);
2130
2131 wake_up(&req->result_wait);
2132}
2133
2134static void process_invalidation_requests(struct cache *cache)
2135{
2136 struct list_head list;
2137 struct invalidation_request *req, *tmp;
2138
2139 INIT_LIST_HEAD(&list);
2140 spin_lock(&cache->invalidation_lock);
2141 list_splice_init(&cache->invalidation_requests, &list);
2142 spin_unlock(&cache->invalidation_lock);
2143
2144 list_for_each_entry_safe (req, tmp, &list, list)
2145 process_invalidation_request(cache, req);
2146}
2147
/*----------------------------------------------------------------
 * Main worker loop
 *--------------------------------------------------------------*/
2151static bool is_quiescing(struct cache *cache)
2152{
2153 return atomic_read(&cache->quiescing);
2154}
2155
2156static void ack_quiescing(struct cache *cache)
2157{
2158 if (is_quiescing(cache)) {
2159 atomic_inc(&cache->quiescing_ack);
2160 wake_up(&cache->quiescing_wait);
2161 }
2162}
2163
2164static void wait_for_quiescing_ack(struct cache *cache)
2165{
2166 wait_event(cache->quiescing_wait, atomic_read(&cache->quiescing_ack));
2167}
2168
2169static void start_quiescing(struct cache *cache)
2170{
2171 atomic_inc(&cache->quiescing);
2172 wait_for_quiescing_ack(cache);
2173}
2174
2175static void stop_quiescing(struct cache *cache)
2176{
2177 atomic_set(&cache->quiescing, 0);
2178 atomic_set(&cache->quiescing_ack, 0);
2179}
2180
2181static void wait_for_migrations(struct cache *cache)
2182{
2183 wait_event(cache->migration_wait, !atomic_read(&cache->nr_allocated_migrations));
2184}
2185
2186static void stop_worker(struct cache *cache)
2187{
2188 cancel_delayed_work(&cache->waker);
2189 flush_workqueue(cache->wq);
2190}
2191
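/*
 * Hand deferred cells and bios back with DM_ENDIO_REQUEUE so that upper
 * layers can resubmit them (used when suspending the target).
 */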
2192static void requeue_deferred_cells(struct cache *cache)
2193{
2194 unsigned long flags;
2195 struct list_head cells;
2196 struct dm_bio_prison_cell *cell, *tmp;
2197
2198 INIT_LIST_HEAD(&cells);
2199 spin_lock_irqsave(&cache->lock, flags);
2200 list_splice_init(&cache->deferred_cells, &cells);
2201 spin_unlock_irqrestore(&cache->lock, flags);
2202
2203 list_for_each_entry_safe(cell, tmp, &cells, user_list)
2204 cell_requeue(cache, cell);
2205}
2206
2207static void requeue_deferred_bios(struct cache *cache)
2208{
2209 struct bio *bio;
2210 struct bio_list bios;
2211
2212 bio_list_init(&bios);
2213 bio_list_merge(&bios, &cache->deferred_bios);
2214 bio_list_init(&cache->deferred_bios);
2215
2216 while ((bio = bio_list_pop(&bios))) {
2217 bio->bi_error = DM_ENDIO_REQUEUE;
2218 bio_endio(bio);
2219 }
2220}
2221
2222static int more_work(struct cache *cache)
2223{
2224 if (is_quiescing(cache))
2225 return !list_empty(&cache->quiesced_migrations) ||
2226 !list_empty(&cache->completed_migrations) ||
2227 !list_empty(&cache->need_commit_migrations);
2228 else
2229 return !bio_list_empty(&cache->deferred_bios) ||
2230 !list_empty(&cache->deferred_cells) ||
2231 !bio_list_empty(&cache->deferred_flush_bios) ||
2232 !bio_list_empty(&cache->deferred_writethrough_bios) ||
2233 !list_empty(&cache->quiesced_migrations) ||
2234 !list_empty(&cache->completed_migrations) ||
2235 !list_empty(&cache->need_commit_migrations) ||
2236 cache->invalidate;
2237}
2238
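/*
 * The main worker function: drains the deferred lists, issues and
 * completes migrations, and commits metadata, looping until more_work()
 * says there is nothing left to do.
 */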
2239static void do_worker(struct work_struct *ws)
2240{
2241 struct cache *cache = container_of(ws, struct cache, worker);
2242
2243 do {
2244 if (!is_quiescing(cache)) {
2245 writeback_some_dirty_blocks(cache);
2246 process_deferred_writethrough_bios(cache);
2247 process_deferred_bios(cache);
2248 process_deferred_cells(cache);
2249 process_invalidation_requests(cache);
2250 }
2251
2252 process_migrations(cache, &cache->quiesced_migrations, issue_copy_or_discard);
2253 process_migrations(cache, &cache->completed_migrations, complete_migration);
2254
2255 if (commit_if_needed(cache)) {
2256 process_deferred_flush_bios(cache, false);
2257 process_migrations(cache, &cache->need_commit_migrations, migration_failure);
2258 } else {
2259 process_deferred_flush_bios(cache, true);
2260 process_migrations(cache, &cache->need_commit_migrations,
2261 migration_success_post_commit);
2262 }
2263
2264 ack_quiescing(cache);
2265
2266 } while (more_work(cache));
2267}
2268
2269
2270
2271
2272
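/*
 * Periodic tick: lets the policy age its state, kicks the worker so that
 * metadata commits happen regularly, then re-arms itself.
 */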
2273static void do_waker(struct work_struct *ws)
2274{
2275 struct cache *cache = container_of(to_delayed_work(ws), struct cache, waker);
2276 policy_tick(cache->policy, true);
2277 wake_worker(cache);
2278 queue_delayed_work(cache->wq, &cache->waker, COMMIT_PERIOD);
2279}
2280
2281
2282
2283static int is_congested(struct dm_dev *dev, int bdi_bits)
2284{
2285 struct request_queue *q = bdev_get_queue(dev->bdev);
2286 return bdi_congested(&q->backing_dev_info, bdi_bits);
2287}
2288
2289static int cache_is_congested(struct dm_target_callbacks *cb, int bdi_bits)
2290{
2291 struct cache *cache = container_of(cb, struct cache, callbacks);
2292
2293 return is_congested(cache->origin_dev, bdi_bits) ||
2294 is_congested(cache->cache_dev, bdi_bits);
2295}
2296
2297
2298
2299
2300
2301
2302
2303
2304
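/*----------------------------------------------------------------
 * Target methods
 *--------------------------------------------------------------*/

/*
 * This function is called on the error paths of the constructor as well as
 * from cache_dtr(), so it has to cope with a partially initialised struct.
 */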
2305static void destroy(struct cache *cache)
2306{
2307 unsigned i;
2308
2309 mempool_destroy(cache->migration_pool);
2310
2311 if (cache->all_io_ds)
2312 dm_deferred_set_destroy(cache->all_io_ds);
2313
2314 if (cache->prison)
2315 dm_bio_prison_destroy(cache->prison);
2316
2317 if (cache->wq)
2318 destroy_workqueue(cache->wq);
2319
2320 if (cache->dirty_bitset)
2321 free_bitset(cache->dirty_bitset);
2322
2323 if (cache->discard_bitset)
2324 free_bitset(cache->discard_bitset);
2325
2326 if (cache->copier)
2327 dm_kcopyd_client_destroy(cache->copier);
2328
2329 if (cache->cmd)
2330 dm_cache_metadata_close(cache->cmd);
2331
2332 if (cache->metadata_dev)
2333 dm_put_device(cache->ti, cache->metadata_dev);
2334
2335 if (cache->origin_dev)
2336 dm_put_device(cache->ti, cache->origin_dev);
2337
2338 if (cache->cache_dev)
2339 dm_put_device(cache->ti, cache->cache_dev);
2340
2341 if (cache->policy)
2342 dm_cache_policy_destroy(cache->policy);
2343
	for (i = 0; i < cache->nr_ctr_args; i++)
2345 kfree(cache->ctr_args[i]);
2346 kfree(cache->ctr_args);
2347
2348 kfree(cache);
2349}
2350
2351static void cache_dtr(struct dm_target *ti)
2352{
2353 struct cache *cache = ti->private;
2354
2355 destroy(cache);
2356}
2357
2358static sector_t get_dev_size(struct dm_dev *dev)
2359{
2360 return i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT;
2361}
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
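/*
 * Construct a cache device mapping:
 *
 * cache <metadata dev> <cache dev> <origin dev> <block size>
 *       <#feature args> [<feature arg>]*
 *       <policy> <#policy args> [<policy arg>]*
 *
 * metadata dev : fast device holding the persistent metadata
 * cache dev    : fast device holding cached data blocks
 * origin dev   : slow device holding the original data blocks
 * block size   : cache unit size in sectors
 *
 * #feature args : number of feature arguments passed
 * feature args  : writethrough, writeback or passthrough
 *                 (writeback is the default io mode)
 *
 * policy        : the replacement policy to use
 * #policy args  : an even number of policy arguments corresponding
 *                 to key/value pairs passed to the policy
 * policy args   : key/value pairs passed to the policy,
 *                 e.g. 'sequential_threshold 1024'
 *
 * Illustrative table line (length and device names are hypothetical):
 *
 *   0 41943040 cache /dev/mapper/meta /dev/mapper/ssd /dev/mapper/slow \
 *       512 1 writeback default 0
 */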
2394struct cache_args {
2395 struct dm_target *ti;
2396
2397 struct dm_dev *metadata_dev;
2398
2399 struct dm_dev *cache_dev;
2400 sector_t cache_sectors;
2401
2402 struct dm_dev *origin_dev;
2403 sector_t origin_sectors;
2404
2405 uint32_t block_size;
2406
2407 const char *policy_name;
2408 int policy_argc;
2409 const char **policy_argv;
2410
2411 struct cache_features features;
2412};
2413
2414static void destroy_cache_args(struct cache_args *ca)
2415{
2416 if (ca->metadata_dev)
2417 dm_put_device(ca->ti, ca->metadata_dev);
2418
2419 if (ca->cache_dev)
2420 dm_put_device(ca->ti, ca->cache_dev);
2421
2422 if (ca->origin_dev)
2423 dm_put_device(ca->ti, ca->origin_dev);
2424
2425 kfree(ca);
2426}
2427
2428static bool at_least_one_arg(struct dm_arg_set *as, char **error)
2429{
2430 if (!as->argc) {
2431 *error = "Insufficient args";
2432 return false;
2433 }
2434
2435 return true;
2436}
2437
2438static int parse_metadata_dev(struct cache_args *ca, struct dm_arg_set *as,
2439 char **error)
2440{
2441 int r;
2442 sector_t metadata_dev_size;
2443 char b[BDEVNAME_SIZE];
2444
2445 if (!at_least_one_arg(as, error))
2446 return -EINVAL;
2447
2448 r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
2449 &ca->metadata_dev);
2450 if (r) {
2451 *error = "Error opening metadata device";
2452 return r;
2453 }
2454
2455 metadata_dev_size = get_dev_size(ca->metadata_dev);
2456 if (metadata_dev_size > DM_CACHE_METADATA_MAX_SECTORS_WARNING)
		DMWARN("Metadata device %s is larger than %u sectors: excess space will not be used.",
		       bdevname(ca->metadata_dev->bdev, b), DM_CACHE_METADATA_MAX_SECTORS_WARNING);
2459
2460 return 0;
2461}
2462
2463static int parse_cache_dev(struct cache_args *ca, struct dm_arg_set *as,
2464 char **error)
2465{
2466 int r;
2467
2468 if (!at_least_one_arg(as, error))
2469 return -EINVAL;
2470
2471 r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
2472 &ca->cache_dev);
2473 if (r) {
2474 *error = "Error opening cache device";
2475 return r;
2476 }
2477 ca->cache_sectors = get_dev_size(ca->cache_dev);
2478
2479 return 0;
2480}
2481
2482static int parse_origin_dev(struct cache_args *ca, struct dm_arg_set *as,
2483 char **error)
2484{
2485 int r;
2486
2487 if (!at_least_one_arg(as, error))
2488 return -EINVAL;
2489
2490 r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
2491 &ca->origin_dev);
2492 if (r) {
2493 *error = "Error opening origin device";
2494 return r;
2495 }
2496
2497 ca->origin_sectors = get_dev_size(ca->origin_dev);
2498 if (ca->ti->len > ca->origin_sectors) {
2499 *error = "Device size larger than cached device";
2500 return -EINVAL;
2501 }
2502
2503 return 0;
2504}
2505
2506static int parse_block_size(struct cache_args *ca, struct dm_arg_set *as,
2507 char **error)
2508{
2509 unsigned long block_size;
2510
2511 if (!at_least_one_arg(as, error))
2512 return -EINVAL;
2513
2514 if (kstrtoul(dm_shift_arg(as), 10, &block_size) || !block_size ||
2515 block_size < DATA_DEV_BLOCK_SIZE_MIN_SECTORS ||
2516 block_size > DATA_DEV_BLOCK_SIZE_MAX_SECTORS ||
2517 block_size & (DATA_DEV_BLOCK_SIZE_MIN_SECTORS - 1)) {
2518 *error = "Invalid data block size";
2519 return -EINVAL;
2520 }
2521
2522 if (block_size > ca->cache_sectors) {
2523 *error = "Data block size is larger than the cache device";
2524 return -EINVAL;
2525 }
2526
2527 ca->block_size = block_size;
2528
2529 return 0;
2530}
2531
2532static void init_features(struct cache_features *cf)
2533{
2534 cf->mode = CM_WRITE;
2535 cf->io_mode = CM_IO_WRITEBACK;
2536}
2537
2538static int parse_features(struct cache_args *ca, struct dm_arg_set *as,
2539 char **error)
2540{
2541 static struct dm_arg _args[] = {
2542 {0, 1, "Invalid number of cache feature arguments"},
2543 };
2544
2545 int r;
2546 unsigned argc;
2547 const char *arg;
2548 struct cache_features *cf = &ca->features;
2549
2550 init_features(cf);
2551
2552 r = dm_read_arg_group(_args, as, &argc, error);
2553 if (r)
2554 return -EINVAL;
2555
2556 while (argc--) {
2557 arg = dm_shift_arg(as);
2558
2559 if (!strcasecmp(arg, "writeback"))
2560 cf->io_mode = CM_IO_WRITEBACK;
2561
2562 else if (!strcasecmp(arg, "writethrough"))
2563 cf->io_mode = CM_IO_WRITETHROUGH;
2564
2565 else if (!strcasecmp(arg, "passthrough"))
2566 cf->io_mode = CM_IO_PASSTHROUGH;
2567
2568 else {
2569 *error = "Unrecognised cache feature requested";
2570 return -EINVAL;
2571 }
2572 }
2573
2574 return 0;
2575}
2576
2577static int parse_policy(struct cache_args *ca, struct dm_arg_set *as,
2578 char **error)
2579{
2580 static struct dm_arg _args[] = {
2581 {0, 1024, "Invalid number of policy arguments"},
2582 };
2583
2584 int r;
2585
2586 if (!at_least_one_arg(as, error))
2587 return -EINVAL;
2588
2589 ca->policy_name = dm_shift_arg(as);
2590
2591 r = dm_read_arg_group(_args, as, &ca->policy_argc, error);
2592 if (r)
2593 return -EINVAL;
2594
2595 ca->policy_argv = (const char **)as->argv;
2596 dm_consume_args(as, ca->policy_argc);
2597
2598 return 0;
2599}
2600
2601static int parse_cache_args(struct cache_args *ca, int argc, char **argv,
2602 char **error)
2603{
2604 int r;
2605 struct dm_arg_set as;
2606
2607 as.argc = argc;
2608 as.argv = argv;
2609
2610 r = parse_metadata_dev(ca, &as, error);
2611 if (r)
2612 return r;
2613
2614 r = parse_cache_dev(ca, &as, error);
2615 if (r)
2616 return r;
2617
2618 r = parse_origin_dev(ca, &as, error);
2619 if (r)
2620 return r;
2621
2622 r = parse_block_size(ca, &as, error);
2623 if (r)
2624 return r;
2625
2626 r = parse_features(ca, &as, error);
2627 if (r)
2628 return r;
2629
2630 r = parse_policy(ca, &as, error);
2631 if (r)
2632 return r;
2633
2634 return 0;
2635}
2636
2637
2638
2639static struct kmem_cache *migration_cache;
2640
2641#define NOT_CORE_OPTION 1
2642
2643static int process_config_option(struct cache *cache, const char *key, const char *value)
2644{
2645 unsigned long tmp;
2646
2647 if (!strcasecmp(key, "migration_threshold")) {
2648 if (kstrtoul(value, 10, &tmp))
2649 return -EINVAL;
2650
2651 cache->migration_threshold = tmp;
2652 return 0;
2653 }
2654
2655 return NOT_CORE_OPTION;
2656}
2657
2658static int set_config_value(struct cache *cache, const char *key, const char *value)
2659{
2660 int r = process_config_option(cache, key, value);
2661
2662 if (r == NOT_CORE_OPTION)
2663 r = policy_set_config_value(cache->policy, key, value);
2664
2665 if (r)
2666 DMWARN("bad config value for %s: %s", key, value);
2667
2668 return r;
2669}
2670
2671static int set_config_values(struct cache *cache, int argc, const char **argv)
2672{
2673 int r = 0;
2674
2675 if (argc & 1) {
2676 DMWARN("Odd number of policy arguments given but they should be <key> <value> pairs.");
2677 return -EINVAL;
2678 }
2679
2680 while (argc) {
2681 r = set_config_value(cache, argv[0], argv[1]);
2682 if (r)
2683 break;
2684
2685 argc -= 2;
2686 argv += 2;
2687 }
2688
2689 return r;
2690}
2691
2692static int create_cache_policy(struct cache *cache, struct cache_args *ca,
2693 char **error)
2694{
2695 struct dm_cache_policy *p = dm_cache_policy_create(ca->policy_name,
2696 cache->cache_size,
2697 cache->origin_sectors,
2698 cache->sectors_per_block);
2699 if (IS_ERR(p)) {
2700 *error = "Error creating cache's policy";
2701 return PTR_ERR(p);
2702 }
2703 cache->policy = p;
2704
2705 return 0;
2706}
2707
2708
2709
2710
2711
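/*
 * The discard block size starts at the cache block size and is doubled
 * until there are no more than 2^14 discard blocks across the origin.
 */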
2712#define MAX_DISCARD_BLOCKS (1 << 14)
2713
2714static bool too_many_discard_blocks(sector_t discard_block_size,
2715 sector_t origin_size)
2716{
2717 (void) sector_div(origin_size, discard_block_size);
2718
2719 return origin_size > MAX_DISCARD_BLOCKS;
2720}
2721
2722static sector_t calculate_discard_block_size(sector_t cache_block_size,
2723 sector_t origin_size)
2724{
2725 sector_t discard_block_size = cache_block_size;
2726
2727 if (origin_size)
2728 while (too_many_discard_blocks(discard_block_size, origin_size))
2729 discard_block_size *= 2;
2730
2731 return discard_block_size;
2732}
2733
2734static void set_cache_size(struct cache *cache, dm_cblock_t size)
2735{
2736 dm_block_t nr_blocks = from_cblock(size);
2737
2738 if (nr_blocks > (1 << 20) && cache->cache_size != size)
2739 DMWARN_LIMIT("You have created a cache device with a lot of individual cache blocks (%llu)\n"
2740 "All these mappings can consume a lot of kernel memory, and take some time to read/write.\n"
2741 "Please consider increasing the cache block size to reduce the overall cache block count.",
2742 (unsigned long long) nr_blocks);
2743
2744 cache->cache_size = size;
2745}
2746
2747#define DEFAULT_MIGRATION_THRESHOLD 2048
2748
2749static int cache_create(struct cache_args *ca, struct cache **result)
2750{
2751 int r = 0;
2752 char **error = &ca->ti->error;
2753 struct cache *cache;
2754 struct dm_target *ti = ca->ti;
2755 dm_block_t origin_blocks;
2756 struct dm_cache_metadata *cmd;
2757 bool may_format = ca->features.mode == CM_WRITE;
2758
2759 cache = kzalloc(sizeof(*cache), GFP_KERNEL);
2760 if (!cache)
2761 return -ENOMEM;
2762
2763 cache->ti = ca->ti;
2764 ti->private = cache;
2765 ti->num_flush_bios = 2;
2766 ti->flush_supported = true;
2767
2768 ti->num_discard_bios = 1;
2769 ti->discards_supported = true;
2770 ti->discard_zeroes_data_unsupported = true;
2771 ti->split_discard_bios = false;
2772
2773 cache->features = ca->features;
2774 ti->per_bio_data_size = get_per_bio_data_size(cache);
2775
2776 cache->callbacks.congested_fn = cache_is_congested;
2777 dm_table_add_target_callbacks(ti->table, &cache->callbacks);
2778
2779 cache->metadata_dev = ca->metadata_dev;
2780 cache->origin_dev = ca->origin_dev;
2781 cache->cache_dev = ca->cache_dev;
2782
2783 ca->metadata_dev = ca->origin_dev = ca->cache_dev = NULL;
2784
2785
2786 origin_blocks = cache->origin_sectors = ca->origin_sectors;
2787 origin_blocks = block_div(origin_blocks, ca->block_size);
2788 cache->origin_blocks = to_oblock(origin_blocks);
2789
2790 cache->sectors_per_block = ca->block_size;
2791 if (dm_set_target_max_io_len(ti, cache->sectors_per_block)) {
2792 r = -EINVAL;
2793 goto bad;
2794 }
2795
2796 if (ca->block_size & (ca->block_size - 1)) {
2797 dm_block_t cache_size = ca->cache_sectors;
2798
2799 cache->sectors_per_block_shift = -1;
2800 cache_size = block_div(cache_size, ca->block_size);
2801 set_cache_size(cache, to_cblock(cache_size));
2802 } else {
2803 cache->sectors_per_block_shift = __ffs(ca->block_size);
2804 set_cache_size(cache, to_cblock(ca->cache_sectors >> cache->sectors_per_block_shift));
2805 }
2806
2807 r = create_cache_policy(cache, ca, error);
2808 if (r)
2809 goto bad;
2810
2811 cache->policy_nr_args = ca->policy_argc;
2812 cache->migration_threshold = DEFAULT_MIGRATION_THRESHOLD;
2813
2814 r = set_config_values(cache, ca->policy_argc, ca->policy_argv);
2815 if (r) {
2816 *error = "Error setting cache policy's config values";
2817 goto bad;
2818 }
2819
2820 cmd = dm_cache_metadata_open(cache->metadata_dev->bdev,
2821 ca->block_size, may_format,
2822 dm_cache_policy_get_hint_size(cache->policy));
2823 if (IS_ERR(cmd)) {
2824 *error = "Error creating metadata object";
2825 r = PTR_ERR(cmd);
2826 goto bad;
2827 }
2828 cache->cmd = cmd;
2829 set_cache_mode(cache, CM_WRITE);
2830 if (get_cache_mode(cache) != CM_WRITE) {
2831 *error = "Unable to get write access to metadata, please check/repair metadata.";
2832 r = -EINVAL;
2833 goto bad;
2834 }
2835
2836 if (passthrough_mode(&cache->features)) {
2837 bool all_clean;
2838
2839 r = dm_cache_metadata_all_clean(cache->cmd, &all_clean);
2840 if (r) {
2841 *error = "dm_cache_metadata_all_clean() failed";
2842 goto bad;
2843 }
2844
2845 if (!all_clean) {
2846 *error = "Cannot enter passthrough mode unless all blocks are clean";
2847 r = -EINVAL;
2848 goto bad;
2849 }
2850 }
2851
2852 spin_lock_init(&cache->lock);
2853 INIT_LIST_HEAD(&cache->deferred_cells);
2854 bio_list_init(&cache->deferred_bios);
2855 bio_list_init(&cache->deferred_flush_bios);
2856 bio_list_init(&cache->deferred_writethrough_bios);
2857 INIT_LIST_HEAD(&cache->quiesced_migrations);
2858 INIT_LIST_HEAD(&cache->completed_migrations);
2859 INIT_LIST_HEAD(&cache->need_commit_migrations);
2860 atomic_set(&cache->nr_allocated_migrations, 0);
2861 atomic_set(&cache->nr_io_migrations, 0);
2862 init_waitqueue_head(&cache->migration_wait);
2863
2864 init_waitqueue_head(&cache->quiescing_wait);
2865 atomic_set(&cache->quiescing, 0);
2866 atomic_set(&cache->quiescing_ack, 0);
2867
2868 r = -ENOMEM;
2869 atomic_set(&cache->nr_dirty, 0);
2870 cache->dirty_bitset = alloc_bitset(from_cblock(cache->cache_size));
2871 if (!cache->dirty_bitset) {
2872 *error = "could not allocate dirty bitset";
2873 goto bad;
2874 }
2875 clear_bitset(cache->dirty_bitset, from_cblock(cache->cache_size));
2876
2877 cache->discard_block_size =
2878 calculate_discard_block_size(cache->sectors_per_block,
2879 cache->origin_sectors);
2880 cache->discard_nr_blocks = to_dblock(dm_sector_div_up(cache->origin_sectors,
2881 cache->discard_block_size));
2882 cache->discard_bitset = alloc_bitset(from_dblock(cache->discard_nr_blocks));
2883 if (!cache->discard_bitset) {
2884 *error = "could not allocate discard bitset";
2885 goto bad;
2886 }
2887 clear_bitset(cache->discard_bitset, from_dblock(cache->discard_nr_blocks));
2888
2889 cache->copier = dm_kcopyd_client_create(&dm_kcopyd_throttle);
2890 if (IS_ERR(cache->copier)) {
2891 *error = "could not create kcopyd client";
2892 r = PTR_ERR(cache->copier);
2893 goto bad;
2894 }
2895
2896 cache->wq = alloc_ordered_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM);
2897 if (!cache->wq) {
2898 *error = "could not create workqueue for metadata object";
2899 goto bad;
2900 }
2901 INIT_WORK(&cache->worker, do_worker);
2902 INIT_DELAYED_WORK(&cache->waker, do_waker);
2903 cache->last_commit_jiffies = jiffies;
2904
2905 cache->prison = dm_bio_prison_create();
2906 if (!cache->prison) {
2907 *error = "could not create bio prison";
2908 goto bad;
2909 }
2910
2911 cache->all_io_ds = dm_deferred_set_create();
2912 if (!cache->all_io_ds) {
2913 *error = "could not create all_io deferred set";
2914 goto bad;
2915 }
2916
2917 cache->migration_pool = mempool_create_slab_pool(MIGRATION_POOL_SIZE,
2918 migration_cache);
2919 if (!cache->migration_pool) {
2920 *error = "Error creating cache's migration mempool";
2921 goto bad;
2922 }
2923
2924 cache->need_tick_bio = true;
2925 cache->sized = false;
2926 cache->invalidate = false;
2927 cache->commit_requested = false;
2928 cache->loaded_mappings = false;
2929 cache->loaded_discards = false;
2930
2931 load_stats(cache);
2932
2933 atomic_set(&cache->stats.demotion, 0);
2934 atomic_set(&cache->stats.promotion, 0);
2935 atomic_set(&cache->stats.copies_avoided, 0);
2936 atomic_set(&cache->stats.cache_cell_clash, 0);
2937 atomic_set(&cache->stats.commit_count, 0);
2938 atomic_set(&cache->stats.discard_count, 0);
2939
2940 spin_lock_init(&cache->invalidation_lock);
2941 INIT_LIST_HEAD(&cache->invalidation_requests);
2942
2943 iot_init(&cache->origin_tracker);
2944
2945 *result = cache;
2946 return 0;
2947
2948bad:
2949 destroy(cache);
2950 return r;
2951}
2952
2953static int copy_ctr_args(struct cache *cache, int argc, const char **argv)
2954{
2955 unsigned i;
2956 const char **copy;
2957
2958 copy = kcalloc(argc, sizeof(*copy), GFP_KERNEL);
2959 if (!copy)
2960 return -ENOMEM;
2961 for (i = 0; i < argc; i++) {
2962 copy[i] = kstrdup(argv[i], GFP_KERNEL);
2963 if (!copy[i]) {
2964 while (i--)
2965 kfree(copy[i]);
2966 kfree(copy);
2967 return -ENOMEM;
2968 }
2969 }
2970
2971 cache->nr_ctr_args = argc;
2972 cache->ctr_args = copy;
2973
2974 return 0;
2975}
2976
2977static int cache_ctr(struct dm_target *ti, unsigned argc, char **argv)
2978{
2979 int r = -EINVAL;
2980 struct cache_args *ca;
2981 struct cache *cache = NULL;
2982
2983 ca = kzalloc(sizeof(*ca), GFP_KERNEL);
2984 if (!ca) {
2985 ti->error = "Error allocating memory for cache";
2986 return -ENOMEM;
2987 }
2988 ca->ti = ti;
2989
2990 r = parse_cache_args(ca, argc, argv, &ti->error);
2991 if (r)
2992 goto out;
2993
2994 r = cache_create(ca, &cache);
2995 if (r)
2996 goto out;
2997
2998 r = copy_ctr_args(cache, argc - 3, (const char **)argv + 3);
2999 if (r) {
3000 destroy(cache);
3001 goto out;
3002 }
3003
3004 ti->private = cache;
3005
3006out:
3007 destroy_cache_args(ca);
3008 return r;
3009}
3010
3011
3012
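/*
 * Remap bios that can be serviced without blocking and defer everything else
 * to the worker thread.  Returns DM_MAPIO_REMAPPED for bios remapped here and
 * DM_MAPIO_SUBMITTED for bios that have been deferred or completed.
 */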
3013static int cache_map(struct dm_target *ti, struct bio *bio)
3014{
3015 struct cache *cache = ti->private;
3016
3017 int r;
3018 struct dm_bio_prison_cell *cell = NULL;
3019 dm_oblock_t block = get_bio_block(cache, bio);
3020 size_t pb_data_size = get_per_bio_data_size(cache);
3021 bool can_migrate = false;
3022 bool fast_promotion;
3023 struct policy_result lookup_result;
3024 struct per_bio_data *pb = init_per_bio_data(bio, pb_data_size);
3025 struct old_oblock_lock ool;
3026
3027 ool.locker.fn = null_locker;
3028
3029 if (unlikely(from_oblock(block) >= from_oblock(cache->origin_blocks))) {
3030
3031
3032
3033
3034
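		/*
		 * This can only occur if the io goes to a partial block at
		 * the end of the origin device.  We don't cache these.
		 * Just remap to the origin and carry on.
		 */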
3035 remap_to_origin(cache, bio);
3036 accounted_begin(cache, bio);
3037 return DM_MAPIO_REMAPPED;
3038 }
3039
3040 if (discard_or_flush(bio)) {
3041 defer_bio(cache, bio);
3042 return DM_MAPIO_SUBMITTED;
3043 }
3044
3045
3046
3047
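	/*
	 * Check to see if the block is currently migrating; if it is, the bio
	 * is queued on the cell and handled once the migration completes.
	 */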
3048 cell = alloc_prison_cell(cache);
3049 if (!cell) {
3050 defer_bio(cache, bio);
3051 return DM_MAPIO_SUBMITTED;
3052 }
3053
3054 r = bio_detain(cache, block, bio, cell,
3055 (cell_free_fn) free_prison_cell,
3056 cache, &cell);
3057 if (r) {
3058 if (r < 0)
3059 defer_bio(cache, bio);
3060
3061 return DM_MAPIO_SUBMITTED;
3062 }
3063
3064 fast_promotion = is_discarded_oblock(cache, block) || bio_writes_complete_block(cache, bio);
3065
3066 r = policy_map(cache->policy, block, false, can_migrate, fast_promotion,
3067 bio, &ool.locker, &lookup_result);
3068 if (r == -EWOULDBLOCK) {
3069 cell_defer(cache, cell, true);
3070 return DM_MAPIO_SUBMITTED;
3071
3072 } else if (r) {
3073 DMERR_LIMIT("%s: Unexpected return from cache replacement policy: %d",
3074 cache_device_name(cache), r);
3075 cell_defer(cache, cell, false);
3076 bio_io_error(bio);
3077 return DM_MAPIO_SUBMITTED;
3078 }
3079
3080 r = DM_MAPIO_REMAPPED;
3081 switch (lookup_result.op) {
3082 case POLICY_HIT:
3083 if (passthrough_mode(&cache->features)) {
3084 if (bio_data_dir(bio) == WRITE) {
3085
3086
3087
3088
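				/*
				 * Writes in passthrough mode invalidate the
				 * cache block, so defer to the worker thread.
				 */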
3089 cell_defer(cache, cell, true);
3090 r = DM_MAPIO_SUBMITTED;
3091
3092 } else {
3093 inc_miss_counter(cache, bio);
3094 remap_to_origin_clear_discard(cache, bio, block);
3095 accounted_begin(cache, bio);
3096 inc_ds(cache, bio, cell);
3097
3098
3099 cell_defer(cache, cell, false);
3100 }
3101
3102 } else {
3103 inc_hit_counter(cache, bio);
3104 if (bio_data_dir(bio) == WRITE && writethrough_mode(&cache->features) &&
3105 !is_dirty(cache, lookup_result.cblock)) {
3106 remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
3107 accounted_begin(cache, bio);
3108 inc_ds(cache, bio, cell);
3109 cell_defer(cache, cell, false);
3110
3111 } else
3112 remap_cell_to_cache_dirty(cache, cell, block, lookup_result.cblock, false);
3113 }
3114 break;
3115
3116 case POLICY_MISS:
3117 inc_miss_counter(cache, bio);
3118 if (pb->req_nr != 0) {
3119
3120
3121
3122
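			/*
			 * This is a duplicate writethrough io that is no
			 * longer needed because the block has been demoted.
			 */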
3123 bio_endio(bio);
3124
3125 cell_defer(cache, cell, false);
3126 r = DM_MAPIO_SUBMITTED;
3127
3128 } else
3129 remap_cell_to_origin_clear_discard(cache, cell, block, false);
3130 break;
3131
3132 default:
3133 DMERR_LIMIT("%s: %s: erroring bio: unknown policy op: %u",
3134 cache_device_name(cache), __func__,
3135 (unsigned) lookup_result.op);
3136 cell_defer(cache, cell, false);
3137 bio_io_error(bio);
3138 r = DM_MAPIO_SUBMITTED;
3139 }
3140
3141 return r;
3142}
3143
3144static int cache_end_io(struct dm_target *ti, struct bio *bio, int error)
3145{
3146 struct cache *cache = ti->private;
3147 unsigned long flags;
3148 size_t pb_data_size = get_per_bio_data_size(cache);
3149 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
3150
3151 if (pb->tick) {
3152 policy_tick(cache->policy, false);
3153
3154 spin_lock_irqsave(&cache->lock, flags);
3155 cache->need_tick_bio = true;
3156 spin_unlock_irqrestore(&cache->lock, flags);
3157 }
3158
3159 check_for_quiesced_migrations(cache, pb);
3160 accounted_complete(cache, bio);
3161
3162 return 0;
3163}
3164
3165static int write_dirty_bitset(struct cache *cache)
3166{
	int r;
	unsigned i;
3168
3169 if (get_cache_mode(cache) >= CM_READ_ONLY)
3170 return -EINVAL;
3171
3172 for (i = 0; i < from_cblock(cache->cache_size); i++) {
3173 r = dm_cache_set_dirty(cache->cmd, to_cblock(i),
3174 is_dirty(cache, to_cblock(i)));
3175 if (r) {
3176 metadata_operation_failed(cache, "dm_cache_set_dirty", r);
3177 return r;
3178 }
3179 }
3180
3181 return 0;
3182}
3183
3184static int write_discard_bitset(struct cache *cache)
3185{
	int r;
	unsigned i;
3187
3188 if (get_cache_mode(cache) >= CM_READ_ONLY)
3189 return -EINVAL;
3190
3191 r = dm_cache_discard_bitset_resize(cache->cmd, cache->discard_block_size,
3192 cache->discard_nr_blocks);
3193 if (r) {
3194 DMERR("%s: could not resize on-disk discard bitset", cache_device_name(cache));
3195 metadata_operation_failed(cache, "dm_cache_discard_bitset_resize", r);
3196 return r;
3197 }
3198
3199 for (i = 0; i < from_dblock(cache->discard_nr_blocks); i++) {
3200 r = dm_cache_set_discard(cache->cmd, to_dblock(i),
3201 is_discarded(cache, to_dblock(i)));
3202 if (r) {
3203 metadata_operation_failed(cache, "dm_cache_set_discard", r);
3204 return r;
3205 }
3206 }
3207
3208 return 0;
3209}
3210
3211static int write_hints(struct cache *cache)
3212{
3213 int r;
3214
3215 if (get_cache_mode(cache) >= CM_READ_ONLY)
3216 return -EINVAL;
3217
3218 r = dm_cache_write_hints(cache->cmd, cache->policy);
3219 if (r) {
3220 metadata_operation_failed(cache, "dm_cache_write_hints", r);
3221 return r;
3222 }
3223
3224 return 0;
3225}
3226
3227
3228
3229
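/*
 * Returns true only if all of the metadata writes and the final commit
 * succeed.
 */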
3230static bool sync_metadata(struct cache *cache)
3231{
3232 int r1, r2, r3, r4;
3233
3234 r1 = write_dirty_bitset(cache);
3235 if (r1)
3236 DMERR("%s: could not write dirty bitset", cache_device_name(cache));
3237
3238 r2 = write_discard_bitset(cache);
3239 if (r2)
3240 DMERR("%s: could not write discard bitset", cache_device_name(cache));
3241
3242 save_stats(cache);
3243
3244 r3 = write_hints(cache);
3245 if (r3)
3246 DMERR("%s: could not write hints", cache_device_name(cache));
3247
3248
3249
3250
3251
3252
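	/*
	 * A commit is attempted even if some of the writes above failed; the
	 * flag passed down records whether they all succeeded.
	 */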
3253 r4 = commit(cache, !r1 && !r2 && !r3);
3254 if (r4)
3255 DMERR("%s: could not write cache metadata", cache_device_name(cache));
3256
3257 return !r1 && !r2 && !r3 && !r4;
3258}
3259
3260static void cache_postsuspend(struct dm_target *ti)
3261{
3262 struct cache *cache = ti->private;
3263
3264 start_quiescing(cache);
3265 wait_for_migrations(cache);
3266 stop_worker(cache);
3267 requeue_deferred_bios(cache);
3268 requeue_deferred_cells(cache);
3269 stop_quiescing(cache);
3270
3271 if (get_cache_mode(cache) == CM_WRITE)
3272 (void) sync_metadata(cache);
3273}
3274
3275static int load_mapping(void *context, dm_oblock_t oblock, dm_cblock_t cblock,
3276 bool dirty, uint32_t hint, bool hint_valid)
3277{
3278 int r;
3279 struct cache *cache = context;
3280
3281 r = policy_load_mapping(cache->policy, oblock, cblock, hint, hint_valid);
3282 if (r)
3283 return r;
3284
3285 if (dirty)
3286 set_dirty(cache, oblock, cblock);
3287 else
3288 clear_dirty(cache, oblock, cblock);
3289
3290 return 0;
3291}
3292
3293
3294
3295
3296
3297
3298
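/*
 * The discard block size in the on-disk metadata is not necessarily the
 * same as the one currently in use, so only set the discarded attribute
 * for ranges that completely cover a discard block of the current size.
 */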
3299struct discard_load_info {
3300 struct cache *cache;
3301
3302
3303
3304
3305
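	/*
	 * These fields are in units of the on-disk discard block size,
	 * not the current one.
	 */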
3306 dm_block_t block_size;
3307 dm_block_t discard_begin, discard_end;
3308};
3309
3310static void discard_load_info_init(struct cache *cache,
3311 struct discard_load_info *li)
3312{
3313 li->cache = cache;
3314 li->discard_begin = li->discard_end = 0;
3315}
3316
3317static void set_discard_range(struct discard_load_info *li)
3318{
3319 sector_t b, e;
3320
3321 if (li->discard_begin == li->discard_end)
3322 return;
3323
3324
3325
3326
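	/*
	 * Convert to sectors.
	 */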
3327 b = li->discard_begin * li->block_size;
3328 e = li->discard_end * li->block_size;
3329
3330
3331
3332
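	/*
	 * Then convert back to the current discard block size.
	 */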
3333 b = dm_sector_div_up(b, li->cache->discard_block_size);
3334 sector_div(e, li->cache->discard_block_size);
3335
3336
3337
3338
3339
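	/*
	 * The origin may have shrunk, so check we're still within bounds.
	 */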
3340 if (e > from_dblock(li->cache->discard_nr_blocks))
3341 e = from_dblock(li->cache->discard_nr_blocks);
3342
3343 for (; b < e; b++)
3344 set_discard(li->cache, to_dblock(b));
3345}
3346
3347static int load_discard(void *context, sector_t discard_block_size,
3348 dm_dblock_t dblock, bool discard)
3349{
3350 struct discard_load_info *li = context;
3351
3352 li->block_size = discard_block_size;
3353
3354 if (discard) {
3355 if (from_dblock(dblock) == li->discard_end)
3356
3357
3358
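			/*
			 * We're already in a discard range, just extend it.
			 */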
3359 li->discard_end = li->discard_end + 1ULL;
3360
3361 else {
3362
3363
3364
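			/*
			 * Emit the old range and start a new one.
			 */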
3365 set_discard_range(li);
3366 li->discard_begin = from_dblock(dblock);
3367 li->discard_end = li->discard_begin + 1ULL;
3368 }
3369 } else {
3370 set_discard_range(li);
3371 li->discard_begin = li->discard_end = 0;
3372 }
3373
3374 return 0;
3375}
3376
3377static dm_cblock_t get_cache_dev_size(struct cache *cache)
3378{
3379 sector_t size = get_dev_size(cache->cache_dev);
3380 (void) sector_div(size, cache->sectors_per_block);
3381 return to_cblock(size);
3382}
3383
3384static bool can_resize(struct cache *cache, dm_cblock_t new_size)
3385{
3386 if (from_cblock(new_size) > from_cblock(cache->cache_size))
3387 return true;
3388
3389
3390
3391
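	/*
	 * We can't shrink the cache while any of the blocks being dropped
	 * are still dirty.
	 */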
3392 while (from_cblock(new_size) < from_cblock(cache->cache_size)) {
3393 new_size = to_cblock(from_cblock(new_size) + 1);
3394 if (is_dirty(cache, new_size)) {
3395 DMERR("%s: unable to shrink cache; cache block %llu is dirty",
3396 cache_device_name(cache),
3397 (unsigned long long) from_cblock(new_size));
3398 return false;
3399 }
3400 }
3401
3402 return true;
3403}
3404
3405static int resize_cache_dev(struct cache *cache, dm_cblock_t new_size)
3406{
3407 int r;
3408
3409 r = dm_cache_resize(cache->cmd, new_size);
3410 if (r) {
3411 DMERR("%s: could not resize cache metadata", cache_device_name(cache));
3412 metadata_operation_failed(cache, "dm_cache_resize", r);
3413 return r;
3414 }
3415
3416 set_cache_size(cache, new_size);
3417
3418 return 0;
3419}
3420
3421static int cache_preresume(struct dm_target *ti)
3422{
3423 int r = 0;
3424 struct cache *cache = ti->private;
3425 dm_cblock_t csize = get_cache_dev_size(cache);
3426
3427
3428
3429
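	/*
	 * Check to see if the cache has resized.
	 */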
3430 if (!cache->sized) {
3431 r = resize_cache_dev(cache, csize);
3432 if (r)
3433 return r;
3434
3435 cache->sized = true;
3436
3437 } else if (csize != cache->cache_size) {
3438 if (!can_resize(cache, csize))
3439 return -EINVAL;
3440
3441 r = resize_cache_dev(cache, csize);
3442 if (r)
3443 return r;
3444 }
3445
3446 if (!cache->loaded_mappings) {
3447 r = dm_cache_load_mappings(cache->cmd, cache->policy,
3448 load_mapping, cache);
3449 if (r) {
3450 DMERR("%s: could not load cache mappings", cache_device_name(cache));
3451 metadata_operation_failed(cache, "dm_cache_load_mappings", r);
3452 return r;
3453 }
3454
3455 cache->loaded_mappings = true;
3456 }
3457
3458 if (!cache->loaded_discards) {
3459 struct discard_load_info li;
3460
3461
3462
3463
3464
3465
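		/*
		 * The discard bitset could have been resized, or the discard
		 * block size changed.  To be safe, start by clearing every
		 * dblock and reload the ranges from the metadata.
		 */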
3466 clear_bitset(cache->discard_bitset, from_dblock(cache->discard_nr_blocks));
3467
3468 discard_load_info_init(cache, &li);
3469 r = dm_cache_load_discards(cache->cmd, load_discard, &li);
3470 if (r) {
3471 DMERR("%s: could not load origin discards", cache_device_name(cache));
3472 metadata_operation_failed(cache, "dm_cache_load_discards", r);
3473 return r;
3474 }
3475 set_discard_range(&li);
3476
3477 cache->loaded_discards = true;
3478 }
3479
3480 return r;
3481}
3482
3483static void cache_resume(struct dm_target *ti)
3484{
3485 struct cache *cache = ti->private;
3486
3487 cache->need_tick_bio = true;
3488 do_waker(&cache->waker.work);
3489}
3490
3491
3492
3493
3494
3495
3496
3497
3498
3499
3500
3501
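/*
 * Status format:
 *
 * <metadata block size> <#used metadata blocks>/<#total metadata blocks>
 * <cache block size> <#used cache blocks>/<#total cache blocks>
 * <#read hits> <#read misses> <#write hits> <#write misses>
 * <#demotions> <#promotions> <#dirty>
 * <#features> <features>*
 * <#core args> <core args>
 * <policy name> <#policy args> <policy args>*
 * <cache metadata mode (rw|ro)> <needs_check|->
 */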
3502static void cache_status(struct dm_target *ti, status_type_t type,
3503 unsigned status_flags, char *result, unsigned maxlen)
3504{
3505 int r = 0;
3506 unsigned i;
3507 ssize_t sz = 0;
3508 dm_block_t nr_free_blocks_metadata = 0;
3509 dm_block_t nr_blocks_metadata = 0;
3510 char buf[BDEVNAME_SIZE];
3511 struct cache *cache = ti->private;
3512 dm_cblock_t residency;
3513
3514 switch (type) {
3515 case STATUSTYPE_INFO:
3516 if (get_cache_mode(cache) == CM_FAIL) {
3517 DMEMIT("Fail");
3518 break;
3519 }
3520
3521
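		/* Commit first so the metadata counts reported below are up to date. */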
3522 if (!(status_flags & DM_STATUS_NOFLUSH_FLAG) && !dm_suspended(ti))
3523 (void) commit(cache, false);
3524
3525 r = dm_cache_get_free_metadata_block_count(cache->cmd, &nr_free_blocks_metadata);
3526 if (r) {
3527 DMERR("%s: dm_cache_get_free_metadata_block_count returned %d",
3528 cache_device_name(cache), r);
3529 goto err;
3530 }
3531
3532 r = dm_cache_get_metadata_dev_size(cache->cmd, &nr_blocks_metadata);
3533 if (r) {
3534 DMERR("%s: dm_cache_get_metadata_dev_size returned %d",
3535 cache_device_name(cache), r);
3536 goto err;
3537 }
3538
3539 residency = policy_residency(cache->policy);
3540
		DMEMIT("%u %llu/%llu %llu %llu/%llu %u %u %u %u %u %u %lu ",
		       (unsigned)DM_CACHE_METADATA_BLOCK_SIZE,
		       (unsigned long long)(nr_blocks_metadata - nr_free_blocks_metadata),
		       (unsigned long long)nr_blocks_metadata,
		       (unsigned long long)cache->sectors_per_block,
3546 (unsigned long long) from_cblock(residency),
3547 (unsigned long long) from_cblock(cache->cache_size),
3548 (unsigned) atomic_read(&cache->stats.read_hit),
3549 (unsigned) atomic_read(&cache->stats.read_miss),
3550 (unsigned) atomic_read(&cache->stats.write_hit),
3551 (unsigned) atomic_read(&cache->stats.write_miss),
3552 (unsigned) atomic_read(&cache->stats.demotion),
3553 (unsigned) atomic_read(&cache->stats.promotion),
3554 (unsigned long) atomic_read(&cache->nr_dirty));
3555
3556 if (writethrough_mode(&cache->features))
3557 DMEMIT("1 writethrough ");
3558
3559 else if (passthrough_mode(&cache->features))
3560 DMEMIT("1 passthrough ");
3561
3562 else if (writeback_mode(&cache->features))
3563 DMEMIT("1 writeback ");
3564
3565 else {
3566 DMERR("%s: internal error: unknown io mode: %d",
3567 cache_device_name(cache), (int) cache->features.io_mode);
3568 goto err;
3569 }
3570
3571 DMEMIT("2 migration_threshold %llu ", (unsigned long long) cache->migration_threshold);
3572
3573 DMEMIT("%s ", dm_cache_policy_get_name(cache->policy));
3574 if (sz < maxlen) {
3575 r = policy_emit_config_values(cache->policy, result, maxlen, &sz);
3576 if (r)
3577 DMERR("%s: policy_emit_config_values returned %d",
3578 cache_device_name(cache), r);
3579 }
3580
3581 if (get_cache_mode(cache) == CM_READ_ONLY)
3582 DMEMIT("ro ");
3583 else
3584 DMEMIT("rw ");
3585
3586 if (dm_cache_metadata_needs_check(cache->cmd))
3587 DMEMIT("needs_check ");
3588 else
3589 DMEMIT("- ");
3590
3591 break;
3592
3593 case STATUSTYPE_TABLE:
3594 format_dev_t(buf, cache->metadata_dev->bdev->bd_dev);
3595 DMEMIT("%s ", buf);
3596 format_dev_t(buf, cache->cache_dev->bdev->bd_dev);
3597 DMEMIT("%s ", buf);
3598 format_dev_t(buf, cache->origin_dev->bdev->bd_dev);
3599 DMEMIT("%s", buf);
3600
3601 for (i = 0; i < cache->nr_ctr_args - 1; i++)
3602 DMEMIT(" %s", cache->ctr_args[i]);
3603 if (cache->nr_ctr_args)
3604 DMEMIT(" %s", cache->ctr_args[cache->nr_ctr_args - 1]);
3605 }
3606
3607 return;
3608
3609err:
3610 DMEMIT("Error");
3611}
3612
3613
3614
3615
3616
3617
3618
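/*
 * A cache block range can take two forms:
 *
 * i) A single cblock, eg. '3456'
 * ii) A begin and end cblock with a dash between, eg. '123-234'
 */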
3619static int parse_cblock_range(struct cache *cache, const char *str,
3620 struct cblock_range *result)
3621{
3622 char dummy;
3623 uint64_t b, e;
3624 int r;
3625
3626
3627
3628
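	/*
	 * Try and parse form (ii) first.
	 */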
3629 r = sscanf(str, "%llu-%llu%c", &b, &e, &dummy);
3630 if (r < 0)
3631 return r;
3632
3633 if (r == 2) {
3634 result->begin = to_cblock(b);
3635 result->end = to_cblock(e);
3636 return 0;
3637 }
3638
3639
3640
3641
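	/*
	 * That didn't work, try form (i).
	 */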
3642 r = sscanf(str, "%llu%c", &b, &dummy);
3643 if (r < 0)
3644 return r;
3645
3646 if (r == 1) {
3647 result->begin = to_cblock(b);
3648 result->end = to_cblock(from_cblock(result->begin) + 1u);
3649 return 0;
3650 }
3651
3652 DMERR("%s: invalid cblock range '%s'", cache_device_name(cache), str);
3653 return -EINVAL;
3654}
3655
3656static int validate_cblock_range(struct cache *cache, struct cblock_range *range)
3657{
3658 uint64_t b = from_cblock(range->begin);
3659 uint64_t e = from_cblock(range->end);
3660 uint64_t n = from_cblock(cache->cache_size);
3661
3662 if (b >= n) {
3663 DMERR("%s: begin cblock out of range: %llu >= %llu",
3664 cache_device_name(cache), b, n);
3665 return -EINVAL;
3666 }
3667
3668 if (e > n) {
3669 DMERR("%s: end cblock out of range: %llu > %llu",
3670 cache_device_name(cache), e, n);
3671 return -EINVAL;
3672 }
3673
3674 if (b >= e) {
3675 DMERR("%s: invalid cblock range: %llu >= %llu",
3676 cache_device_name(cache), b, e);
3677 return -EINVAL;
3678 }
3679
3680 return 0;
3681}
3682
3683static int request_invalidation(struct cache *cache, struct cblock_range *range)
3684{
3685 struct invalidation_request req;
3686
3687 INIT_LIST_HEAD(&req.list);
3688 req.cblocks = range;
3689 atomic_set(&req.complete, 0);
3690 req.err = 0;
3691 init_waitqueue_head(&req.result_wait);
3692
3693 spin_lock(&cache->invalidation_lock);
3694 list_add(&req.list, &cache->invalidation_requests);
3695 spin_unlock(&cache->invalidation_lock);
3696 wake_worker(cache);
3697
3698 wait_event(req.result_wait, atomic_read(&req.complete));
3699 return req.err;
3700}
3701
3702static int process_invalidate_cblocks_message(struct cache *cache, unsigned count,
3703 const char **cblock_ranges)
3704{
3705 int r = 0;
3706 unsigned i;
3707 struct cblock_range range;
3708
3709 if (!passthrough_mode(&cache->features)) {
3710 DMERR("%s: cache has to be in passthrough mode for invalidation",
3711 cache_device_name(cache));
3712 return -EPERM;
3713 }
3714
3715 for (i = 0; i < count; i++) {
3716 r = parse_cblock_range(cache, cblock_ranges[i], &range);
3717 if (r)
3718 break;
3719
3720 r = validate_cblock_range(cache, &range);
3721 if (r)
3722 break;
3723
3724
3725
3726
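		/*
		 * Hand the range over to the worker and wait for the
		 * invalidation to complete.
		 */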
3727 r = request_invalidation(cache, &range);
3728 if (r)
3729 break;
3730 }
3731
3732 return r;
3733}
3734
3735
3736
3737
3738
3739
3740
3741
3742
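/*
 * Supports
 *	"<key> <value>"
 * and
 *	"invalidate_cblocks [(<begin>)|(<begin>-<end>)]*"
 *
 * The key migration_threshold is handled by the core; everything else is
 * passed on to the policy.
 */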
3743static int cache_message(struct dm_target *ti, unsigned argc, char **argv)
3744{
3745 struct cache *cache = ti->private;
3746
3747 if (!argc)
3748 return -EINVAL;
3749
3750 if (get_cache_mode(cache) >= CM_READ_ONLY) {
3751 DMERR("%s: unable to service cache target messages in READ_ONLY or FAIL mode",
3752 cache_device_name(cache));
3753 return -EOPNOTSUPP;
3754 }
3755
3756 if (!strcasecmp(argv[0], "invalidate_cblocks"))
3757 return process_invalidate_cblocks_message(cache, argc - 1, (const char **) argv + 1);
3758
3759 if (argc != 2)
3760 return -EINVAL;
3761
3762 return set_config_value(cache, argv[0], argv[1]);
3763}
3764
3765static int cache_iterate_devices(struct dm_target *ti,
3766 iterate_devices_callout_fn fn, void *data)
3767{
3768 int r = 0;
3769 struct cache *cache = ti->private;
3770
3771 r = fn(ti, cache->cache_dev, 0, get_dev_size(cache->cache_dev), data);
3772 if (!r)
3773 r = fn(ti, cache->origin_dev, 0, ti->len, data);
3774
3775 return r;
3776}
3777
3778static void set_discard_limits(struct cache *cache, struct queue_limits *limits)
3779{
3780
3781
3782
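	/*
	 * Cap discards at 1024 discard blocks (or the origin size, whichever
	 * is smaller) and expose the discard block size as the granularity.
	 */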
3783 limits->max_discard_sectors = min_t(sector_t, cache->discard_block_size * 1024,
3784 cache->origin_sectors);
3785 limits->discard_granularity = cache->discard_block_size << SECTOR_SHIFT;
3786}
3787
3788static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
3789{
3790 struct cache *cache = ti->private;
3791 uint64_t io_opt_sectors = limits->io_opt >> SECTOR_SHIFT;
3792
3793
3794
3795
3796
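	/*
	 * If the system-determined stacked limits are compatible with the
	 * cache's blocksize (io_opt is a factor) do not override them.
	 */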
3797 if (io_opt_sectors < cache->sectors_per_block ||
3798 do_div(io_opt_sectors, cache->sectors_per_block)) {
3799 blk_limits_io_min(limits, cache->sectors_per_block << SECTOR_SHIFT);
3800 blk_limits_io_opt(limits, cache->sectors_per_block << SECTOR_SHIFT);
3801 }
3802 set_discard_limits(cache, limits);
3803}
3804
3805
3806
3807static struct target_type cache_target = {
3808 .name = "cache",
3809 .version = {1, 8, 0},
3810 .module = THIS_MODULE,
3811 .ctr = cache_ctr,
3812 .dtr = cache_dtr,
3813 .map = cache_map,
3814 .end_io = cache_end_io,
3815 .postsuspend = cache_postsuspend,
3816 .preresume = cache_preresume,
3817 .resume = cache_resume,
3818 .status = cache_status,
3819 .message = cache_message,
3820 .iterate_devices = cache_iterate_devices,
3821 .io_hints = cache_io_hints,
3822};
3823
3824static int __init dm_cache_init(void)
3825{
3826 int r;
3827
3828 r = dm_register_target(&cache_target);
3829 if (r) {
3830 DMERR("cache target registration failed: %d", r);
3831 return r;
3832 }
3833
3834 migration_cache = KMEM_CACHE(dm_cache_migration, 0);
3835 if (!migration_cache) {
3836 dm_unregister_target(&cache_target);
3837 return -ENOMEM;
3838 }
3839
3840 return 0;
3841}
3842
3843static void __exit dm_cache_exit(void)
3844{
3845 dm_unregister_target(&cache_target);
3846 kmem_cache_destroy(migration_cache);
3847}
3848
3849module_init(dm_cache_init);
3850module_exit(dm_cache_exit);
3851
3852MODULE_DESCRIPTION(DM_NAME " cache target");
3853MODULE_AUTHOR("Joe Thornber <ejt@redhat.com>");
3854MODULE_LICENSE("GPL");
3855