#include "dm.h"
#include "dm-uevent.h"

#include <linux/init.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/moduleparam.h>
#include <linux/blkpg.h>
#include <linux/bio.h>
#include <linux/mempool.h>
#include <linux/slab.h>
#include <linux/idr.h>
#include <linux/hdreg.h>
#include <linux/delay.h>
#include <linux/wait.h>
#include <linux/kthread.h>
#include <linux/ktime.h>
#include <linux/elevator.h>
#include <linux/blk-mq.h>

#include <trace/events/block.h>

#define DM_MSG_PREFIX "core"

#ifdef CONFIG_PRINTK
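/*
 * ratelimit state to be used in DMXXX_LIMIT().
 */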
DEFINE_RATELIMIT_STATE(dm_ratelimit_state,
		       DEFAULT_RATELIMIT_INTERVAL,
		       DEFAULT_RATELIMIT_BURST);
EXPORT_SYMBOL(dm_ratelimit_state);
#endif

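/*
 * Cookies are numeric values sent with CHANGE and REMOVE
 * uevents while resuming, removing or renaming the device.
 */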
#define DM_COOKIE_ENV_VAR_NAME "DM_COOKIE"
#define DM_COOKIE_LENGTH 24

static const char *_name = DM_NAME;

static unsigned int major = 0;
static unsigned int _major = 0;

static DEFINE_IDR(_minor_idr);

static DEFINE_SPINLOCK(_minor_lock);

static void do_deferred_remove(struct work_struct *w);

static DECLARE_WORK(deferred_remove_work, do_deferred_remove);

static struct workqueue_struct *deferred_remove_workqueue;

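/*
 * One of these is allocated per bio.
 */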
struct dm_io {
	struct mapped_device *md;
	int error;
	atomic_t io_count;
	struct bio *bio;
	unsigned long start_time;
	spinlock_t endio_lock;
	struct dm_stats_aux stats_aux;
};

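/*
 * One of these is allocated per request.
 */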
struct dm_rq_target_io {
	struct mapped_device *md;
	struct dm_target *ti;
	struct request *orig, *clone;
	struct kthread_work work;
	int error;
	union map_info info;
};

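/*
 * For request-based dm - the bio clones we allocate are embedded in these
 * structs.
 *
 * We allocate these with bio_alloc_bioset, using the front_pad parameter when
 * the bioset is created - this means the bio has to come at the end of the
 * struct.
 */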
struct dm_rq_clone_bio_info {
	struct bio *orig;
	struct dm_rq_target_io *tio;
	struct bio clone;
};

union map_info *dm_get_rq_mapinfo(struct request *rq)
{
	if (rq && rq->end_io_data)
		return &((struct dm_rq_target_io *)rq->end_io_data)->info;
	return NULL;
}
EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo);

#define MINOR_ALLOCED ((void *)-1)

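/*
 * Bits for the md->flags field.
 */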
#define DMF_BLOCK_IO_FOR_SUSPEND 0
#define DMF_SUSPENDED 1
#define DMF_FROZEN 2
#define DMF_FREEING 3
#define DMF_DELETING 4
#define DMF_NOFLUSH_SUSPENDING 5
#define DMF_MERGE_IS_OPTIONAL 6
#define DMF_DEFERRED_REMOVE 7
#define DMF_SUSPENDED_INTERNALLY 8

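/*
 * A dummy definition to make RCU happy.
 * struct dm_table should never be dereferenced in this file.
 */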
struct dm_table {
	int undefined__;
};

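/*
 * Work processed by per-device workqueue.
 */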
struct mapped_device {
	struct srcu_struct io_barrier;
	struct mutex suspend_lock;
	atomic_t holders;
	atomic_t open_count;

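	/*
	 * The current mapping (struct dm_table *).
	 * Use dm_get_live_table{_fast} or take suspend_lock for
	 * dereference.
	 */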
	struct dm_table __rcu *map;

	struct list_head table_devices;
	struct mutex table_devices_lock;

	unsigned long flags;

	struct request_queue *queue;
	unsigned type;

	struct mutex type_lock;

	struct target_type *immutable_target_type;

	struct gendisk *disk;
	char name[16];

	void *interface_ptr;

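	/*
	 * A list of ios that arrived while we were suspended.
	 */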
	atomic_t pending[2];
	wait_queue_head_t wait;
	struct work_struct work;
	struct bio_list deferred;
	spinlock_t deferred_lock;

	struct workqueue_struct *wq;

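	/*
	 * io objects are allocated from here.
	 */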
	mempool_t *io_pool;
	mempool_t *rq_pool;

	struct bio_set *bs;

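	/*
	 * Event handling.
	 */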
	atomic_t event_nr;
	wait_queue_head_t eventq;
	atomic_t uevent_seq;
	struct list_head uevent_list;
	spinlock_t uevent_lock;

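	/*
	 * freeze/thaw support require holding onto a super block
	 */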
	struct super_block *frozen_sb;
	struct block_device *bdev;

	struct hd_geometry geometry;

	struct dm_kobject_holder kobj_holder;

	struct bio flush_bio;

	unsigned internal_suspend_count;

	struct dm_stats stats;

	struct kthread_worker kworker;
	struct task_struct *kworker_task;

	unsigned seq_rq_merge_deadline_usecs;
	int last_rq_rw;
	sector_t last_rq_pos;
	ktime_t last_rq_start_time;

	struct blk_mq_tag_set tag_set;
	bool use_blk_mq;
};

#ifdef CONFIG_DM_MQ_DEFAULT
static bool use_blk_mq = true;
#else
static bool use_blk_mq = false;
#endif

bool dm_use_blk_mq(struct mapped_device *md)
{
	return md->use_blk_mq;
}

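/*
 * For mempools pre-allocation at the table loading time.
 */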
struct dm_md_mempools {
	mempool_t *io_pool;
	mempool_t *rq_pool;
	struct bio_set *bs;
};

struct table_device {
	struct list_head list;
	atomic_t count;
	struct dm_dev dm_dev;
};

#define RESERVED_BIO_BASED_IOS		16
#define RESERVED_REQUEST_BASED_IOS	256
#define RESERVED_MAX_IOS		1024
static struct kmem_cache *_io_cache;
static struct kmem_cache *_rq_tio_cache;
static struct kmem_cache *_rq_cache;

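/*
 * Bio-based DM's mempools' reserved IOs set by the user.
 */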
static unsigned reserved_bio_based_ios = RESERVED_BIO_BASED_IOS;

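/*
 * Request-based DM's mempools' reserved IOs set by the user.
 */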
static unsigned reserved_rq_based_ios = RESERVED_REQUEST_BASED_IOS;

static unsigned __dm_get_module_param(unsigned *module_param,
				      unsigned def, unsigned max)
{
	unsigned param = ACCESS_ONCE(*module_param);
	unsigned modified_param = 0;

	if (!param)
		modified_param = def;
	else if (param > max)
		modified_param = max;

	if (modified_param) {
		(void)cmpxchg(module_param, param, modified_param);
		param = modified_param;
	}

	return param;
}

unsigned dm_get_reserved_bio_based_ios(void)
{
	return __dm_get_module_param(&reserved_bio_based_ios,
				     RESERVED_BIO_BASED_IOS, RESERVED_MAX_IOS);
}
EXPORT_SYMBOL_GPL(dm_get_reserved_bio_based_ios);

unsigned dm_get_reserved_rq_based_ios(void)
{
	return __dm_get_module_param(&reserved_rq_based_ios,
				     RESERVED_REQUEST_BASED_IOS, RESERVED_MAX_IOS);
}
EXPORT_SYMBOL_GPL(dm_get_reserved_rq_based_ios);

static int __init local_init(void)
{
	int r = -ENOMEM;

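	/* allocate a slab for the dm_ios */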
	_io_cache = KMEM_CACHE(dm_io, 0);
	if (!_io_cache)
		return r;

	_rq_tio_cache = KMEM_CACHE(dm_rq_target_io, 0);
	if (!_rq_tio_cache)
		goto out_free_io_cache;

	_rq_cache = kmem_cache_create("dm_clone_request", sizeof(struct request),
				      __alignof__(struct request), 0, NULL);
	if (!_rq_cache)
		goto out_free_rq_tio_cache;

	r = dm_uevent_init();
	if (r)
		goto out_free_rq_cache;

	deferred_remove_workqueue = alloc_workqueue("kdmremove", WQ_UNBOUND, 1);
	if (!deferred_remove_workqueue) {
		r = -ENOMEM;
		goto out_uevent_exit;
	}

	_major = major;
	r = register_blkdev(_major, _name);
	if (r < 0)
		goto out_free_workqueue;

	if (!_major)
		_major = r;

	return 0;

out_free_workqueue:
	destroy_workqueue(deferred_remove_workqueue);
out_uevent_exit:
	dm_uevent_exit();
out_free_rq_cache:
	kmem_cache_destroy(_rq_cache);
out_free_rq_tio_cache:
	kmem_cache_destroy(_rq_tio_cache);
out_free_io_cache:
	kmem_cache_destroy(_io_cache);

	return r;
}

static void local_exit(void)
{
	flush_scheduled_work();
	destroy_workqueue(deferred_remove_workqueue);

	kmem_cache_destroy(_rq_cache);
	kmem_cache_destroy(_rq_tio_cache);
	kmem_cache_destroy(_io_cache);
	unregister_blkdev(_major, _name);
	dm_uevent_exit();

	_major = 0;

	DMINFO("cleaned up");
}

static int (*_inits[])(void) __initdata = {
	local_init,
	dm_target_init,
	dm_linear_init,
	dm_stripe_init,
	dm_io_init,
	dm_kcopyd_init,
	dm_interface_init,
	dm_statistics_init,
};

static void (*_exits[])(void) = {
	local_exit,
	dm_target_exit,
	dm_linear_exit,
	dm_stripe_exit,
	dm_io_exit,
	dm_kcopyd_exit,
	dm_interface_exit,
	dm_statistics_exit,
};

static int __init dm_init(void)
{
	const int count = ARRAY_SIZE(_inits);

	int r, i;

	for (i = 0; i < count; i++) {
		r = _inits[i]();
		if (r)
			goto bad;
	}

	return 0;

      bad:
	while (i--)
		_exits[i]();

	return r;
}

static void __exit dm_exit(void)
{
	int i = ARRAY_SIZE(_exits);

	while (i--)
		_exits[i]();

	idr_destroy(&_minor_idr);
}

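/*
 * Block device functions
 */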
int dm_deleting_md(struct mapped_device *md)
{
	return test_bit(DMF_DELETING, &md->flags);
}

static int dm_blk_open(struct block_device *bdev, fmode_t mode)
{
	struct mapped_device *md;

	spin_lock(&_minor_lock);

	md = bdev->bd_disk->private_data;
	if (!md)
		goto out;

	if (test_bit(DMF_FREEING, &md->flags) ||
	    dm_deleting_md(md)) {
		md = NULL;
		goto out;
	}

	dm_get(md);
	atomic_inc(&md->open_count);
out:
	spin_unlock(&_minor_lock);

	return md ? 0 : -ENXIO;
}

static void dm_blk_close(struct gendisk *disk, fmode_t mode)
{
	struct mapped_device *md;

	spin_lock(&_minor_lock);

	md = disk->private_data;
	if (WARN_ON(!md))
		goto out;

	if (atomic_dec_and_test(&md->open_count) &&
	    (test_bit(DMF_DEFERRED_REMOVE, &md->flags)))
		queue_work(deferred_remove_workqueue, &deferred_remove_work);

	dm_put(md);
out:
	spin_unlock(&_minor_lock);
}

int dm_open_count(struct mapped_device *md)
{
	return atomic_read(&md->open_count);
}

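/*
 * Guarantees nothing is using the device before it's deleted.
 */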
int dm_lock_for_deletion(struct mapped_device *md, bool mark_deferred, bool only_deferred)
{
	int r = 0;

	spin_lock(&_minor_lock);

	if (dm_open_count(md)) {
		r = -EBUSY;
		if (mark_deferred)
			set_bit(DMF_DEFERRED_REMOVE, &md->flags);
	} else if (only_deferred && !test_bit(DMF_DEFERRED_REMOVE, &md->flags))
		r = -EEXIST;
	else
		set_bit(DMF_DELETING, &md->flags);

	spin_unlock(&_minor_lock);

	return r;
}

int dm_cancel_deferred_remove(struct mapped_device *md)
{
	int r = 0;

	spin_lock(&_minor_lock);

	if (test_bit(DMF_DELETING, &md->flags))
		r = -EBUSY;
	else
		clear_bit(DMF_DEFERRED_REMOVE, &md->flags);

	spin_unlock(&_minor_lock);

	return r;
}

static void do_deferred_remove(struct work_struct *w)
{
	dm_deferred_remove();
}

sector_t dm_get_size(struct mapped_device *md)
{
	return get_capacity(md->disk);
}

struct request_queue *dm_get_md_queue(struct mapped_device *md)
{
	return md->queue;
}

struct dm_stats *dm_get_stats(struct mapped_device *md)
{
	return &md->stats;
}

static int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
{
	struct mapped_device *md = bdev->bd_disk->private_data;

	return dm_get_geometry(md, geo);
}

static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
			unsigned int cmd, unsigned long arg)
{
	struct mapped_device *md = bdev->bd_disk->private_data;
	int srcu_idx;
	struct dm_table *map;
	struct dm_target *tgt;
	int r = -ENOTTY;

retry:
	map = dm_get_live_table(md, &srcu_idx);

	if (!map || !dm_table_get_size(map))
		goto out;

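	/* We only support devices that have a single target */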
	if (dm_table_get_num_targets(map) != 1)
		goto out;

	tgt = dm_table_get_target(map, 0);
	if (!tgt->type->ioctl)
		goto out;

	if (dm_suspended_md(md)) {
		r = -EAGAIN;
		goto out;
	}

	r = tgt->type->ioctl(tgt, cmd, arg);

out:
	dm_put_live_table(md, srcu_idx);

	if (r == -ENOTCONN) {
		msleep(10);
		goto retry;
	}

	return r;
}

static struct dm_io *alloc_io(struct mapped_device *md)
{
	return mempool_alloc(md->io_pool, GFP_NOIO);
}

static void free_io(struct mapped_device *md, struct dm_io *io)
{
	mempool_free(io, md->io_pool);
}

static void free_tio(struct mapped_device *md, struct dm_target_io *tio)
{
	bio_put(&tio->clone);
}

static struct dm_rq_target_io *alloc_rq_tio(struct mapped_device *md,
					    gfp_t gfp_mask)
{
	return mempool_alloc(md->io_pool, gfp_mask);
}

static void free_rq_tio(struct dm_rq_target_io *tio)
{
	mempool_free(tio, tio->md->io_pool);
}

static struct request *alloc_clone_request(struct mapped_device *md,
					   gfp_t gfp_mask)
{
	return mempool_alloc(md->rq_pool, gfp_mask);
}

static void free_clone_request(struct mapped_device *md, struct request *rq)
{
	mempool_free(rq, md->rq_pool);
}

static int md_in_flight(struct mapped_device *md)
{
	return atomic_read(&md->pending[READ]) +
	       atomic_read(&md->pending[WRITE]);
}

static void start_io_acct(struct dm_io *io)
{
	struct mapped_device *md = io->md;
	struct bio *bio = io->bio;
	int cpu;
	int rw = bio_data_dir(bio);

	io->start_time = jiffies;

	cpu = part_stat_lock();
	part_round_stats(cpu, &dm_disk(md)->part0);
	part_stat_unlock();
	atomic_set(&dm_disk(md)->part0.in_flight[rw],
		   atomic_inc_return(&md->pending[rw]));

	if (unlikely(dm_stats_used(&md->stats)))
		dm_stats_account_io(&md->stats, bio->bi_rw, bio->bi_iter.bi_sector,
				    bio_sectors(bio), false, 0, &io->stats_aux);
}

static void end_io_acct(struct dm_io *io)
{
	struct mapped_device *md = io->md;
	struct bio *bio = io->bio;
	unsigned long duration = jiffies - io->start_time;
	int pending;
	int rw = bio_data_dir(bio);

	generic_end_io_acct(rw, &dm_disk(md)->part0, io->start_time);

	if (unlikely(dm_stats_used(&md->stats)))
		dm_stats_account_io(&md->stats, bio->bi_rw, bio->bi_iter.bi_sector,
				    bio_sectors(bio), true, duration, &io->stats_aux);

	pending = atomic_dec_return(&md->pending[rw]);
	atomic_set(&dm_disk(md)->part0.in_flight[rw], pending);
	pending += atomic_read(&md->pending[rw^0x1]);

	if (!pending)
		wake_up(&md->wait);
}

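/*
 * Add the bio to the list of deferred io.
 */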
static void queue_io(struct mapped_device *md, struct bio *bio)
{
	unsigned long flags;

	spin_lock_irqsave(&md->deferred_lock, flags);
	bio_list_add(&md->deferred, bio);
	spin_unlock_irqrestore(&md->deferred_lock, flags);
	queue_work(md->wq, &md->work);
}

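/*
 * Everyone (including functions in this file), should use this
 * function to access the md->map field, and make sure they call
 * dm_put_live_table() when finished.
 */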
struct dm_table *dm_get_live_table(struct mapped_device *md, int *srcu_idx) __acquires(md->io_barrier)
{
	*srcu_idx = srcu_read_lock(&md->io_barrier);

	return srcu_dereference(md->map, &md->io_barrier);
}

void dm_put_live_table(struct mapped_device *md, int srcu_idx) __releases(md->io_barrier)
{
	srcu_read_unlock(&md->io_barrier, srcu_idx);
}

void dm_sync_table(struct mapped_device *md)
{
	synchronize_srcu(&md->io_barrier);
	synchronize_rcu_expedited();
}

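/*
 * A fast alternative to dm_get_live_table/dm_put_live_table.
 * The caller must not block between these two functions.
 */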
static struct dm_table *dm_get_live_table_fast(struct mapped_device *md) __acquires(RCU)
{
	rcu_read_lock();
	return rcu_dereference(md->map);
}

static void dm_put_live_table_fast(struct mapped_device *md) __releases(RCU)
{
	rcu_read_unlock();
}

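/*
 * Open a table device so we can use it as a map destination.
 */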
static int open_table_device(struct table_device *td, dev_t dev,
			     struct mapped_device *md)
{
	static char *_claim_ptr = "I belong to device-mapper";
	struct block_device *bdev;

	int r;

	BUG_ON(td->dm_dev.bdev);

	bdev = blkdev_get_by_dev(dev, td->dm_dev.mode | FMODE_EXCL, _claim_ptr);
	if (IS_ERR(bdev))
		return PTR_ERR(bdev);

	r = bd_link_disk_holder(bdev, dm_disk(md));
	if (r) {
		blkdev_put(bdev, td->dm_dev.mode | FMODE_EXCL);
		return r;
	}

	td->dm_dev.bdev = bdev;
	return 0;
}

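/*
 * Close a table device that we've been using.
 */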
static void close_table_device(struct table_device *td, struct mapped_device *md)
{
	if (!td->dm_dev.bdev)
		return;

	bd_unlink_disk_holder(td->dm_dev.bdev, dm_disk(md));
	blkdev_put(td->dm_dev.bdev, td->dm_dev.mode | FMODE_EXCL);
	td->dm_dev.bdev = NULL;
}

static struct table_device *find_table_device(struct list_head *l, dev_t dev,
					      fmode_t mode) {
	struct table_device *td;

	list_for_each_entry(td, l, list)
		if (td->dm_dev.bdev->bd_dev == dev && td->dm_dev.mode == mode)
			return td;

	return NULL;
}

int dm_get_table_device(struct mapped_device *md, dev_t dev, fmode_t mode,
			struct dm_dev **result) {
	int r;
	struct table_device *td;

	mutex_lock(&md->table_devices_lock);
	td = find_table_device(&md->table_devices, dev, mode);
	if (!td) {
		td = kmalloc(sizeof(*td), GFP_KERNEL);
		if (!td) {
			mutex_unlock(&md->table_devices_lock);
			return -ENOMEM;
		}

		td->dm_dev.mode = mode;
		td->dm_dev.bdev = NULL;

		if ((r = open_table_device(td, dev, md))) {
			mutex_unlock(&md->table_devices_lock);
			kfree(td);
			return r;
		}

		format_dev_t(td->dm_dev.name, dev);

		atomic_set(&td->count, 0);
		list_add(&td->list, &md->table_devices);
	}
	atomic_inc(&td->count);
	mutex_unlock(&md->table_devices_lock);

	*result = &td->dm_dev;
	return 0;
}
EXPORT_SYMBOL_GPL(dm_get_table_device);

void dm_put_table_device(struct mapped_device *md, struct dm_dev *d)
{
	struct table_device *td = container_of(d, struct table_device, dm_dev);

	mutex_lock(&md->table_devices_lock);
	if (atomic_dec_and_test(&td->count)) {
		close_table_device(td, md);
		list_del(&td->list);
		kfree(td);
	}
	mutex_unlock(&md->table_devices_lock);
}
EXPORT_SYMBOL(dm_put_table_device);

static void free_table_devices(struct list_head *devices)
{
	struct list_head *tmp, *next;

	list_for_each_safe(tmp, next, devices) {
		struct table_device *td = list_entry(tmp, struct table_device, list);

		DMWARN("dm_destroy: %s still exists with %d references",
		       td->dm_dev.name, atomic_read(&td->count));
		kfree(td);
	}
}

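/*
 * Get the geometry associated with a dm device
 */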
int dm_get_geometry(struct mapped_device *md, struct hd_geometry *geo)
{
	*geo = md->geometry;

	return 0;
}

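/*
 * Set the geometry of a device.
 */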
int dm_set_geometry(struct mapped_device *md, struct hd_geometry *geo)
{
	sector_t sz = (sector_t)geo->cylinders * geo->heads * geo->sectors;

	if (geo->start > sz) {
		DMWARN("Start sector is beyond the geometry limits.");
		return -EINVAL;
	}

	md->geometry = *geo;

	return 0;
}

static int __noflush_suspending(struct mapped_device *md)
{
	return test_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
}

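/*
 * Decrements the number of outstanding ios that a bio has been
 * cloned into, completing the original io if necessary.
 */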
static void dec_pending(struct dm_io *io, int error)
{
	unsigned long flags;
	int io_error;
	struct bio *bio;
	struct mapped_device *md = io->md;

	if (unlikely(error)) {
		spin_lock_irqsave(&io->endio_lock, flags);
		if (!(io->error > 0 && __noflush_suspending(md)))
			io->error = error;
		spin_unlock_irqrestore(&io->endio_lock, flags);
	}

	if (atomic_dec_and_test(&io->io_count)) {
		if (io->error == DM_ENDIO_REQUEUE) {
			spin_lock_irqsave(&md->deferred_lock, flags);
			if (__noflush_suspending(md))
				bio_list_add_head(&md->deferred, io->bio);
			else
				io->error = -EIO;
			spin_unlock_irqrestore(&md->deferred_lock, flags);
		}

		io_error = io->error;
		bio = io->bio;
		end_io_acct(io);
		free_io(md, io);

		if (io_error == DM_ENDIO_REQUEUE)
			return;

		if ((bio->bi_rw & REQ_FLUSH) && bio->bi_iter.bi_size) {
			bio->bi_rw &= ~REQ_FLUSH;
			queue_io(md, bio);
		} else {
			trace_block_bio_complete(md->queue, bio, io_error);
			bio_endio(bio, io_error);
		}
	}
}

static void disable_write_same(struct mapped_device *md)
{
	struct queue_limits *limits = dm_get_queue_limits(md);

	limits->max_write_same_sectors = 0;
}

static void clone_endio(struct bio *bio, int error)
{
	int r = error;
	struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
	struct dm_io *io = tio->io;
	struct mapped_device *md = tio->io->md;
	dm_endio_fn endio = tio->ti->type->end_io;

	if (!bio_flagged(bio, BIO_UPTODATE) && !error)
		error = -EIO;

	if (endio) {
		r = endio(tio->ti, bio, error);
		if (r < 0 || r == DM_ENDIO_REQUEUE)
			error = r;
		else if (r == DM_ENDIO_INCOMPLETE)
			return;
		else if (r) {
			DMWARN("unimplemented target endio return value: %d", r);
			BUG();
		}
	}

	if (unlikely(r == -EREMOTEIO && (bio->bi_rw & REQ_WRITE_SAME) &&
		     !bdev_get_queue(bio->bi_bdev)->limits.max_write_same_sectors))
		disable_write_same(md);

	free_tio(md, tio);
	dec_pending(io, error);
}

static void end_clone_bio(struct bio *clone, int error)
{
	struct dm_rq_clone_bio_info *info =
		container_of(clone, struct dm_rq_clone_bio_info, clone);
	struct dm_rq_target_io *tio = info->tio;
	struct bio *bio = info->orig;
	unsigned int nr_bytes = info->orig->bi_iter.bi_size;

	bio_put(clone);

	if (tio->error)
		return;
	else if (error) {
		tio->error = error;
		return;
	}

	if (tio->orig->bio != bio)
		DMERR("bio completion is going in the middle of the request");

	blk_update_request(tio->orig, 0, nr_bytes);
}

static struct dm_rq_target_io *tio_from_request(struct request *rq)
{
	return (rq->q->mq_ops ? blk_mq_rq_to_pdu(rq) : rq->special);
}

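/*
 * Don't touch any member of the md after calling this function because
 * the md may be freed in dm_put() at the end of this function.
 * Or do dm_get() before calling this function and dm_put() later.
 */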
static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
{
	int nr_requests_pending;

	atomic_dec(&md->pending[rw]);

	nr_requests_pending = md_in_flight(md);
	if (!nr_requests_pending)
		wake_up(&md->wait);

	if (run_queue) {
		if (md->queue->mq_ops)
			blk_mq_run_hw_queues(md->queue, true);
		else if (!nr_requests_pending ||
			 (nr_requests_pending >= md->queue->nr_congestion_on))
			blk_run_queue_async(md->queue);
	}

	dm_put(md);
}

static void free_rq_clone(struct request *clone)
{
	struct dm_rq_target_io *tio = clone->end_io_data;
	struct mapped_device *md = tio->md;

	blk_rq_unprep_clone(clone);

	if (md->type == DM_TYPE_MQ_REQUEST_BASED)
		tio->ti->type->release_clone_rq(clone);
	else if (!md->queue->mq_ops)
		free_clone_request(md, clone);

	if (!md->queue->mq_ops)
		free_rq_tio(tio);
}

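/*
 * Complete the clone and the original request.
 * Must be called without clone's queue lock held,
 * see end_clone_request() for more details.
 */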
static void dm_end_request(struct request *clone, int error)
{
	int rw = rq_data_dir(clone);
	struct dm_rq_target_io *tio = clone->end_io_data;
	struct mapped_device *md = tio->md;
	struct request *rq = tio->orig;

	if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
		rq->errors = clone->errors;
		rq->resid_len = clone->resid_len;

		if (rq->sense)
			rq->sense_len = clone->sense_len;
	}

	free_rq_clone(clone);
	if (!rq->q->mq_ops)
		blk_end_request_all(rq, error);
	else
		blk_mq_end_request(rq, error);
	rq_completed(md, rw, true);
}

static void dm_unprep_request(struct request *rq)
{
	struct dm_rq_target_io *tio = tio_from_request(rq);
	struct request *clone = tio->clone;

	if (!rq->q->mq_ops) {
		rq->special = NULL;
		rq->cmd_flags &= ~REQ_DONTPREP;
	}

	if (clone)
		free_rq_clone(clone);
}

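/*
 * Requeue the original request of a clone.
 */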
static void old_requeue_request(struct request *rq)
{
	struct request_queue *q = rq->q;
	unsigned long flags;

	spin_lock_irqsave(q->queue_lock, flags);
	blk_requeue_request(q, rq);
	blk_run_queue_async(q);
	spin_unlock_irqrestore(q->queue_lock, flags);
}

static void dm_requeue_unmapped_original_request(struct mapped_device *md,
						 struct request *rq)
{
	int rw = rq_data_dir(rq);

	dm_unprep_request(rq);

	if (!rq->q->mq_ops)
		old_requeue_request(rq);
	else {
		blk_mq_requeue_request(rq);
		blk_mq_kick_requeue_list(rq->q);
	}

	rq_completed(md, rw, false);
}

static void dm_requeue_unmapped_request(struct request *clone)
{
	struct dm_rq_target_io *tio = clone->end_io_data;

	dm_requeue_unmapped_original_request(tio->md, tio->orig);
}

static void old_stop_queue(struct request_queue *q)
{
	unsigned long flags;

	if (blk_queue_stopped(q))
		return;

	spin_lock_irqsave(q->queue_lock, flags);
	blk_stop_queue(q);
	spin_unlock_irqrestore(q->queue_lock, flags);
}

static void stop_queue(struct request_queue *q)
{
	if (!q->mq_ops)
		old_stop_queue(q);
	else
		blk_mq_stop_hw_queues(q);
}

static void old_start_queue(struct request_queue *q)
{
	unsigned long flags;

	spin_lock_irqsave(q->queue_lock, flags);
	if (blk_queue_stopped(q))
		blk_start_queue(q);
	spin_unlock_irqrestore(q->queue_lock, flags);
}

static void start_queue(struct request_queue *q)
{
	if (!q->mq_ops)
		old_start_queue(q);
	else
		blk_mq_start_stopped_hw_queues(q, true);
}

static void dm_done(struct request *clone, int error, bool mapped)
{
	int r = error;
	struct dm_rq_target_io *tio = clone->end_io_data;
	dm_request_endio_fn rq_end_io = NULL;

	if (tio->ti) {
		rq_end_io = tio->ti->type->rq_end_io;

		if (mapped && rq_end_io)
			r = rq_end_io(tio->ti, clone, error, &tio->info);
	}

	if (unlikely(r == -EREMOTEIO && (clone->cmd_flags & REQ_WRITE_SAME) &&
		     !clone->q->limits.max_write_same_sectors))
		disable_write_same(tio->md);

	if (r <= 0)
		dm_end_request(clone, r);
	else if (r == DM_ENDIO_INCOMPLETE)
		return;
	else if (r == DM_ENDIO_REQUEUE)
		dm_requeue_unmapped_request(clone);
	else {
		DMWARN("unimplemented target endio return value: %d", r);
		BUG();
	}
}

static void dm_softirq_done(struct request *rq)
{
	bool mapped = true;
	struct dm_rq_target_io *tio = tio_from_request(rq);
	struct request *clone = tio->clone;
	int rw;

	if (!clone) {
		rw = rq_data_dir(rq);
		if (!rq->q->mq_ops) {
			blk_end_request_all(rq, tio->error);
			rq_completed(tio->md, rw, false);
			free_rq_tio(tio);
		} else {
			blk_mq_end_request(rq, tio->error);
			rq_completed(tio->md, rw, false);
		}
		return;
	}

	if (rq->cmd_flags & REQ_FAILED)
		mapped = false;

	dm_done(clone, tio->error, mapped);
}

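/*
 * Complete the clone and the original request with the error status
 * through softirq context.
 */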
static void dm_complete_request(struct request *rq, int error)
{
	struct dm_rq_target_io *tio = tio_from_request(rq);

	tio->error = error;
	blk_complete_request(rq);
}

static void dm_kill_unmapped_request(struct request *rq, int error)
{
	rq->cmd_flags |= REQ_FAILED;
	dm_complete_request(rq, error);
}

static void end_clone_request(struct request *clone, int error)
{
	struct dm_rq_target_io *tio = clone->end_io_data;

	if (!clone->q->mq_ops) {
		__blk_put_request(clone->q, clone);
	}

	dm_complete_request(tio->orig, error);
}

static sector_t max_io_len_target_boundary(sector_t sector, struct dm_target *ti)
{
	sector_t target_offset = dm_target_offset(ti, sector);

	return ti->len - target_offset;
}

static sector_t max_io_len(sector_t sector, struct dm_target *ti)
{
	sector_t len = max_io_len_target_boundary(sector, ti);
	sector_t offset, max_len;

	if (ti->max_io_len) {
		offset = dm_target_offset(ti, sector);
		if (unlikely(ti->max_io_len & (ti->max_io_len - 1)))
			max_len = sector_div(offset, ti->max_io_len);
		else
			max_len = offset & (ti->max_io_len - 1);
		max_len = ti->max_io_len - max_len;

		if (len > max_len)
			len = max_len;
	}

	return len;
}

int dm_set_target_max_io_len(struct dm_target *ti, sector_t len)
{
	if (len > UINT_MAX) {
		DMERR("Specified maximum size of target IO (%llu) exceeds limit (%u)",
		      (unsigned long long)len, UINT_MAX);
		ti->error = "Maximum size of target IO is too large";
		return -EINVAL;
	}

	ti->max_io_len = (uint32_t) len;

	return 0;
}
EXPORT_SYMBOL_GPL(dm_set_target_max_io_len);

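/*
 * A target may call dm_accept_partial_bio only from the map routine.  It is
 * allowed for all bio types except REQ_FLUSH.
 *
 * dm_accept_partial_bio informs the dm that the target only wants to process
 * additional n_sectors sectors of the bio and the rest of the bio is sent in
 * a next bio.
 */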
void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors)
{
	struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
	unsigned bi_size = bio->bi_iter.bi_size >> SECTOR_SHIFT;
	BUG_ON(bio->bi_rw & REQ_FLUSH);
	BUG_ON(bi_size > *tio->len_ptr);
	BUG_ON(n_sectors > bi_size);
	*tio->len_ptr -= bi_size - n_sectors;
	bio->bi_iter.bi_size = n_sectors << SECTOR_SHIFT;
}
EXPORT_SYMBOL_GPL(dm_accept_partial_bio);

static void __map_bio(struct dm_target_io *tio)
{
	int r;
	sector_t sector;
	struct mapped_device *md;
	struct bio *clone = &tio->clone;
	struct dm_target *ti = tio->ti;

	clone->bi_end_io = clone_endio;

	atomic_inc(&tio->io->io_count);
	sector = clone->bi_iter.bi_sector;
	r = ti->type->map(ti, clone);
	if (r == DM_MAPIO_REMAPPED) {
		trace_block_bio_remap(bdev_get_queue(clone->bi_bdev), clone,
				      tio->io->bio->bi_bdev->bd_dev, sector);

		generic_make_request(clone);
	} else if (r < 0 || r == DM_MAPIO_REQUEUE) {
		md = tio->io->md;
		dec_pending(tio->io, r);
		free_tio(md, tio);
	} else if (r) {
		DMWARN("unimplemented target map return value: %d", r);
		BUG();
	}
}

struct clone_info {
	struct mapped_device *md;
	struct dm_table *map;
	struct bio *bio;
	struct dm_io *io;
	sector_t sector;
	unsigned sector_count;
};

static void bio_setup_sector(struct bio *bio, sector_t sector, unsigned len)
{
	bio->bi_iter.bi_sector = sector;
	bio->bi_iter.bi_size = to_bytes(len);
}

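/*
 * Creates a bio that consists of range of complete bvecs.
 */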
static void clone_bio(struct dm_target_io *tio, struct bio *bio,
		      sector_t sector, unsigned len)
{
	struct bio *clone = &tio->clone;

	__bio_clone_fast(clone, bio);

	if (bio_integrity(bio))
		bio_integrity_clone(clone, bio, GFP_NOIO);

	bio_advance(clone, to_bytes(sector - clone->bi_iter.bi_sector));
	clone->bi_iter.bi_size = to_bytes(len);

	if (bio_integrity(bio))
		bio_integrity_trim(clone, 0, len);
}

static struct dm_target_io *alloc_tio(struct clone_info *ci,
				      struct dm_target *ti,
				      unsigned target_bio_nr)
{
	struct dm_target_io *tio;
	struct bio *clone;

	clone = bio_alloc_bioset(GFP_NOIO, 0, ci->md->bs);
	tio = container_of(clone, struct dm_target_io, clone);

	tio->io = ci->io;
	tio->ti = ti;
	tio->target_bio_nr = target_bio_nr;

	return tio;
}

static void __clone_and_map_simple_bio(struct clone_info *ci,
				       struct dm_target *ti,
				       unsigned target_bio_nr, unsigned *len)
{
	struct dm_target_io *tio = alloc_tio(ci, ti, target_bio_nr);
	struct bio *clone = &tio->clone;

	tio->len_ptr = len;

	__bio_clone_fast(clone, ci->bio);
	if (len)
		bio_setup_sector(clone, ci->sector, *len);

	__map_bio(tio);
}

static void __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti,
				  unsigned num_bios, unsigned *len)
{
	unsigned target_bio_nr;

	for (target_bio_nr = 0; target_bio_nr < num_bios; target_bio_nr++)
		__clone_and_map_simple_bio(ci, ti, target_bio_nr, len);
}

static int __send_empty_flush(struct clone_info *ci)
{
	unsigned target_nr = 0;
	struct dm_target *ti;

	BUG_ON(bio_has_data(ci->bio));
	while ((ti = dm_table_get_target(ci->map, target_nr++)))
		__send_duplicate_bios(ci, ti, ti->num_flush_bios, NULL);

	return 0;
}

static void __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti,
				     sector_t sector, unsigned *len)
{
	struct bio *bio = ci->bio;
	struct dm_target_io *tio;
	unsigned target_bio_nr;
	unsigned num_target_bios = 1;

	if (bio_data_dir(bio) == WRITE && ti->num_write_bios)
		num_target_bios = ti->num_write_bios(ti, bio);

	for (target_bio_nr = 0; target_bio_nr < num_target_bios; target_bio_nr++) {
		tio = alloc_tio(ci, ti, target_bio_nr);
		tio->len_ptr = len;
		clone_bio(tio, bio, sector, *len);
		__map_bio(tio);
	}
}

typedef unsigned (*get_num_bios_fn)(struct dm_target *ti);

static unsigned get_num_discard_bios(struct dm_target *ti)
{
	return ti->num_discard_bios;
}

static unsigned get_num_write_same_bios(struct dm_target *ti)
{
	return ti->num_write_same_bios;
}

typedef bool (*is_split_required_fn)(struct dm_target *ti);

static bool is_split_required_for_discard(struct dm_target *ti)
{
	return ti->split_discard_bios;
}

static int __send_changing_extent_only(struct clone_info *ci,
				       get_num_bios_fn get_num_bios,
				       is_split_required_fn is_split_required)
{
	struct dm_target *ti;
	unsigned len;
	unsigned num_bios;

	do {
		ti = dm_table_find_target(ci->map, ci->sector);
		if (!dm_target_is_valid(ti))
			return -EIO;

		num_bios = get_num_bios ? get_num_bios(ti) : 0;
		if (!num_bios)
			return -EOPNOTSUPP;

		if (is_split_required && !is_split_required(ti))
			len = min((sector_t)ci->sector_count, max_io_len_target_boundary(ci->sector, ti));
		else
			len = min((sector_t)ci->sector_count, max_io_len(ci->sector, ti));

		__send_duplicate_bios(ci, ti, num_bios, &len);

		ci->sector += len;
	} while (ci->sector_count -= len);

	return 0;
}

static int __send_discard(struct clone_info *ci)
{
	return __send_changing_extent_only(ci, get_num_discard_bios,
					   is_split_required_for_discard);
}

static int __send_write_same(struct clone_info *ci)
{
	return __send_changing_extent_only(ci, get_num_write_same_bios, NULL);
}

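/*
 * Select the correct strategy for processing a non-flush bio.
 */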
static int __split_and_process_non_flush(struct clone_info *ci)
{
	struct bio *bio = ci->bio;
	struct dm_target *ti;
	unsigned len;

	if (unlikely(bio->bi_rw & REQ_DISCARD))
		return __send_discard(ci);
	else if (unlikely(bio->bi_rw & REQ_WRITE_SAME))
		return __send_write_same(ci);

	ti = dm_table_find_target(ci->map, ci->sector);
	if (!dm_target_is_valid(ti))
		return -EIO;

	len = min_t(sector_t, max_io_len(ci->sector, ti), ci->sector_count);

	__clone_and_map_data_bio(ci, ti, ci->sector, &len);

	ci->sector += len;
	ci->sector_count -= len;

	return 0;
}

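/*
 * Entry point to split a bio into clones and submit them to the targets.
 */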
static void __split_and_process_bio(struct mapped_device *md,
				    struct dm_table *map, struct bio *bio)
{
	struct clone_info ci;
	int error = 0;

	if (unlikely(!map)) {
		bio_io_error(bio);
		return;
	}

	ci.map = map;
	ci.md = md;
	ci.io = alloc_io(md);
	ci.io->error = 0;
	atomic_set(&ci.io->io_count, 1);
	ci.io->bio = bio;
	ci.io->md = md;
	spin_lock_init(&ci.io->endio_lock);
	ci.sector = bio->bi_iter.bi_sector;

	start_io_acct(ci.io);

	if (bio->bi_rw & REQ_FLUSH) {
		ci.bio = &ci.md->flush_bio;
		ci.sector_count = 0;
		error = __send_empty_flush(&ci);
	} else {
		ci.bio = bio;
		ci.sector_count = bio_sectors(bio);
		while (ci.sector_count && !error)
			error = __split_and_process_non_flush(&ci);
	}

	dec_pending(ci.io, error);
}

static int dm_merge_bvec(struct request_queue *q,
			 struct bvec_merge_data *bvm,
			 struct bio_vec *biovec)
{
	struct mapped_device *md = q->queuedata;
	struct dm_table *map = dm_get_live_table_fast(md);
	struct dm_target *ti;
	sector_t max_sectors, max_size = 0;

	if (unlikely(!map))
		goto out;

	ti = dm_table_find_target(map, bvm->bi_sector);
	if (!dm_target_is_valid(ti))
		goto out;

	max_sectors = min(max_io_len(bvm->bi_sector, ti),
			  (sector_t) queue_max_sectors(q));
	max_size = (max_sectors << SECTOR_SHIFT) - bvm->bi_size;

	if (max_size > INT_MAX)
		max_size = INT_MAX;

	if (max_size && ti->type->merge)
		max_size = ti->type->merge(ti, bvm, biovec, (int) max_size);
	else if (queue_max_hw_sectors(q) <= PAGE_SIZE >> 9)
		max_size = 0;

out:
	dm_put_live_table_fast(md);

	if (max_size <= biovec->bv_len && !(bvm->bi_size >> SECTOR_SHIFT))
		max_size = biovec->bv_len;

	return max_size;
}

static void dm_make_request(struct request_queue *q, struct bio *bio)
{
	int rw = bio_data_dir(bio);
	struct mapped_device *md = q->queuedata;
	int srcu_idx;
	struct dm_table *map;

	map = dm_get_live_table(md, &srcu_idx);

	generic_start_io_acct(rw, bio_sectors(bio), &dm_disk(md)->part0);

	if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))) {
		dm_put_live_table(md, srcu_idx);

		if (bio_rw(bio) != READA)
			queue_io(md, bio);
		else
			bio_io_error(bio);
		return;
	}

	__split_and_process_bio(md, map, bio);
	dm_put_live_table(md, srcu_idx);
	return;
}

int dm_request_based(struct mapped_device *md)
{
	return blk_queue_stackable(md->queue);
}

static void dm_dispatch_clone_request(struct request *clone, struct request *rq)
{
	int r;

	if (blk_queue_io_stat(clone->q))
		clone->cmd_flags |= REQ_IO_STAT;

	clone->start_time = jiffies;
	r = blk_insert_cloned_request(clone->q, clone);
	if (r)
		dm_complete_request(rq, r);
}

static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig,
				 void *data)
{
	struct dm_rq_target_io *tio = data;
	struct dm_rq_clone_bio_info *info =
		container_of(bio, struct dm_rq_clone_bio_info, clone);

	info->orig = bio_orig;
	info->tio = tio;
	bio->bi_end_io = end_clone_bio;

	return 0;
}

static int setup_clone(struct request *clone, struct request *rq,
		       struct dm_rq_target_io *tio, gfp_t gfp_mask)
{
	int r;

	r = blk_rq_prep_clone(clone, rq, tio->md->bs, gfp_mask,
			      dm_rq_bio_constructor, tio);
	if (r)
		return r;

	clone->cmd = rq->cmd;
	clone->cmd_len = rq->cmd_len;
	clone->sense = rq->sense;
	clone->end_io = end_clone_request;
	clone->end_io_data = tio;

	tio->clone = clone;

	return 0;
}

static struct request *clone_rq(struct request *rq, struct mapped_device *md,
				struct dm_rq_target_io *tio, gfp_t gfp_mask)
{
	bool alloc_clone = !tio->clone;
	struct request *clone;

	if (alloc_clone) {
		clone = alloc_clone_request(md, gfp_mask);
		if (!clone)
			return NULL;
	} else
		clone = tio->clone;

	blk_rq_init(NULL, clone);
	if (setup_clone(clone, rq, tio, gfp_mask)) {
		if (alloc_clone)
			free_clone_request(md, clone);
		return NULL;
	}

	return clone;
}

static void map_tio_request(struct kthread_work *work);

static void init_tio(struct dm_rq_target_io *tio, struct request *rq,
		     struct mapped_device *md)
{
	tio->md = md;
	tio->ti = NULL;
	tio->clone = NULL;
	tio->orig = rq;
	tio->error = 0;
	memset(&tio->info, 0, sizeof(tio->info));
	if (md->kworker_task)
		init_kthread_work(&tio->work, map_tio_request);
}

static struct dm_rq_target_io *prep_tio(struct request *rq,
					struct mapped_device *md, gfp_t gfp_mask)
{
	struct dm_rq_target_io *tio;
	int srcu_idx;
	struct dm_table *table;

	tio = alloc_rq_tio(md, gfp_mask);
	if (!tio)
		return NULL;

	init_tio(tio, rq, md);

	table = dm_get_live_table(md, &srcu_idx);
	if (!dm_table_mq_request_based(table)) {
		if (!clone_rq(rq, md, tio, gfp_mask)) {
			dm_put_live_table(md, srcu_idx);
			free_rq_tio(tio);
			return NULL;
		}
	}
	dm_put_live_table(md, srcu_idx);

	return tio;
}

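/*
 * Called with the queue lock held.
 */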
static int dm_prep_fn(struct request_queue *q, struct request *rq)
{
	struct mapped_device *md = q->queuedata;
	struct dm_rq_target_io *tio;

	if (unlikely(rq->special)) {
		DMWARN("Already has something in rq->special.");
		return BLKPREP_KILL;
	}

	tio = prep_tio(rq, md, GFP_ATOMIC);
	if (!tio)
		return BLKPREP_DEFER;

	rq->special = tio;
	rq->cmd_flags |= REQ_DONTPREP;

	return BLKPREP_OK;
}

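/*
 * Returns:
 * 0                : the request has been processed
 * DM_MAPIO_REQUEUE : the original request needs to be requeued
 * < 0              : the request was completed due to failure
 */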
static int map_request(struct dm_rq_target_io *tio, struct request *rq,
		       struct mapped_device *md)
{
	int r;
	struct dm_target *ti = tio->ti;
	struct request *clone = NULL;

	if (tio->clone) {
		clone = tio->clone;
		r = ti->type->map_rq(ti, clone, &tio->info);
	} else {
		r = ti->type->clone_and_map_rq(ti, rq, &tio->info, &clone);
		if (r < 0) {
			dm_kill_unmapped_request(rq, r);
			return r;
		}
		if (r != DM_MAPIO_REMAPPED)
			return r;
		if (setup_clone(clone, rq, tio, GFP_ATOMIC)) {
			ti->type->release_clone_rq(clone);
			return DM_MAPIO_REQUEUE;
		}
	}

	switch (r) {
	case DM_MAPIO_SUBMITTED:
		break;
	case DM_MAPIO_REMAPPED:
		trace_block_rq_remap(clone->q, clone, disk_devt(dm_disk(md)),
				     blk_rq_pos(rq));
		dm_dispatch_clone_request(clone, rq);
		break;
	case DM_MAPIO_REQUEUE:
		dm_requeue_unmapped_request(clone);
		break;
	default:
		if (r > 0) {
			DMWARN("unimplemented target map return value: %d", r);
			BUG();
		}

		dm_kill_unmapped_request(rq, r);
		return r;
	}

	return 0;
}

static void map_tio_request(struct kthread_work *work)
{
	struct dm_rq_target_io *tio = container_of(work, struct dm_rq_target_io, work);
	struct request *rq = tio->orig;
	struct mapped_device *md = tio->md;

	if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE)
		dm_requeue_unmapped_original_request(md, rq);
}

static void dm_start_request(struct mapped_device *md, struct request *orig)
{
	if (!orig->q->mq_ops)
		blk_start_request(orig);
	else
		blk_mq_start_request(orig);
	atomic_inc(&md->pending[rq_data_dir(orig)]);

	if (md->seq_rq_merge_deadline_usecs) {
		md->last_rq_pos = rq_end_sector(orig);
		md->last_rq_rw = rq_data_dir(orig);
		md->last_rq_start_time = ktime_get();
	}

	dm_get(md);
}

#define MAX_SEQ_RQ_MERGE_DEADLINE_USECS 100000

ssize_t dm_attr_rq_based_seq_io_merge_deadline_show(struct mapped_device *md, char *buf)
{
	return sprintf(buf, "%u\n", md->seq_rq_merge_deadline_usecs);
}

ssize_t dm_attr_rq_based_seq_io_merge_deadline_store(struct mapped_device *md,
						     const char *buf, size_t count)
{
	unsigned deadline;

	if (!dm_request_based(md) || md->use_blk_mq)
		return count;

	if (kstrtouint(buf, 10, &deadline))
		return -EINVAL;

	if (deadline > MAX_SEQ_RQ_MERGE_DEADLINE_USECS)
		deadline = MAX_SEQ_RQ_MERGE_DEADLINE_USECS;

	md->seq_rq_merge_deadline_usecs = deadline;

	return count;
}

static bool dm_request_peeked_before_merge_deadline(struct mapped_device *md)
{
	ktime_t kt_deadline;

	if (!md->seq_rq_merge_deadline_usecs)
		return false;

	kt_deadline = ns_to_ktime((u64)md->seq_rq_merge_deadline_usecs * NSEC_PER_USEC);
	kt_deadline = ktime_add_safe(md->last_rq_start_time, kt_deadline);

	return !ktime_after(ktime_get(), kt_deadline);
}

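/*
 * q->request_fn for request-based dm.
 * Called with the queue lock held.
 */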
static void dm_request_fn(struct request_queue *q)
{
	struct mapped_device *md = q->queuedata;
	int srcu_idx;
	struct dm_table *map = dm_get_live_table(md, &srcu_idx);
	struct dm_target *ti;
	struct request *rq;
	struct dm_rq_target_io *tio;
	sector_t pos;

	while (!blk_queue_stopped(q)) {
		rq = blk_peek_request(q);
		if (!rq)
			goto out;

		pos = 0;
		if (!(rq->cmd_flags & REQ_FLUSH))
			pos = blk_rq_pos(rq);

		ti = dm_table_find_target(map, pos);
		if (!dm_target_is_valid(ti)) {
			DMERR_LIMIT("request attempted access beyond the end of device");
			dm_start_request(md, rq);
			dm_kill_unmapped_request(rq, -EIO);
			continue;
		}

		if (dm_request_peeked_before_merge_deadline(md) &&
		    md_in_flight(md) && rq->bio && rq->bio->bi_vcnt == 1 &&
		    md->last_rq_pos == pos && md->last_rq_rw == rq_data_dir(rq))
			goto delay_and_out;

		if (ti->type->busy && ti->type->busy(ti))
			goto delay_and_out;

		dm_start_request(md, rq);

		tio = tio_from_request(rq);

		tio->ti = ti;
		queue_kthread_work(&md->kworker, &tio->work);
		BUG_ON(!irqs_disabled());
	}

	goto out;

delay_and_out:
	blk_delay_queue(q, HZ / 100);
out:
	dm_put_live_table(md, srcu_idx);
}

static int dm_any_congested(void *congested_data, int bdi_bits)
{
	int r = bdi_bits;
	struct mapped_device *md = congested_data;
	struct dm_table *map;

	if (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) {
		map = dm_get_live_table_fast(md);
		if (map) {
			if (dm_request_based(md))
				r = md->queue->backing_dev_info.state &
				    bdi_bits;
			else
				r = dm_table_any_congested(map, bdi_bits);
		}
		dm_put_live_table_fast(md);
	}

	return r;
}

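/*-----------------------------------------------------------------
 * An IDR is used to keep track of allocated minor numbers.
 *---------------------------------------------------------------*/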
static void free_minor(int minor)
{
	spin_lock(&_minor_lock);
	idr_remove(&_minor_idr, minor);
	spin_unlock(&_minor_lock);
}

static int specific_minor(int minor)
{
	int r;

	if (minor >= (1 << MINORBITS))
		return -EINVAL;

	idr_preload(GFP_KERNEL);
	spin_lock(&_minor_lock);

	r = idr_alloc(&_minor_idr, MINOR_ALLOCED, minor, minor + 1, GFP_NOWAIT);

	spin_unlock(&_minor_lock);
	idr_preload_end();
	if (r < 0)
		return r == -ENOSPC ? -EBUSY : r;
	return 0;
}

static int next_free_minor(int *minor)
{
	int r;

	idr_preload(GFP_KERNEL);
	spin_lock(&_minor_lock);

	r = idr_alloc(&_minor_idr, MINOR_ALLOCED, 0, 1 << MINORBITS, GFP_NOWAIT);

	spin_unlock(&_minor_lock);
	idr_preload_end();
	if (r < 0)
		return r;
	*minor = r;
	return 0;
}

static const struct block_device_operations dm_blk_dops;

static void dm_wq_work(struct work_struct *work);

static void dm_init_md_queue(struct mapped_device *md)
{
	queue_flag_clear_unlocked(QUEUE_FLAG_STACKABLE, md->queue);
}

static void dm_init_old_md_queue(struct mapped_device *md)
{
	md->use_blk_mq = false;
	dm_init_md_queue(md);

	md->queue->queuedata = md;
	md->queue->backing_dev_info.congested_fn = dm_any_congested;
	md->queue->backing_dev_info.congested_data = md;

	blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
}

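/*
 * Allocate and initialise a blank device with a given minor.
 */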
2267static struct mapped_device *alloc_dev(int minor)
2268{
2269 int r;
2270 struct mapped_device *md = kzalloc(sizeof(*md), GFP_KERNEL);
2271 void *old_md;
2272
2273 if (!md) {
2274 DMWARN("unable to allocate device, out of memory.");
2275 return NULL;
2276 }
2277
2278 if (!try_module_get(THIS_MODULE))
2279 goto bad_module_get;
2280
2281
2282 if (minor == DM_ANY_MINOR)
2283 r = next_free_minor(&minor);
2284 else
2285 r = specific_minor(minor);
2286 if (r < 0)
2287 goto bad_minor;
2288
2289 r = init_srcu_struct(&md->io_barrier);
2290 if (r < 0)
2291 goto bad_io_barrier;
2292
2293 md->use_blk_mq = use_blk_mq;
2294 md->type = DM_TYPE_NONE;
2295 mutex_init(&md->suspend_lock);
2296 mutex_init(&md->type_lock);
2297 mutex_init(&md->table_devices_lock);
2298 spin_lock_init(&md->deferred_lock);
2299 atomic_set(&md->holders, 1);
2300 atomic_set(&md->open_count, 0);
2301 atomic_set(&md->event_nr, 0);
2302 atomic_set(&md->uevent_seq, 0);
2303 INIT_LIST_HEAD(&md->uevent_list);
2304 INIT_LIST_HEAD(&md->table_devices);
2305 spin_lock_init(&md->uevent_lock);
2306
2307 md->queue = blk_alloc_queue(GFP_KERNEL);
2308 if (!md->queue)
2309 goto bad_queue;
2310
2311 dm_init_md_queue(md);
2312
2313 md->disk = alloc_disk(1);
2314 if (!md->disk)
2315 goto bad_disk;
2316
2317 atomic_set(&md->pending[0], 0);
2318 atomic_set(&md->pending[1], 0);
2319 init_waitqueue_head(&md->wait);
2320 INIT_WORK(&md->work, dm_wq_work);
2321 init_waitqueue_head(&md->eventq);
2322 init_completion(&md->kobj_holder.completion);
2323 md->kworker_task = NULL;
2324
2325 md->disk->major = _major;
2326 md->disk->first_minor = minor;
2327 md->disk->fops = &dm_blk_dops;
2328 md->disk->queue = md->queue;
2329 md->disk->private_data = md;
2330 sprintf(md->disk->disk_name, "dm-%d", minor);
2331 add_disk(md->disk);
2332 format_dev_t(md->name, MKDEV(_major, minor));
2333
2334 md->wq = alloc_workqueue("kdmflush", WQ_MEM_RECLAIM, 0);
2335 if (!md->wq)
2336 goto bad_thread;
2337
2338 md->bdev = bdget_disk(md->disk, 0);
2339 if (!md->bdev)
2340 goto bad_bdev;
2341
2342 bio_init(&md->flush_bio);
2343 md->flush_bio.bi_bdev = md->bdev;
2344 md->flush_bio.bi_rw = WRITE_FLUSH;
2345
2346 dm_stats_init(&md->stats);
2347
2348
2349 spin_lock(&_minor_lock);
2350 old_md = idr_replace(&_minor_idr, md, minor);
2351 spin_unlock(&_minor_lock);
2352
2353 BUG_ON(old_md != MINOR_ALLOCED);
2354
2355 return md;
2356
2357bad_bdev:
2358 destroy_workqueue(md->wq);
2359bad_thread:
2360 del_gendisk(md->disk);
2361 put_disk(md->disk);
2362bad_disk:
2363 blk_cleanup_queue(md->queue);
2364bad_queue:
2365 cleanup_srcu_struct(&md->io_barrier);
2366bad_io_barrier:
2367 free_minor(minor);
2368bad_minor:
2369 module_put(THIS_MODULE);
2370bad_module_get:
2371 kfree(md);
2372 return NULL;
2373}
2374
2375static void unlock_fs(struct mapped_device *md);
2376
2377static void free_dev(struct mapped_device *md)
2378{
2379 int minor = MINOR(disk_devt(md->disk));
2380
2381 unlock_fs(md);
2382 destroy_workqueue(md->wq);
2383
2384 if (md->kworker_task)
2385 kthread_stop(md->kworker_task);
2386 if (md->io_pool)
2387 mempool_destroy(md->io_pool);
2388 if (md->rq_pool)
2389 mempool_destroy(md->rq_pool);
2390 if (md->bs)
2391 bioset_free(md->bs);
2392
2393 cleanup_srcu_struct(&md->io_barrier);
2394 free_table_devices(&md->table_devices);
2395 dm_stats_cleanup(&md->stats);
2396
2397 spin_lock(&_minor_lock);
2398 md->disk->private_data = NULL;
2399 spin_unlock(&_minor_lock);
2400 if (blk_get_integrity(md->disk))
2401 blk_integrity_unregister(md->disk);
2402 del_gendisk(md->disk);
2403 put_disk(md->disk);
2404 blk_cleanup_queue(md->queue);
2405 if (md->use_blk_mq)
2406 blk_mq_free_tag_set(&md->tag_set);
2407 bdput(md->bdev);
2408 free_minor(minor);
2409
2410 module_put(THIS_MODULE);
2411 kfree(md);
2412}
2413
2414static void __bind_mempools(struct mapped_device *md, struct dm_table *t)
2415{
2416 struct dm_md_mempools *p = dm_table_get_md_mempools(t);
2417
2418 if (md->bs) {
2419
2420 if (dm_table_get_type(t) == DM_TYPE_BIO_BASED) {
2421
2422
2423
2424
2425 bioset_free(md->bs);
2426 md->bs = p->bs;
2427 p->bs = NULL;
2428 }
2429
2430
2431
2432
2433
2434
2435
2436
2437 goto out;
2438 }
2439
2440 BUG_ON(!p || md->io_pool || md->rq_pool || md->bs);
2441
2442 md->io_pool = p->io_pool;
2443 p->io_pool = NULL;
2444 md->rq_pool = p->rq_pool;
2445 p->rq_pool = NULL;
2446 md->bs = p->bs;
2447 p->bs = NULL;
2448
2449out:
2450
2451 dm_table_free_md_mempools(t);
2452}
2453
2454
2455
2456
2457static void event_callback(void *context)
2458{
2459 unsigned long flags;
2460 LIST_HEAD(uevents);
2461 struct mapped_device *md = (struct mapped_device *) context;
2462
2463 spin_lock_irqsave(&md->uevent_lock, flags);
2464 list_splice_init(&md->uevent_list, &uevents);
2465 spin_unlock_irqrestore(&md->uevent_lock, flags);
2466
2467 dm_send_uevents(&uevents, &disk_to_dev(md->disk)->kobj);
2468
2469 atomic_inc(&md->event_nr);
2470 wake_up(&md->eventq);
2471}
2472
2473
2474
2475
2476static void __set_size(struct mapped_device *md, sector_t size)
2477{
2478 set_capacity(md->disk, size);
2479
2480 i_size_write(md->bdev->bd_inode, (loff_t)size << SECTOR_SHIFT);
2481}
2482
2483
2484
2485
2486
2487
2488
2489
2490int dm_queue_merge_is_compulsory(struct request_queue *q)
2491{
2492 struct mapped_device *dev_md;
2493
2494 if (!q->merge_bvec_fn)
2495 return 0;
2496
2497 if (q->make_request_fn == dm_make_request) {
2498 dev_md = q->queuedata;
2499 if (test_bit(DMF_MERGE_IS_OPTIONAL, &dev_md->flags))
2500 return 0;
2501 }
2502
2503 return 1;
2504}
2505
2506static int dm_device_merge_is_compulsory(struct dm_target *ti,
2507 struct dm_dev *dev, sector_t start,
2508 sector_t len, void *data)
2509{
2510 struct block_device *bdev = dev->bdev;
2511 struct request_queue *q = bdev_get_queue(bdev);
2512
2513 return dm_queue_merge_is_compulsory(q);
2514}
2515
2516
2517
2518
2519
2520static int dm_table_merge_is_optional(struct dm_table *table)
2521{
2522 unsigned i = 0;
2523 struct dm_target *ti;
2524
2525 while (i < dm_table_get_num_targets(table)) {
2526 ti = dm_table_get_target(table, i++);
2527
2528 if (ti->type->iterate_devices &&
2529 ti->type->iterate_devices(ti, dm_device_merge_is_compulsory, NULL))
2530 return 0;
2531 }
2532
2533 return 1;
2534}
2535
2536
2537
2538
2539static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
2540 struct queue_limits *limits)
2541{
2542 struct dm_table *old_map;
2543 struct request_queue *q = md->queue;
2544 sector_t size;
2545 int merge_is_optional;
2546
2547 size = dm_table_get_size(t);
2548
2549
2550
2551
2552 if (size != dm_get_size(md))
2553 memset(&md->geometry, 0, sizeof(md->geometry));
2554
2555 __set_size(md, size);
2556
2557 dm_table_event_callback(t, event_callback, md);
2558
2559
2560
2561
2562
2563
2564
2565
2566 if (dm_table_request_based(t))
2567 stop_queue(q);
2568
2569 __bind_mempools(md, t);
2570
2571 merge_is_optional = dm_table_merge_is_optional(t);
2572
2573 old_map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock));
2574 rcu_assign_pointer(md->map, t);
2575 md->immutable_target_type = dm_table_get_immutable_target_type(t);
2576
2577 dm_table_set_restrictions(t, q, limits);
2578 if (merge_is_optional)
2579 set_bit(DMF_MERGE_IS_OPTIONAL, &md->flags);
2580 else
2581 clear_bit(DMF_MERGE_IS_OPTIONAL, &md->flags);
2582 if (old_map)
2583 dm_sync_table(md);
2584
2585 return old_map;
2586}
2587
2588
2589
2590
2591static struct dm_table *__unbind(struct mapped_device *md)
2592{
2593 struct dm_table *map = rcu_dereference_protected(md->map, 1);
2594
2595 if (!map)
2596 return NULL;
2597
2598 dm_table_event_callback(map, NULL, NULL);
2599 RCU_INIT_POINTER(md->map, NULL);
2600 dm_sync_table(md);
2601
2602 return map;
2603}
2604
2605
2606
2607
2608int dm_create(int minor, struct mapped_device **result)
2609{
2610 struct mapped_device *md;
2611
2612 md = alloc_dev(minor);
2613 if (!md)
2614 return -ENXIO;
2615
2616 dm_sysfs_init(md);
2617
2618 *result = md;
2619 return 0;
2620}
2621
2622
2623
2624
2625
2626void dm_lock_md_type(struct mapped_device *md)
2627{
2628 mutex_lock(&md->type_lock);
2629}
2630
2631void dm_unlock_md_type(struct mapped_device *md)
2632{
2633 mutex_unlock(&md->type_lock);
2634}
2635
2636void dm_set_md_type(struct mapped_device *md, unsigned type)
2637{
2638 BUG_ON(!mutex_is_locked(&md->type_lock));
2639 md->type = type;
2640}
2641
2642unsigned dm_get_md_type(struct mapped_device *md)
2643{
2644 BUG_ON(!mutex_is_locked(&md->type_lock));
2645 return md->type;
2646}

struct target_type *dm_get_immutable_target_type(struct mapped_device *md)
{
	return md->immutable_target_type;
}

/*
 * The queue_limits are only valid as long as you have a reference
 * count on 'md'.
 */
struct queue_limits *dm_get_queue_limits(struct mapped_device *md)
{
	BUG_ON(!atomic_read(&md->holders));
	return &md->queue->limits;
}
EXPORT_SYMBOL_GPL(dm_get_queue_limits);

static void init_rq_based_worker_thread(struct mapped_device *md)
{
	/* Initialize the request-based DM worker thread */
	init_kthread_worker(&md->kworker);
	md->kworker_task = kthread_run(kthread_worker_fn, &md->kworker,
				       "kdmwork-%s", dm_device_name(md));
}

/*
 * Fully initialize a .request_fn request-based queue.
 */
static int dm_init_request_based_queue(struct mapped_device *md)
{
	struct request_queue *q = NULL;

	/* Fully initialize the queue */
	q = blk_init_allocated_queue(md->queue, dm_request_fn, NULL);
	if (!q)
		return -EINVAL;

	/* disable dm_request_fn's merge heuristic by default */
	md->seq_rq_merge_deadline_usecs = 0;

	md->queue = q;
	dm_init_old_md_queue(md);
	blk_queue_softirq_done(md->queue, dm_softirq_done);
	blk_queue_prep_rq(md->queue, dm_prep_fn);

	/* Also initialize the request-based DM worker thread */
	init_rq_based_worker_thread(md);

	elv_register_queue(md->queue);

	return 0;
}

static int dm_mq_init_request(void *data, struct request *rq,
			      unsigned int hctx_idx, unsigned int request_idx,
			      unsigned int numa_node)
{
	struct mapped_device *md = data;
	struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq);

	/*
	 * Must initialize md member of tio, otherwise it won't
	 * be available in dm_mq_queue_rq.
	 */
	tio->md = md;

	return 0;
}

static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
			  const struct blk_mq_queue_data *bd)
{
	struct request *rq = bd->rq;
	struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq);
	struct mapped_device *md = tio->md;
	int srcu_idx;
	struct dm_table *map = dm_get_live_table(md, &srcu_idx);
	struct dm_target *ti;
	sector_t pos;

	/* always use block 0 to find the target for flushes for now */
	pos = 0;
	if (!(rq->cmd_flags & REQ_FLUSH))
		pos = blk_rq_pos(rq);

	ti = dm_table_find_target(map, pos);
	if (!dm_target_is_valid(ti)) {
		dm_put_live_table(md, srcu_idx);
		DMERR_LIMIT("request attempted access beyond the end of device");
		/*
		 * Must perform setup, that rq_completed() requires,
		 * before returning BLK_MQ_RQ_QUEUE_ERROR
		 */
		dm_start_request(md, rq);
		return BLK_MQ_RQ_QUEUE_ERROR;
	}
	dm_put_live_table(md, srcu_idx);

	if (ti->type->busy && ti->type->busy(ti))
		return BLK_MQ_RQ_QUEUE_BUSY;

	dm_start_request(md, rq);

	/* Init tio using md established in .init_request */
	init_tio(tio, rq, md);

	/*
	 * Establish tio->ti before queuing work (map_tio_request)
	 * or making a direct call to map_request().
	 */
	tio->ti = ti;

	/* Clone the request if underlying devices aren't blk-mq */
	if (dm_table_get_type(map) == DM_TYPE_REQUEST_BASED) {
		/* clone request is allocated at the end of the pdu */
		tio->clone = (void *)blk_mq_rq_to_pdu(rq) + sizeof(struct dm_rq_target_io);
		(void) clone_rq(rq, md, tio, GFP_ATOMIC);
		queue_kthread_work(&md->kworker, &tio->work);
	} else {
		/* Direct call is fine since .queue_rq allows allocations */
		if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE) {
			/* Undo dm_start_request() before requeuing */
			rq_completed(md, rq_data_dir(rq), false);
			return BLK_MQ_RQ_QUEUE_BUSY;
		}
	}

	return BLK_MQ_RQ_QUEUE_OK;
}
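
/*
 * Return-code summary for .queue_rq as used above: BLK_MQ_RQ_QUEUE_OK
 * accepts the request, BLK_MQ_RQ_QUEUE_BUSY asks the block layer to
 * retry it later, and BLK_MQ_RQ_QUEUE_ERROR fails it outright.
 */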
static struct blk_mq_ops dm_mq_ops = {
	.queue_rq = dm_mq_queue_rq,
	.map_queue = blk_mq_map_queue,
	.complete = dm_softirq_done,
	.init_request = dm_mq_init_request,
};

static int dm_init_request_based_blk_mq_queue(struct mapped_device *md)
{
	unsigned md_type = dm_get_md_type(md);
	struct request_queue *q;
	int err;

	memset(&md->tag_set, 0, sizeof(md->tag_set));
	md->tag_set.ops = &dm_mq_ops;
	md->tag_set.queue_depth = BLKDEV_MAX_RQ;
	md->tag_set.numa_node = NUMA_NO_NODE;
	md->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
	md->tag_set.nr_hw_queues = 1;
	if (md_type == DM_TYPE_REQUEST_BASED) {
		/* make the memory for the non-blk-mq clone part of the pdu */
		md->tag_set.cmd_size = sizeof(struct dm_rq_target_io) + sizeof(struct request);
	} else
		md->tag_set.cmd_size = sizeof(struct dm_rq_target_io);
	md->tag_set.driver_data = md;

	err = blk_mq_alloc_tag_set(&md->tag_set);
	if (err)
		return err;

	q = blk_mq_init_allocated_queue(&md->tag_set, md->queue);
	if (IS_ERR(q)) {
		err = PTR_ERR(q);
		goto out_tag_set;
	}
	md->queue = q;
	dm_init_md_queue(md);

	/* backfill 'mq' sysfs registration normally done in blk_mq_init_queue */
	blk_mq_register_disk(md->disk);

	if (md_type == DM_TYPE_REQUEST_BASED)
		init_rq_based_worker_thread(md);

	return 0;

out_tag_set:
	blk_mq_free_tag_set(&md->tag_set);
	return err;
}
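
/*
 * PDU layout note (derived from the cmd_size setup above): every blk-mq
 * request is followed by a struct dm_rq_target_io, and for
 * DM_TYPE_REQUEST_BASED an old-style struct request clone sits right
 * behind it -- which is what dm_mq_queue_rq's pdu arithmetic relies on.
 */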
static unsigned filter_md_type(unsigned type, struct mapped_device *md)
{
	if (type == DM_TYPE_BIO_BASED)
		return type;

	return !md->use_blk_mq ? DM_TYPE_REQUEST_BASED : DM_TYPE_MQ_REQUEST_BASED;
}

/*
 * Setup the DM device's queue based on md's type
 */
int dm_setup_md_queue(struct mapped_device *md)
{
	int r;
	unsigned md_type = filter_md_type(dm_get_md_type(md), md);

	switch (md_type) {
	case DM_TYPE_REQUEST_BASED:
		r = dm_init_request_based_queue(md);
		if (r) {
			DMWARN("Cannot initialize queue for request-based mapped device");
			return r;
		}
		break;
	case DM_TYPE_MQ_REQUEST_BASED:
		r = dm_init_request_based_blk_mq_queue(md);
		if (r) {
			DMWARN("Cannot initialize queue for request-based blk-mq mapped device");
			return r;
		}
		break;
	case DM_TYPE_BIO_BASED:
		dm_init_old_md_queue(md);
		blk_queue_make_request(md->queue, dm_make_request);
		blk_queue_merge_bvec(md->queue, dm_merge_bvec);
		break;
	}

	return 0;
}

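/*
 * Look up a mapped device by dev_t and take a reference on it.
 * Returns NULL if the device does not exist or is being freed/deleted.
 */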
struct mapped_device *dm_get_md(dev_t dev)
{
	struct mapped_device *md;
	unsigned minor = MINOR(dev);

	if (MAJOR(dev) != _major || minor >= (1 << MINORBITS))
		return NULL;

	spin_lock(&_minor_lock);

	md = idr_find(&_minor_idr, minor);
	if (md) {
		if (md == MINOR_ALLOCED ||
		    (MINOR(disk_devt(dm_disk(md))) != minor) ||
		    dm_deleting_md(md) ||
		    test_bit(DMF_FREEING, &md->flags)) {
			md = NULL;
			goto out;
		}
		dm_get(md);
	}

out:
	spin_unlock(&_minor_lock);

	return md;
}
EXPORT_SYMBOL_GPL(dm_get_md);

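/*
 * The mdptr is an opaque back-pointer owned by the interface that created
 * the device (dm-ioctl stores its hash cell here); dm core only stores it
 * and never dereferences it.
 */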
void *dm_get_mdptr(struct mapped_device *md)
{
	return md->interface_ptr;
}

void dm_set_mdptr(struct mapped_device *md, void *ptr)
{
	md->interface_ptr = ptr;
}

void dm_get(struct mapped_device *md)
{
	atomic_inc(&md->holders);
	BUG_ON(test_bit(DMF_FREEING, &md->flags));
}

int dm_hold(struct mapped_device *md)
{
	spin_lock(&_minor_lock);
	if (test_bit(DMF_FREEING, &md->flags)) {
		spin_unlock(&_minor_lock);
		return -EBUSY;
	}
	dm_get(md);
	spin_unlock(&_minor_lock);
	return 0;
}
EXPORT_SYMBOL_GPL(dm_hold);

const char *dm_device_name(struct mapped_device *md)
{
	return md->name;
}
EXPORT_SYMBOL_GPL(dm_device_name);

static void __dm_destroy(struct mapped_device *md, bool wait)
{
	struct dm_table *map;
	int srcu_idx;

	might_sleep();

	map = dm_get_live_table(md, &srcu_idx);

	spin_lock(&_minor_lock);
	idr_replace(&_minor_idr, MINOR_ALLOCED, MINOR(disk_devt(dm_disk(md))));
	set_bit(DMF_FREEING, &md->flags);
	spin_unlock(&_minor_lock);

	if (dm_request_based(md) && md->kworker_task)
		flush_kthread_worker(&md->kworker);

	/*
	 * Take suspend_lock so that presuspend and postsuspend methods
	 * do not race with internal suspend.
	 */
	mutex_lock(&md->suspend_lock);
	if (!dm_suspended_md(md)) {
		dm_table_presuspend_targets(map);
		dm_table_postsuspend_targets(map);
	}
	mutex_unlock(&md->suspend_lock);

	/* Drop the live-table reference taken above */
	dm_put_live_table(md, srcu_idx);

	/*
	 * Rare, but there may be I/O requests still going to complete,
	 * for example.  Wait for all references to disappear.
	 * No one should increment the reference count of the mapped_device,
	 * after the mapped_device state becomes DMF_FREEING.
	 */
	if (wait)
		while (atomic_read(&md->holders))
			msleep(1);
	else if (atomic_read(&md->holders))
		DMWARN("%s: Forcibly removing mapped_device still in use! (%d users)",
		       dm_device_name(md), atomic_read(&md->holders));

	dm_sysfs_exit(md);
	dm_table_destroy(__unbind(md));
	free_dev(md);
}

void dm_destroy(struct mapped_device *md)
{
	__dm_destroy(md, true);
}

void dm_destroy_immediate(struct mapped_device *md)
{
	__dm_destroy(md, false);
}

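/*
 * Note: dm_put() may legitimately be called while DMF_FREEING is set
 * (__dm_destroy waits for the holders to drain), so it must touch nothing
 * in *md beyond the holder count.
 */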
void dm_put(struct mapped_device *md)
{
	atomic_dec(&md->holders);
}
EXPORT_SYMBOL_GPL(dm_put);

static int dm_wait_for_completion(struct mapped_device *md, int interruptible)
{
	int r = 0;
	DECLARE_WAITQUEUE(wait, current);

	add_wait_queue(&md->wait, &wait);

	while (1) {
		set_current_state(interruptible);

		if (!md_in_flight(md))
			break;

		if (interruptible == TASK_INTERRUPTIBLE &&
		    signal_pending(current)) {
			r = -EINTR;
			break;
		}

		io_schedule();
	}
	set_current_state(TASK_RUNNING);

	remove_wait_queue(&md->wait, &wait);

	return r;
}

/*
 * Process the deferred bios
 */
static void dm_wq_work(struct work_struct *work)
{
	struct mapped_device *md = container_of(work, struct mapped_device,
						work);
	struct bio *c;
	int srcu_idx;
	struct dm_table *map;

	map = dm_get_live_table(md, &srcu_idx);

	while (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) {
		spin_lock_irq(&md->deferred_lock);
		c = bio_list_pop(&md->deferred);
		spin_unlock_irq(&md->deferred_lock);

		if (!c)
			break;

		if (dm_request_based(md))
			generic_make_request(c);
		else
			__split_and_process_bio(md, map, c);
	}

	dm_put_live_table(md, srcu_idx);
}

static void dm_queue_flush(struct mapped_device *md)
{
	clear_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
	/* Make the flag clear visible before the worker can run */
	smp_mb__after_atomic();
	queue_work(md->wq, &md->work);
}

/*
 * Swap in a new table, returning the old one for the caller to destroy.
 */
struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table)
{
	struct dm_table *live_map = NULL, *map = ERR_PTR(-EINVAL);
	struct queue_limits limits;
	int r;

	mutex_lock(&md->suspend_lock);

	/* device must be suspended */
	if (!dm_suspended_md(md))
		goto out;

	/*
	 * If the new table has no data devices, retain the existing limits.
	 * This helps multipath with queue_if_no_path if all paths disappear,
	 * then new I/O is queued based on these limits, and then some paths
	 * reappear.
	 */
	if (dm_table_has_no_data_devices(table)) {
		live_map = dm_get_live_table_fast(md);
		if (live_map)
			limits = md->queue->limits;
		dm_put_live_table_fast(md);
	}

	if (!live_map) {
		r = dm_calculate_queue_limits(table, &limits);
		if (r) {
			map = ERR_PTR(r);
			goto out;
		}
	}

	map = __bind(md, table, &limits);

out:
	mutex_unlock(&md->suspend_lock);
	return map;
}

/*
 * Functions to lock and unlock any filesystem running on the
 * device.
 */
static int lock_fs(struct mapped_device *md)
{
	int r;

	WARN_ON(md->frozen_sb);

	md->frozen_sb = freeze_bdev(md->bdev);
	if (IS_ERR(md->frozen_sb)) {
		r = PTR_ERR(md->frozen_sb);
		md->frozen_sb = NULL;
		return r;
	}

	set_bit(DMF_FROZEN, &md->flags);

	return 0;
}

static void unlock_fs(struct mapped_device *md)
{
	if (!test_bit(DMF_FROZEN, &md->flags))
		return;

	thaw_bdev(md->bdev, md->frozen_sb);
	md->frozen_sb = NULL;
	clear_bit(DMF_FROZEN, &md->flags);
}

/*
 * If __dm_suspend returns 0, the device is completely quiescent
 * now. There is no request-processing activity. All new requests
 * are being added to md->deferred list.
 *
 * Caller must hold md->suspend_lock
 */
static int __dm_suspend(struct mapped_device *md, struct dm_table *map,
			unsigned suspend_flags, int interruptible)
{
	bool do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG;
	bool noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG;
	int r;

	/*
	 * DMF_NOFLUSH_SUSPENDING must be set before presuspend.
	 * This flag is cleared before dm_suspend returns.
	 */
	if (noflush)
		set_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);

	/*
	 * This gets reverted if there's an error later and the targets
	 * provide the .presuspend_undo hook.
	 */
	dm_table_presuspend_targets(map);

	/*
	 * Flush I/O to the device.
	 * Any I/O submitted after lock_fs() may not be flushed.
	 * noflush takes precedence over do_lockfs.
	 * (lock_fs() flushes I/Os and waits for them to complete.)
	 */
	if (!noflush && do_lockfs) {
		r = lock_fs(md);
		if (r) {
			dm_table_presuspend_undo_targets(map);
			return r;
		}
	}

	/*
	 * Here we must make sure that no processes are submitting requests
	 * to target drivers i.e. no one may be executing
	 * __split_and_process_bio.  New bios are blocked by setting
	 * DMF_BLOCK_IO_FOR_SUSPEND; synchronize_srcu() then waits out any
	 * thread still inside the SRCU read section, and
	 * flush_workqueue(md->wq) quiesces dm_wq_work.
	 */
	set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
	if (map)
		synchronize_srcu(&md->io_barrier);

	/*
	 * Stop md->queue before flushing md->wq in case request-based
	 * dm defers requests to md->wq from md->queue.
	 */
	if (dm_request_based(md)) {
		stop_queue(md->queue);
		if (md->kworker_task)
			flush_kthread_worker(&md->kworker);
	}

	flush_workqueue(md->wq);

	/*
	 * At this point no more requests are entering target request routines.
	 * We call dm_wait_for_completion to wait for all existing requests
	 * to finish.
	 */
	r = dm_wait_for_completion(md, interruptible);

	if (noflush)
		clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
	if (map)
		synchronize_srcu(&md->io_barrier);

	/* were we interrupted ? */
	if (r < 0) {
		dm_queue_flush(md);

		if (dm_request_based(md))
			start_queue(md->queue);

		unlock_fs(md);
		dm_table_presuspend_undo_targets(map);
	}

	return r;
}

/*
 * We need to be able to change a mapping table under a mounted
 * filesystem.  For example we might want to move some data in
 * the background.  Before the table can be swapped with
 * dm_bind_table, dm_suspend must be called to flush any in
 * flight bios and ensure that any further io gets deferred.
 */
/*
 * Suspend mechanism in request-based dm.
 *
 * 1. Flush all I/Os by lock_fs() if needed.
 * 2. Stop dispatching any I/O by stopping the request_queue.
 * 3. Wait for all in-flight I/Os to be completed or requeued.
 *
 * To abort suspend, start the request_queue.
 */
int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
{
	struct dm_table *map = NULL;
	int r = 0;

retry:
	mutex_lock_nested(&md->suspend_lock, SINGLE_DEPTH_NESTING);

	if (dm_suspended_md(md)) {
		r = -EINVAL;
		goto out_unlock;
	}

	if (dm_suspended_internally_md(md)) {
		/* already internally suspended, wait for internal resume */
		mutex_unlock(&md->suspend_lock);
		r = wait_on_bit(&md->flags, DMF_SUSPENDED_INTERNALLY, TASK_INTERRUPTIBLE);
		if (r)
			return r;
		goto retry;
	}

	map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock));

	r = __dm_suspend(md, map, suspend_flags, TASK_INTERRUPTIBLE);
	if (r)
		goto out_unlock;

	set_bit(DMF_SUSPENDED, &md->flags);

	dm_table_postsuspend_targets(map);

out_unlock:
	mutex_unlock(&md->suspend_lock);
	return r;
}

static int __dm_resume(struct mapped_device *md, struct dm_table *map)
{
	if (map) {
		int r = dm_table_resume_targets(map);
		if (r)
			return r;
	}

	dm_queue_flush(md);

	/*
	 * Flushing deferred I/Os must be done after targets are resumed
	 * so that mapping of targets can work correctly.
	 * Request-based dm is queueing the deferred I/Os in its request_queue.
	 */
	if (dm_request_based(md))
		start_queue(md->queue);

	unlock_fs(md);

	return 0;
}

int dm_resume(struct mapped_device *md)
{
	int r = -EINVAL;
	struct dm_table *map = NULL;

retry:
	mutex_lock_nested(&md->suspend_lock, SINGLE_DEPTH_NESTING);

	if (!dm_suspended_md(md))
		goto out;

	if (dm_suspended_internally_md(md)) {
		/* already internally suspended, wait for internal resume */
		mutex_unlock(&md->suspend_lock);
		r = wait_on_bit(&md->flags, DMF_SUSPENDED_INTERNALLY, TASK_INTERRUPTIBLE);
		if (r)
			return r;
		goto retry;
	}

	map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock));
	if (!map || !dm_table_get_size(map))
		goto out;

	r = __dm_resume(md, map);
	if (r)
		goto out;

	clear_bit(DMF_SUSPENDED, &md->flags);

	r = 0;
out:
	mutex_unlock(&md->suspend_lock);

	return r;
}
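
/*
 * A minimal sketch of the table-swap sequence built from the primitives
 * above (illustrative only; the dm-ioctl resume path does something
 * similar, with 'new_table' assumed to be fully constructed):
 *
 *	dm_suspend(md, DM_SUSPEND_LOCKFS_FLAG);
 *	old_map = dm_swap_table(md, new_table);
 *	if (!IS_ERR(old_map))
 *		dm_table_destroy(old_map);
 *	dm_resume(md);
 */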

/*
 * Internal suspend/resume works like userspace-driven suspend. It waits
 * until all bios finish and prevents issuing new bios to the target drivers.
 * It may be used only from the kernel.
 */
static void __dm_internal_suspend(struct mapped_device *md, unsigned suspend_flags)
{
	struct dm_table *map = NULL;

	if (md->internal_suspend_count++)
		return; /* nested internal suspend */

	if (dm_suspended_md(md)) {
		set_bit(DMF_SUSPENDED_INTERNALLY, &md->flags);
		return; /* nest suspend */
	}

	map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock));

	/*
	 * Use TASK_UNINTERRUPTIBLE: internal suspend is noflush-only, so
	 * lock_fs() is skipped and the uninterruptible wait cannot return
	 * -EINTR, which is why __dm_suspend's result is ignored here.
	 */
	(void) __dm_suspend(md, map, suspend_flags, TASK_UNINTERRUPTIBLE);

	set_bit(DMF_SUSPENDED_INTERNALLY, &md->flags);

	dm_table_postsuspend_targets(map);
}

static void __dm_internal_resume(struct mapped_device *md)
{
	BUG_ON(!md->internal_suspend_count);

	if (--md->internal_suspend_count)
		return; /* resume from nested internal suspend */

	if (dm_suspended_md(md))
		goto done; /* resume from nested suspend */

	/*
	 * NOTE: existing callers don't need dm_table_resume_targets
	 * (which may fail -- so best to avoid it for now by passing NULL map)
	 */
	(void) __dm_resume(md, NULL);

done:
	clear_bit(DMF_SUSPENDED_INTERNALLY, &md->flags);
	smp_mb__after_atomic();
	wake_up_bit(&md->flags, DMF_SUSPENDED_INTERNALLY);
}

void dm_internal_suspend_noflush(struct mapped_device *md)
{
	mutex_lock(&md->suspend_lock);
	__dm_internal_suspend(md, DM_SUSPEND_NOFLUSH_FLAG);
	mutex_unlock(&md->suspend_lock);
}
EXPORT_SYMBOL_GPL(dm_internal_suspend_noflush);

void dm_internal_resume(struct mapped_device *md)
{
	mutex_lock(&md->suspend_lock);
	__dm_internal_resume(md);
	mutex_unlock(&md->suspend_lock);
}
EXPORT_SYMBOL_GPL(dm_internal_resume);

/*
 * Fast variants of internal suspend/resume hold md->suspend_lock,
 * which prevents interaction with userspace-driven suspend.
 */
void dm_internal_suspend_fast(struct mapped_device *md)
{
	mutex_lock(&md->suspend_lock);
	if (dm_suspended_md(md) || dm_suspended_internally_md(md))
		return; /* suspend_lock stays held until dm_internal_resume_fast */

	set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
	synchronize_srcu(&md->io_barrier);
	flush_workqueue(md->wq);
	dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL_GPL(dm_internal_suspend_fast);

void dm_internal_resume_fast(struct mapped_device *md)
{
	if (dm_suspended_md(md) || dm_suspended_internally_md(md))
		goto done;

	dm_queue_flush(md);

done:
	mutex_unlock(&md->suspend_lock);
}
EXPORT_SYMBOL_GPL(dm_internal_resume_fast);

/*-----------------------------------------------------------------
 * Event notification.
 *---------------------------------------------------------------*/
int dm_kobject_uevent(struct mapped_device *md, enum kobject_action action,
		      unsigned cookie)
{
	char udev_cookie[DM_COOKIE_LENGTH];
	char *envp[] = { udev_cookie, NULL };

	if (!cookie)
		return kobject_uevent(&disk_to_dev(md->disk)->kobj, action);

	snprintf(udev_cookie, DM_COOKIE_LENGTH, "%s=%u",
		 DM_COOKIE_ENV_VAR_NAME, cookie);
	return kobject_uevent_env(&disk_to_dev(md->disk)->kobj,
				  action, envp);
}
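
/*
 * Illustrative example: for cookie 0xdeadbeef the uevent environment
 * carries "DM_COOKIE=3735928559"; userspace (e.g. libdevmapper) uses
 * this value to match the uevent to the ioctl that triggered it.
 */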
uint32_t dm_next_uevent_seq(struct mapped_device *md)
{
	return atomic_add_return(1, &md->uevent_seq);
}

uint32_t dm_get_event_nr(struct mapped_device *md)
{
	return atomic_read(&md->event_nr);
}

int dm_wait_event(struct mapped_device *md, int event_nr)
{
	return wait_event_interruptible(md->eventq,
			(event_nr != atomic_read(&md->event_nr)));
}

void dm_uevent_add(struct mapped_device *md, struct list_head *elist)
{
	unsigned long flags;

	spin_lock_irqsave(&md->uevent_lock, flags);
	list_add(elist, &md->uevent_list);
	spin_unlock_irqrestore(&md->uevent_lock, flags);
}

/*
 * The gendisk is only valid as long as you have a reference
 * count on 'md'.
 */
struct gendisk *dm_disk(struct mapped_device *md)
{
	return md->disk;
}
EXPORT_SYMBOL_GPL(dm_disk);

struct kobject *dm_kobject(struct mapped_device *md)
{
	return &md->kobj_holder.kobj;
}

struct mapped_device *dm_get_from_kobject(struct kobject *kobj)
{
	struct mapped_device *md;

	md = container_of(kobj, struct mapped_device, kobj_holder.kobj);

	if (test_bit(DMF_FREEING, &md->flags) ||
	    dm_deleting_md(md))
		return NULL;

	dm_get(md);
	return md;
}

int dm_suspended_md(struct mapped_device *md)
{
	return test_bit(DMF_SUSPENDED, &md->flags);
}

int dm_suspended_internally_md(struct mapped_device *md)
{
	return test_bit(DMF_SUSPENDED_INTERNALLY, &md->flags);
}

int dm_test_deferred_remove_flag(struct mapped_device *md)
{
	return test_bit(DMF_DEFERRED_REMOVE, &md->flags);
}

int dm_suspended(struct dm_target *ti)
{
	return dm_suspended_md(dm_table_get_md(ti->table));
}
EXPORT_SYMBOL_GPL(dm_suspended);

int dm_noflush_suspending(struct dm_target *ti)
{
	return __noflush_suspending(dm_table_get_md(ti->table));
}
EXPORT_SYMBOL_GPL(dm_noflush_suspending);

struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned type,
					    unsigned integrity, unsigned per_bio_data_size)
{
	struct dm_md_mempools *pools = kzalloc(sizeof(*pools), GFP_KERNEL);
	struct kmem_cache *cachep = NULL;
	unsigned int pool_size = 0;
	unsigned int front_pad;

	if (!pools)
		return NULL;

	type = filter_md_type(type, md);

	switch (type) {
	case DM_TYPE_BIO_BASED:
		cachep = _io_cache;
		pool_size = dm_get_reserved_bio_based_ios();
		front_pad = roundup(per_bio_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone);
		break;
	case DM_TYPE_REQUEST_BASED:
		cachep = _rq_tio_cache;
		pool_size = dm_get_reserved_rq_based_ios();
		pools->rq_pool = mempool_create_slab_pool(pool_size, _rq_cache);
		if (!pools->rq_pool)
			goto out;
		/* fall through to setup remaining rq-based pools */
	case DM_TYPE_MQ_REQUEST_BASED:
		if (!pool_size)
			pool_size = dm_get_reserved_rq_based_ios();
		front_pad = offsetof(struct dm_rq_clone_bio_info, clone);
		/* per_bio_data_size is not used for request-based pools */
		WARN_ON(per_bio_data_size != 0);
		break;
	default:
		BUG();
	}

	if (cachep) {
		pools->io_pool = mempool_create_slab_pool(pool_size, cachep);
		if (!pools->io_pool)
			goto out;
	}

	pools->bs = bioset_create_nobvec(pool_size, front_pad);
	if (!pools->bs)
		goto out;

	if (integrity && bioset_integrity_create(pools->bs, pool_size))
		goto out;

	return pools;

out:
	dm_free_md_mempools(pools);

	return NULL;
}

void dm_free_md_mempools(struct dm_md_mempools *pools)
{
	if (!pools)
		return;

	if (pools->io_pool)
		mempool_destroy(pools->io_pool);

	if (pools->rq_pool)
		mempool_destroy(pools->rq_pool);

	if (pools->bs)
		bioset_free(pools->bs);

	kfree(pools);
}

static const struct block_device_operations dm_blk_dops = {
	.open = dm_blk_open,
	.release = dm_blk_close,
	.ioctl = dm_blk_ioctl,
	.getgeo = dm_blk_getgeo,
	.owner = THIS_MODULE
};

/*
 * module hooks
 */
module_init(dm_init);
module_exit(dm_exit);

module_param(major, uint, 0);
MODULE_PARM_DESC(major, "The major number of the device mapper");

module_param(reserved_bio_based_ios, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(reserved_bio_based_ios, "Reserved IOs in bio-based mempools");

module_param(reserved_rq_based_ios, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(reserved_rq_based_ios, "Reserved IOs in request-based mempools");

module_param(use_blk_mq, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(use_blk_mq, "Use block multiqueue for request-based DM devices");

MODULE_DESCRIPTION(DM_NAME " driver");
MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
MODULE_LICENSE("GPL");
