/*
 * Copyright (C) 2001, 2002 Sistina Software (UK) Limited.
 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
 *
 * This file is released under the GPL.
 */
#include "dm.h"
#include "dm-uevent.h"

#include <linux/init.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/moduleparam.h>
#include <linux/blkpg.h>
#include <linux/bio.h>
#include <linux/mempool.h>
#include <linux/slab.h>
#include <linux/idr.h>
#include <linux/hdreg.h>
#include <linux/delay.h>
#include <linux/wait.h>
#include <linux/kthread.h>
#include <linux/ktime.h>
#include <linux/elevator.h>
#include <linux/blk-mq.h>
#include <linux/pr.h>

#include <trace/events/block.h>

#define DM_MSG_PREFIX "core"
32
#ifdef CONFIG_PRINTK
/*
 * ratelimit state shared by all the DMXXX_LIMIT() message macros.
 */
DEFINE_RATELIMIT_STATE(dm_ratelimit_state,
                       DEFAULT_RATELIMIT_INTERVAL,
                       DEFAULT_RATELIMIT_BURST);
EXPORT_SYMBOL(dm_ratelimit_state);
#endif
42
43
44
45
46
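/*
 * Cookies are numeric values sent with CHANGE and REMOVE
 * uevents while resuming, removing or renaming the device.
 */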
#define DM_COOKIE_ENV_VAR_NAME "DM_COOKIE"
#define DM_COOKIE_LENGTH 24

static const char *_name = DM_NAME;

static unsigned int major = 0;
static unsigned int _major = 0;

static DEFINE_IDR(_minor_idr);

static DEFINE_SPINLOCK(_minor_lock);

static void do_deferred_remove(struct work_struct *w);

static DECLARE_WORK(deferred_remove_work, do_deferred_remove);

static struct workqueue_struct *deferred_remove_workqueue;
64
65
66
67
68
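/*
 * One of these is allocated per original bio.
 */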
struct dm_io {
        struct mapped_device *md;
        int error;
        atomic_t io_count;
        struct bio *bio;
        unsigned long start_time;
        spinlock_t endio_lock;
        struct dm_stats_aux stats_aux;
};
78
79
80
81
82
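/*
 * For request-based dm.
 * One of these is allocated per request.
 */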
struct dm_rq_target_io {
        struct mapped_device *md;
        struct dm_target *ti;
        struct request *orig, *clone;
        struct kthread_work work;
        int error;
        union map_info info;
        struct dm_stats_aux stats_aux;
        unsigned long duration_jiffies;
        unsigned n_sectors;
};
94
95
96
97
98
99
100
101
102
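/*
 * For request-based dm - the bio clones we allocate are embedded in these
 * structs.
 *
 * We allocate these with bio_alloc_bioset, using the front_pad parameter
 * when the bioset is created - this means the bio has to come at the end
 * of the struct.
 */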
struct dm_rq_clone_bio_info {
        struct bio *orig;
        struct dm_rq_target_io *tio;
        struct bio clone;
};
108
union map_info *dm_get_rq_mapinfo(struct request *rq)
{
        if (rq && rq->end_io_data)
                return &((struct dm_rq_target_io *)rq->end_io_data)->info;
        return NULL;
}
EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo);
116
#define MINOR_ALLOCED ((void *)-1)
118
119
120
121
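/*
 * Bits for the md->flags field.
 */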
#define DMF_BLOCK_IO_FOR_SUSPEND 0
#define DMF_SUSPENDED 1
#define DMF_FROZEN 2
#define DMF_FREEING 3
#define DMF_DELETING 4
#define DMF_NOFLUSH_SUSPENDING 5
#define DMF_DEFERRED_REMOVE 6
#define DMF_SUSPENDED_INTERNALLY 7
130
131
132
133
134
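/*
 * A dummy definition to make RCU happy.
 * struct dm_table should never be dereferenced in this file.
 */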
struct dm_table {
        int undefined__;
};
138
139
140
141
142struct mapped_device {
143 struct srcu_struct io_barrier;
144 struct mutex suspend_lock;
145 atomic_t holders;
146 atomic_t open_count;
147
148
149
150
151
152
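	/*
	 * The current mapping (struct dm_table *).
	 * Use dm_get_live_table{_fast} or take suspend_lock for
	 * dereference.
	 */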
153 struct dm_table __rcu *map;
154
155 struct list_head table_devices;
156 struct mutex table_devices_lock;
157
158 unsigned long flags;
159
160 struct request_queue *queue;
161 unsigned type;
162
163 struct mutex type_lock;
164
165 struct target_type *immutable_target_type;
166
167 struct gendisk *disk;
168 char name[16];
169
170 void *interface_ptr;
171
172
173
174
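	/*
	 * A list of ios that arrived while we were suspended.
	 */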
175 atomic_t pending[2];
176 wait_queue_head_t wait;
177 struct work_struct work;
178 struct bio_list deferred;
179 spinlock_t deferred_lock;
180
181
182
183
184 struct workqueue_struct *wq;
185
186
187
188
189 mempool_t *io_pool;
190 mempool_t *rq_pool;
191
192 struct bio_set *bs;
193
194
195
196
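	/*
	 * Event handling.
	 */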
197 atomic_t event_nr;
198 wait_queue_head_t eventq;
199 atomic_t uevent_seq;
200 struct list_head uevent_list;
201 spinlock_t uevent_lock;
202
203
204
205
206 struct super_block *frozen_sb;
207 struct block_device *bdev;
208
209
210 struct hd_geometry geometry;
211
212
213 struct dm_kobject_holder kobj_holder;
214
215
216 struct bio flush_bio;
217
218
219 unsigned internal_suspend_count;
220
221 struct dm_stats stats;
222
223 struct kthread_worker kworker;
224 struct task_struct *kworker_task;
225
226
227 unsigned seq_rq_merge_deadline_usecs;
228 int last_rq_rw;
229 sector_t last_rq_pos;
230 ktime_t last_rq_start_time;
231
232
233 struct blk_mq_tag_set tag_set;
234 bool use_blk_mq;
235};
236
#ifdef CONFIG_DM_MQ_DEFAULT
static bool use_blk_mq = true;
#else
static bool use_blk_mq = false;
#endif

bool dm_use_blk_mq(struct mapped_device *md)
{
        return md->use_blk_mq;
}
247
248
249
250
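/*
 * For mempools pre-allocation at the table loading time.
 */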
struct dm_md_mempools {
        mempool_t *io_pool;
        mempool_t *rq_pool;
        struct bio_set *bs;
};

struct table_device {
        struct list_head list;
        atomic_t count;
        struct dm_dev dm_dev;
};

#define RESERVED_BIO_BASED_IOS 16
#define RESERVED_REQUEST_BASED_IOS 256
#define RESERVED_MAX_IOS 1024
static struct kmem_cache *_io_cache;
static struct kmem_cache *_rq_tio_cache;
static struct kmem_cache *_rq_cache;
269
270
271
272
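/*
 * Bio-based DM's mempools' reserved IOs set by the user.
 */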
273static unsigned reserved_bio_based_ios = RESERVED_BIO_BASED_IOS;
274
275
276
277
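/*
 * Request-based DM's mempools' reserved IOs set by the user.
 */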
278static unsigned reserved_rq_based_ios = RESERVED_REQUEST_BASED_IOS;
279
280static unsigned __dm_get_module_param(unsigned *module_param,
281 unsigned def, unsigned max)
282{
283 unsigned param = ACCESS_ONCE(*module_param);
284 unsigned modified_param = 0;
285
286 if (!param)
287 modified_param = def;
288 else if (param > max)
289 modified_param = max;
290
291 if (modified_param) {
292 (void)cmpxchg(module_param, param, modified_param);
293 param = modified_param;
294 }
295
296 return param;
297}
298
299unsigned dm_get_reserved_bio_based_ios(void)
300{
301 return __dm_get_module_param(&reserved_bio_based_ios,
302 RESERVED_BIO_BASED_IOS, RESERVED_MAX_IOS);
303}
304EXPORT_SYMBOL_GPL(dm_get_reserved_bio_based_ios);
305
306unsigned dm_get_reserved_rq_based_ios(void)
307{
308 return __dm_get_module_param(&reserved_rq_based_ios,
309 RESERVED_REQUEST_BASED_IOS, RESERVED_MAX_IOS);
310}
311EXPORT_SYMBOL_GPL(dm_get_reserved_rq_based_ios);
312
313static int __init local_init(void)
314{
315 int r = -ENOMEM;
316
317
318 _io_cache = KMEM_CACHE(dm_io, 0);
319 if (!_io_cache)
320 return r;
321
322 _rq_tio_cache = KMEM_CACHE(dm_rq_target_io, 0);
323 if (!_rq_tio_cache)
324 goto out_free_io_cache;
325
326 _rq_cache = kmem_cache_create("dm_clone_request", sizeof(struct request),
327 __alignof__(struct request), 0, NULL);
328 if (!_rq_cache)
329 goto out_free_rq_tio_cache;
330
331 r = dm_uevent_init();
332 if (r)
333 goto out_free_rq_cache;
334
335 deferred_remove_workqueue = alloc_workqueue("kdmremove", WQ_UNBOUND, 1);
336 if (!deferred_remove_workqueue) {
337 r = -ENOMEM;
338 goto out_uevent_exit;
339 }
340
341 _major = major;
342 r = register_blkdev(_major, _name);
343 if (r < 0)
344 goto out_free_workqueue;
345
346 if (!_major)
347 _major = r;
348
349 return 0;
350
351out_free_workqueue:
352 destroy_workqueue(deferred_remove_workqueue);
353out_uevent_exit:
354 dm_uevent_exit();
355out_free_rq_cache:
356 kmem_cache_destroy(_rq_cache);
357out_free_rq_tio_cache:
358 kmem_cache_destroy(_rq_tio_cache);
359out_free_io_cache:
360 kmem_cache_destroy(_io_cache);
361
362 return r;
363}
364
365static void local_exit(void)
366{
367 flush_scheduled_work();
368 destroy_workqueue(deferred_remove_workqueue);
369
370 kmem_cache_destroy(_rq_cache);
371 kmem_cache_destroy(_rq_tio_cache);
372 kmem_cache_destroy(_io_cache);
373 unregister_blkdev(_major, _name);
374 dm_uevent_exit();
375
376 _major = 0;
377
378 DMINFO("cleaned up");
379}
380
381static int (*_inits[])(void) __initdata = {
382 local_init,
383 dm_target_init,
384 dm_linear_init,
385 dm_stripe_init,
386 dm_io_init,
387 dm_kcopyd_init,
388 dm_interface_init,
389 dm_statistics_init,
390};
391
392static void (*_exits[])(void) = {
393 local_exit,
394 dm_target_exit,
395 dm_linear_exit,
396 dm_stripe_exit,
397 dm_io_exit,
398 dm_kcopyd_exit,
399 dm_interface_exit,
400 dm_statistics_exit,
401};
402
403static int __init dm_init(void)
404{
405 const int count = ARRAY_SIZE(_inits);
406
407 int r, i;
408
409 for (i = 0; i < count; i++) {
410 r = _inits[i]();
411 if (r)
412 goto bad;
413 }
414
415 return 0;
416
417 bad:
418 while (i--)
419 _exits[i]();
420
421 return r;
422}
423
424static void __exit dm_exit(void)
425{
426 int i = ARRAY_SIZE(_exits);
427
428 while (i--)
429 _exits[i]();
430
431
432
433
434 idr_destroy(&_minor_idr);
435}
436
437
438
439
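/*
 * Block device functions
 */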
440int dm_deleting_md(struct mapped_device *md)
441{
442 return test_bit(DMF_DELETING, &md->flags);
443}
444
445static int dm_blk_open(struct block_device *bdev, fmode_t mode)
446{
447 struct mapped_device *md;
448
449 spin_lock(&_minor_lock);
450
451 md = bdev->bd_disk->private_data;
452 if (!md)
453 goto out;
454
455 if (test_bit(DMF_FREEING, &md->flags) ||
456 dm_deleting_md(md)) {
457 md = NULL;
458 goto out;
459 }
460
461 dm_get(md);
462 atomic_inc(&md->open_count);
463out:
464 spin_unlock(&_minor_lock);
465
466 return md ? 0 : -ENXIO;
467}
468
469static void dm_blk_close(struct gendisk *disk, fmode_t mode)
470{
471 struct mapped_device *md;
472
473 spin_lock(&_minor_lock);
474
475 md = disk->private_data;
476 if (WARN_ON(!md))
477 goto out;
478
479 if (atomic_dec_and_test(&md->open_count) &&
480 (test_bit(DMF_DEFERRED_REMOVE, &md->flags)))
481 queue_work(deferred_remove_workqueue, &deferred_remove_work);
482
483 dm_put(md);
484out:
485 spin_unlock(&_minor_lock);
486}
487
488int dm_open_count(struct mapped_device *md)
489{
490 return atomic_read(&md->open_count);
491}
492
493
494
495
496int dm_lock_for_deletion(struct mapped_device *md, bool mark_deferred, bool only_deferred)
497{
498 int r = 0;
499
500 spin_lock(&_minor_lock);
501
502 if (dm_open_count(md)) {
503 r = -EBUSY;
504 if (mark_deferred)
505 set_bit(DMF_DEFERRED_REMOVE, &md->flags);
506 } else if (only_deferred && !test_bit(DMF_DEFERRED_REMOVE, &md->flags))
507 r = -EEXIST;
508 else
509 set_bit(DMF_DELETING, &md->flags);
510
511 spin_unlock(&_minor_lock);
512
513 return r;
514}
515
516int dm_cancel_deferred_remove(struct mapped_device *md)
517{
518 int r = 0;
519
520 spin_lock(&_minor_lock);
521
522 if (test_bit(DMF_DELETING, &md->flags))
523 r = -EBUSY;
524 else
525 clear_bit(DMF_DEFERRED_REMOVE, &md->flags);
526
527 spin_unlock(&_minor_lock);
528
529 return r;
530}
531
532static void do_deferred_remove(struct work_struct *w)
533{
534 dm_deferred_remove();
535}
536
537sector_t dm_get_size(struct mapped_device *md)
538{
539 return get_capacity(md->disk);
540}
541
542struct request_queue *dm_get_md_queue(struct mapped_device *md)
543{
544 return md->queue;
545}
546
547struct dm_stats *dm_get_stats(struct mapped_device *md)
548{
549 return &md->stats;
550}
551
552static int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
553{
554 struct mapped_device *md = bdev->bd_disk->private_data;
555
556 return dm_get_geometry(md, geo);
557}
558
559static int dm_get_live_table_for_ioctl(struct mapped_device *md,
560 struct dm_target **tgt, struct block_device **bdev,
561 fmode_t *mode, int *srcu_idx)
562{
563 struct dm_table *map;
564 int r;
565
566retry:
567 r = -ENOTTY;
568 map = dm_get_live_table(md, srcu_idx);
569 if (!map || !dm_table_get_size(map))
570 goto out;
571
572
573 if (dm_table_get_num_targets(map) != 1)
574 goto out;
575
576 *tgt = dm_table_get_target(map, 0);
577
578 if (!(*tgt)->type->prepare_ioctl)
579 goto out;
580
581 if (dm_suspended_md(md)) {
582 r = -EAGAIN;
583 goto out;
584 }
585
586 r = (*tgt)->type->prepare_ioctl(*tgt, bdev, mode);
587 if (r < 0)
588 goto out;
589
590 return r;
591
592out:
593 dm_put_live_table(md, *srcu_idx);
594 if (r == -ENOTCONN && !fatal_signal_pending(current)) {
595 msleep(10);
596 goto retry;
597 }
598 return r;
599}
600
601static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
602 unsigned int cmd, unsigned long arg)
603{
604 struct mapped_device *md = bdev->bd_disk->private_data;
605 struct dm_target *tgt;
606 struct block_device *tgt_bdev = NULL;
607 int srcu_idx, r;
608
609 r = dm_get_live_table_for_ioctl(md, &tgt, &tgt_bdev, &mode, &srcu_idx);
610 if (r < 0)
611 return r;
612
613 if (r > 0) {
614
615
616
617
618
619 r = scsi_verify_blk_ioctl(NULL, cmd);
620 if (r)
621 goto out;
622 }
623
624 r = __blkdev_driver_ioctl(tgt_bdev, mode, cmd, arg);
625out:
626 dm_put_live_table(md, srcu_idx);
627 return r;
628}
629
630static struct dm_io *alloc_io(struct mapped_device *md)
631{
632 return mempool_alloc(md->io_pool, GFP_NOIO);
633}
634
635static void free_io(struct mapped_device *md, struct dm_io *io)
636{
637 mempool_free(io, md->io_pool);
638}
639
640static void free_tio(struct mapped_device *md, struct dm_target_io *tio)
641{
642 bio_put(&tio->clone);
643}
644
645static struct dm_rq_target_io *alloc_rq_tio(struct mapped_device *md,
646 gfp_t gfp_mask)
647{
648 return mempool_alloc(md->io_pool, gfp_mask);
649}
650
651static void free_rq_tio(struct dm_rq_target_io *tio)
652{
653 mempool_free(tio, tio->md->io_pool);
654}
655
656static struct request *alloc_clone_request(struct mapped_device *md,
657 gfp_t gfp_mask)
658{
659 return mempool_alloc(md->rq_pool, gfp_mask);
660}
661
662static void free_clone_request(struct mapped_device *md, struct request *rq)
663{
664 mempool_free(rq, md->rq_pool);
665}
666
667static int md_in_flight(struct mapped_device *md)
668{
669 return atomic_read(&md->pending[READ]) +
670 atomic_read(&md->pending[WRITE]);
671}
672
673static void start_io_acct(struct dm_io *io)
674{
675 struct mapped_device *md = io->md;
676 struct bio *bio = io->bio;
677 int cpu;
678 int rw = bio_data_dir(bio);
679
680 io->start_time = jiffies;
681
682 cpu = part_stat_lock();
683 part_round_stats(cpu, &dm_disk(md)->part0);
684 part_stat_unlock();
685 atomic_set(&dm_disk(md)->part0.in_flight[rw],
686 atomic_inc_return(&md->pending[rw]));
687
688 if (unlikely(dm_stats_used(&md->stats)))
689 dm_stats_account_io(&md->stats, bio->bi_rw, bio->bi_iter.bi_sector,
690 bio_sectors(bio), false, 0, &io->stats_aux);
691}
692
693static void end_io_acct(struct dm_io *io)
694{
695 struct mapped_device *md = io->md;
696 struct bio *bio = io->bio;
697 unsigned long duration = jiffies - io->start_time;
698 int pending;
699 int rw = bio_data_dir(bio);
700
701 generic_end_io_acct(rw, &dm_disk(md)->part0, io->start_time);
702
703 if (unlikely(dm_stats_used(&md->stats)))
704 dm_stats_account_io(&md->stats, bio->bi_rw, bio->bi_iter.bi_sector,
705 bio_sectors(bio), true, duration, &io->stats_aux);
706
707
708
709
710
711 pending = atomic_dec_return(&md->pending[rw]);
712 atomic_set(&dm_disk(md)->part0.in_flight[rw], pending);
713 pending += atomic_read(&md->pending[rw^0x1]);
714
715
716 if (!pending)
717 wake_up(&md->wait);
718}
719
720
721
722
723static void queue_io(struct mapped_device *md, struct bio *bio)
724{
725 unsigned long flags;
726
727 spin_lock_irqsave(&md->deferred_lock, flags);
728 bio_list_add(&md->deferred, bio);
729 spin_unlock_irqrestore(&md->deferred_lock, flags);
730 queue_work(md->wq, &md->work);
731}
732
733
734
735
736
737
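/*
 * Everyone (including functions in this file), should use this
 * function to access the md->map field, and make sure they call
 * dm_put_live_table() when finished.
 */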
738struct dm_table *dm_get_live_table(struct mapped_device *md, int *srcu_idx) __acquires(md->io_barrier)
739{
740 *srcu_idx = srcu_read_lock(&md->io_barrier);
741
742 return srcu_dereference(md->map, &md->io_barrier);
743}
744
745void dm_put_live_table(struct mapped_device *md, int srcu_idx) __releases(md->io_barrier)
746{
747 srcu_read_unlock(&md->io_barrier, srcu_idx);
748}
749
750void dm_sync_table(struct mapped_device *md)
751{
752 synchronize_srcu(&md->io_barrier);
753 synchronize_rcu_expedited();
754}
755
756
757
758
759
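/*
 * A fast alternative to dm_get_live_table/dm_put_live_table.
 * The caller must not block between these two functions.
 */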
760static struct dm_table *dm_get_live_table_fast(struct mapped_device *md) __acquires(RCU)
761{
762 rcu_read_lock();
763 return rcu_dereference(md->map);
764}
765
766static void dm_put_live_table_fast(struct mapped_device *md) __releases(RCU)
767{
768 rcu_read_unlock();
769}
770
771
772
773
774static int open_table_device(struct table_device *td, dev_t dev,
775 struct mapped_device *md)
776{
777 static char *_claim_ptr = "I belong to device-mapper";
778 struct block_device *bdev;
779
780 int r;
781
782 BUG_ON(td->dm_dev.bdev);
783
784 bdev = blkdev_get_by_dev(dev, td->dm_dev.mode | FMODE_EXCL, _claim_ptr);
785 if (IS_ERR(bdev))
786 return PTR_ERR(bdev);
787
788 r = bd_link_disk_holder(bdev, dm_disk(md));
789 if (r) {
790 blkdev_put(bdev, td->dm_dev.mode | FMODE_EXCL);
791 return r;
792 }
793
794 td->dm_dev.bdev = bdev;
795 return 0;
796}
797
798
799
800
801static void close_table_device(struct table_device *td, struct mapped_device *md)
802{
803 if (!td->dm_dev.bdev)
804 return;
805
806 bd_unlink_disk_holder(td->dm_dev.bdev, dm_disk(md));
807 blkdev_put(td->dm_dev.bdev, td->dm_dev.mode | FMODE_EXCL);
808 td->dm_dev.bdev = NULL;
809}
810
811static struct table_device *find_table_device(struct list_head *l, dev_t dev,
812 fmode_t mode) {
813 struct table_device *td;
814
815 list_for_each_entry(td, l, list)
816 if (td->dm_dev.bdev->bd_dev == dev && td->dm_dev.mode == mode)
817 return td;
818
819 return NULL;
820}
821
822int dm_get_table_device(struct mapped_device *md, dev_t dev, fmode_t mode,
823 struct dm_dev **result) {
824 int r;
825 struct table_device *td;
826
827 mutex_lock(&md->table_devices_lock);
828 td = find_table_device(&md->table_devices, dev, mode);
829 if (!td) {
830 td = kmalloc(sizeof(*td), GFP_KERNEL);
831 if (!td) {
832 mutex_unlock(&md->table_devices_lock);
833 return -ENOMEM;
834 }
835
836 td->dm_dev.mode = mode;
837 td->dm_dev.bdev = NULL;
838
839 if ((r = open_table_device(td, dev, md))) {
840 mutex_unlock(&md->table_devices_lock);
841 kfree(td);
842 return r;
843 }
844
845 format_dev_t(td->dm_dev.name, dev);
846
847 atomic_set(&td->count, 0);
848 list_add(&td->list, &md->table_devices);
849 }
850 atomic_inc(&td->count);
851 mutex_unlock(&md->table_devices_lock);
852
853 *result = &td->dm_dev;
854 return 0;
855}
856EXPORT_SYMBOL_GPL(dm_get_table_device);
857
858void dm_put_table_device(struct mapped_device *md, struct dm_dev *d)
859{
860 struct table_device *td = container_of(d, struct table_device, dm_dev);
861
862 mutex_lock(&md->table_devices_lock);
863 if (atomic_dec_and_test(&td->count)) {
864 close_table_device(td, md);
865 list_del(&td->list);
866 kfree(td);
867 }
868 mutex_unlock(&md->table_devices_lock);
869}
870EXPORT_SYMBOL(dm_put_table_device);
871
872static void free_table_devices(struct list_head *devices)
873{
874 struct list_head *tmp, *next;
875
876 list_for_each_safe(tmp, next, devices) {
877 struct table_device *td = list_entry(tmp, struct table_device, list);
878
879 DMWARN("dm_destroy: %s still exists with %d references",
880 td->dm_dev.name, atomic_read(&td->count));
881 kfree(td);
882 }
883}
884
885
886
887
888int dm_get_geometry(struct mapped_device *md, struct hd_geometry *geo)
889{
890 *geo = md->geometry;
891
892 return 0;
893}
894
895
896
897
898int dm_set_geometry(struct mapped_device *md, struct hd_geometry *geo)
899{
900 sector_t sz = (sector_t)geo->cylinders * geo->heads * geo->sectors;
901
902 if (geo->start > sz) {
903 DMWARN("Start sector is beyond the geometry limits.");
904 return -EINVAL;
905 }
906
907 md->geometry = *geo;
908
909 return 0;
910}
911
912
913
914
915
916
917
918
919
920
921static int __noflush_suspending(struct mapped_device *md)
922{
923 return test_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
924}
925
926
927
928
929
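/*
 * Decrements the number of outstanding ios that a bio has been
 * cloned into, completing the original io if necessary.
 */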
930static void dec_pending(struct dm_io *io, int error)
931{
932 unsigned long flags;
933 int io_error;
934 struct bio *bio;
935 struct mapped_device *md = io->md;
936
937
938 if (unlikely(error)) {
939 spin_lock_irqsave(&io->endio_lock, flags);
940 if (!(io->error > 0 && __noflush_suspending(md)))
941 io->error = error;
942 spin_unlock_irqrestore(&io->endio_lock, flags);
943 }
944
945 if (atomic_dec_and_test(&io->io_count)) {
946 if (io->error == DM_ENDIO_REQUEUE) {
947
948
949
950 spin_lock_irqsave(&md->deferred_lock, flags);
951 if (__noflush_suspending(md))
952 bio_list_add_head(&md->deferred, io->bio);
953 else
954
955 io->error = -EIO;
956 spin_unlock_irqrestore(&md->deferred_lock, flags);
957 }
958
959 io_error = io->error;
960 bio = io->bio;
961 end_io_acct(io);
962 free_io(md, io);
963
964 if (io_error == DM_ENDIO_REQUEUE)
965 return;
966
967 if ((bio->bi_rw & REQ_FLUSH) && bio->bi_iter.bi_size) {
968
969
970
971
972 bio->bi_rw &= ~REQ_FLUSH;
973 queue_io(md, bio);
974 } else {
975
976 trace_block_bio_complete(md->queue, bio, io_error);
977 bio->bi_error = io_error;
978 bio_endio(bio);
979 }
980 }
981}
982
983static void disable_write_same(struct mapped_device *md)
984{
985 struct queue_limits *limits = dm_get_queue_limits(md);
986
987
988 limits->max_write_same_sectors = 0;
989}
990
991static void clone_endio(struct bio *bio)
992{
993 int error = bio->bi_error;
994 int r = error;
995 struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
996 struct dm_io *io = tio->io;
997 struct mapped_device *md = tio->io->md;
998 dm_endio_fn endio = tio->ti->type->end_io;
999
1000 if (endio) {
1001 r = endio(tio->ti, bio, error);
1002 if (r < 0 || r == DM_ENDIO_REQUEUE)
1003
1004
1005
1006
1007 error = r;
1008 else if (r == DM_ENDIO_INCOMPLETE)
1009
1010 return;
1011 else if (r) {
1012 DMWARN("unimplemented target endio return value: %d", r);
1013 BUG();
1014 }
1015 }
1016
1017 if (unlikely(r == -EREMOTEIO && (bio->bi_rw & REQ_WRITE_SAME) &&
1018 !bdev_get_queue(bio->bi_bdev)->limits.max_write_same_sectors))
1019 disable_write_same(md);
1020
1021 free_tio(md, tio);
1022 dec_pending(io, error);
1023}
1024
1025
1026
1027
1028static void end_clone_bio(struct bio *clone)
1029{
1030 struct dm_rq_clone_bio_info *info =
1031 container_of(clone, struct dm_rq_clone_bio_info, clone);
1032 struct dm_rq_target_io *tio = info->tio;
1033 struct bio *bio = info->orig;
1034 unsigned int nr_bytes = info->orig->bi_iter.bi_size;
1035 int error = clone->bi_error;
1036
1037 bio_put(clone);
1038
1039 if (tio->error)
1040
1041
1042
1043
1044
1045 return;
1046 else if (error) {
1047
1048
1049
1050
1051
1052 tio->error = error;
1053 return;
1054 }
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066 if (tio->orig->bio != bio)
1067 DMERR("bio completion is going in the middle of the request");
1068
1069
1070
1071
1072
1073
1074 blk_update_request(tio->orig, 0, nr_bytes);
1075}
1076
1077static struct dm_rq_target_io *tio_from_request(struct request *rq)
1078{
1079 return (rq->q->mq_ops ? blk_mq_rq_to_pdu(rq) : rq->special);
1080}
1081
1082static void rq_end_stats(struct mapped_device *md, struct request *orig)
1083{
1084 if (unlikely(dm_stats_used(&md->stats))) {
1085 struct dm_rq_target_io *tio = tio_from_request(orig);
1086 tio->duration_jiffies = jiffies - tio->duration_jiffies;
1087 dm_stats_account_io(&md->stats, orig->cmd_flags, blk_rq_pos(orig),
1088 tio->n_sectors, true, tio->duration_jiffies,
1089 &tio->stats_aux);
1090 }
1091}
1092
1093
1094
1095
1096
1097
1098static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
1099{
1100 atomic_dec(&md->pending[rw]);
1101
1102
1103 if (!md_in_flight(md))
1104 wake_up(&md->wait);
1105
1106
1107
1108
1109
1110
1111
1112 if (run_queue) {
1113 if (md->queue->mq_ops)
1114 blk_mq_run_hw_queues(md->queue, true);
1115 else
1116 blk_run_queue_async(md->queue);
1117 }
1118
1119
1120
1121
1122 dm_put(md);
1123}
1124
1125static void free_rq_clone(struct request *clone)
1126{
1127 struct dm_rq_target_io *tio = clone->end_io_data;
1128 struct mapped_device *md = tio->md;
1129
1130 blk_rq_unprep_clone(clone);
1131
1132 if (md->type == DM_TYPE_MQ_REQUEST_BASED)
1133
1134 tio->ti->type->release_clone_rq(clone);
1135 else if (!md->queue->mq_ops)
1136
1137 free_clone_request(md, clone);
1138
1139
1140
1141
1142
1143
1144 if (!md->queue->mq_ops)
1145 free_rq_tio(tio);
1146}
1147
1148
1149
1150
1151
1152
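/*
 * Complete the clone and the original request.
 * Must be called without clone's queue lock held,
 * see end_clone_request() for more details.
 */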
1153static void dm_end_request(struct request *clone, int error)
1154{
1155 int rw = rq_data_dir(clone);
1156 struct dm_rq_target_io *tio = clone->end_io_data;
1157 struct mapped_device *md = tio->md;
1158 struct request *rq = tio->orig;
1159
1160 if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
1161 rq->errors = clone->errors;
1162 rq->resid_len = clone->resid_len;
1163
1164 if (rq->sense)
1165
1166
1167
1168
1169
1170 rq->sense_len = clone->sense_len;
1171 }
1172
1173 free_rq_clone(clone);
1174 rq_end_stats(md, rq);
1175 if (!rq->q->mq_ops)
1176 blk_end_request_all(rq, error);
1177 else
1178 blk_mq_end_request(rq, error);
1179 rq_completed(md, rw, true);
1180}
1181
1182static void dm_unprep_request(struct request *rq)
1183{
1184 struct dm_rq_target_io *tio = tio_from_request(rq);
1185 struct request *clone = tio->clone;
1186
1187 if (!rq->q->mq_ops) {
1188 rq->special = NULL;
1189 rq->cmd_flags &= ~REQ_DONTPREP;
1190 }
1191
1192 if (clone)
1193 free_rq_clone(clone);
1194 else if (!tio->md->queue->mq_ops)
1195 free_rq_tio(tio);
1196}
1197
1198
1199
1200
1201static void old_requeue_request(struct request *rq)
1202{
1203 struct request_queue *q = rq->q;
1204 unsigned long flags;
1205
1206 spin_lock_irqsave(q->queue_lock, flags);
1207 blk_requeue_request(q, rq);
1208 blk_run_queue_async(q);
1209 spin_unlock_irqrestore(q->queue_lock, flags);
1210}
1211
1212static void dm_requeue_original_request(struct mapped_device *md,
1213 struct request *rq)
1214{
1215 int rw = rq_data_dir(rq);
1216
1217 dm_unprep_request(rq);
1218
1219 rq_end_stats(md, rq);
1220 if (!rq->q->mq_ops)
1221 old_requeue_request(rq);
1222 else {
1223 blk_mq_requeue_request(rq);
1224 blk_mq_kick_requeue_list(rq->q);
1225 }
1226
1227 rq_completed(md, rw, false);
1228}
1229
1230static void old_stop_queue(struct request_queue *q)
1231{
1232 unsigned long flags;
1233
1234 if (blk_queue_stopped(q))
1235 return;
1236
1237 spin_lock_irqsave(q->queue_lock, flags);
1238 blk_stop_queue(q);
1239 spin_unlock_irqrestore(q->queue_lock, flags);
1240}
1241
1242static void stop_queue(struct request_queue *q)
1243{
1244 if (!q->mq_ops)
1245 old_stop_queue(q);
1246 else
1247 blk_mq_stop_hw_queues(q);
1248}
1249
1250static void old_start_queue(struct request_queue *q)
1251{
1252 unsigned long flags;
1253
1254 spin_lock_irqsave(q->queue_lock, flags);
1255 if (blk_queue_stopped(q))
1256 blk_start_queue(q);
1257 spin_unlock_irqrestore(q->queue_lock, flags);
1258}
1259
1260static void start_queue(struct request_queue *q)
1261{
1262 if (!q->mq_ops)
1263 old_start_queue(q);
1264 else
1265 blk_mq_start_stopped_hw_queues(q, true);
1266}
1267
1268static void dm_done(struct request *clone, int error, bool mapped)
1269{
1270 int r = error;
1271 struct dm_rq_target_io *tio = clone->end_io_data;
1272 dm_request_endio_fn rq_end_io = NULL;
1273
1274 if (tio->ti) {
1275 rq_end_io = tio->ti->type->rq_end_io;
1276
1277 if (mapped && rq_end_io)
1278 r = rq_end_io(tio->ti, clone, error, &tio->info);
1279 }
1280
1281 if (unlikely(r == -EREMOTEIO && (clone->cmd_flags & REQ_WRITE_SAME) &&
1282 !clone->q->limits.max_write_same_sectors))
1283 disable_write_same(tio->md);
1284
1285 if (r <= 0)
1286
1287 dm_end_request(clone, r);
1288 else if (r == DM_ENDIO_INCOMPLETE)
1289
1290 return;
1291 else if (r == DM_ENDIO_REQUEUE)
1292
1293 dm_requeue_original_request(tio->md, tio->orig);
1294 else {
1295 DMWARN("unimplemented target endio return value: %d", r);
1296 BUG();
1297 }
1298}
1299
1300
1301
1302
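/*
 * Complete the clone and the original request with the error status
 * through softirq context.
 */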
1303static void dm_softirq_done(struct request *rq)
1304{
1305 bool mapped = true;
1306 struct dm_rq_target_io *tio = tio_from_request(rq);
1307 struct request *clone = tio->clone;
1308 int rw;
1309
1310 if (!clone) {
1311 rq_end_stats(tio->md, rq);
1312 rw = rq_data_dir(rq);
1313 if (!rq->q->mq_ops) {
1314 blk_end_request_all(rq, tio->error);
1315 rq_completed(tio->md, rw, false);
1316 free_rq_tio(tio);
1317 } else {
1318 blk_mq_end_request(rq, tio->error);
1319 rq_completed(tio->md, rw, false);
1320 }
1321 return;
1322 }
1323
1324 if (rq->cmd_flags & REQ_FAILED)
1325 mapped = false;
1326
1327 dm_done(clone, tio->error, mapped);
1328}
1329
1330
1331
1332
1333
1334static void dm_complete_request(struct request *rq, int error)
1335{
1336 struct dm_rq_target_io *tio = tio_from_request(rq);
1337
1338 tio->error = error;
1339 blk_complete_request(rq);
1340}
1341
1342
1343
1344
1345
1346
1347
1348static void dm_kill_unmapped_request(struct request *rq, int error)
1349{
1350 rq->cmd_flags |= REQ_FAILED;
1351 dm_complete_request(rq, error);
1352}
1353
1354
1355
1356
1357static void end_clone_request(struct request *clone, int error)
1358{
1359 struct dm_rq_target_io *tio = clone->end_io_data;
1360
1361 if (!clone->q->mq_ops) {
1362
1363
1364
1365
1366
1367
1368 __blk_put_request(clone->q, clone);
1369 }
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379 dm_complete_request(tio->orig, error);
1380}
1381
1382
1383
1384
1385
1386static sector_t max_io_len_target_boundary(sector_t sector, struct dm_target *ti)
1387{
1388 sector_t target_offset = dm_target_offset(ti, sector);
1389
1390 return ti->len - target_offset;
1391}
1392
1393static sector_t max_io_len(sector_t sector, struct dm_target *ti)
1394{
1395 sector_t len = max_io_len_target_boundary(sector, ti);
1396 sector_t offset, max_len;
1397
1398
1399
1400
1401 if (ti->max_io_len) {
1402 offset = dm_target_offset(ti, sector);
1403 if (unlikely(ti->max_io_len & (ti->max_io_len - 1)))
1404 max_len = sector_div(offset, ti->max_io_len);
1405 else
1406 max_len = offset & (ti->max_io_len - 1);
1407 max_len = ti->max_io_len - max_len;
1408
1409 if (len > max_len)
1410 len = max_len;
1411 }
1412
1413 return len;
1414}
1415
1416int dm_set_target_max_io_len(struct dm_target *ti, sector_t len)
1417{
1418 if (len > UINT_MAX) {
1419 DMERR("Specified maximum size of target IO (%llu) exceeds limit (%u)",
1420 (unsigned long long)len, UINT_MAX);
1421 ti->error = "Maximum size of target IO is too large";
1422 return -EINVAL;
1423 }
1424
1425 ti->max_io_len = (uint32_t) len;
1426
1427 return 0;
1428}
1429EXPORT_SYMBOL_GPL(dm_set_target_max_io_len);

/*
 * A target may call dm_accept_partial_bio only from the map routine.  It is
 * allowed for all bio types except REQ_FLUSH.
 *
 * dm_accept_partial_bio informs the dm that the target only wants to process
 * additional n_sectors sectors of the bio and the rest of the data should be
 * sent in a next bio.
 *
 * The clone is trimmed to n_sectors and *tio->len_ptr is reduced by the part
 * that was not accepted, so the remainder is submitted to the target in a
 * subsequent bio.
 */
1459void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors)
1460{
1461 struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
1462 unsigned bi_size = bio->bi_iter.bi_size >> SECTOR_SHIFT;
1463 BUG_ON(bio->bi_rw & REQ_FLUSH);
1464 BUG_ON(bi_size > *tio->len_ptr);
1465 BUG_ON(n_sectors > bi_size);
1466 *tio->len_ptr -= bi_size - n_sectors;
1467 bio->bi_iter.bi_size = n_sectors << SECTOR_SHIFT;
1468}
1469EXPORT_SYMBOL_GPL(dm_accept_partial_bio);
1470
1471static void __map_bio(struct dm_target_io *tio)
1472{
1473 int r;
1474 sector_t sector;
1475 struct mapped_device *md;
1476 struct bio *clone = &tio->clone;
1477 struct dm_target *ti = tio->ti;
1478
1479 clone->bi_end_io = clone_endio;
1480
1481
1482
1483
1484
1485
1486 atomic_inc(&tio->io->io_count);
1487 sector = clone->bi_iter.bi_sector;
1488 r = ti->type->map(ti, clone);
1489 if (r == DM_MAPIO_REMAPPED) {
1490
1491
1492 trace_block_bio_remap(bdev_get_queue(clone->bi_bdev), clone,
1493 tio->io->bio->bi_bdev->bd_dev, sector);
1494
1495 generic_make_request(clone);
1496 } else if (r < 0 || r == DM_MAPIO_REQUEUE) {
1497
1498 md = tio->io->md;
1499 dec_pending(tio->io, r);
1500 free_tio(md, tio);
1501 } else if (r != DM_MAPIO_SUBMITTED) {
1502 DMWARN("unimplemented target map return value: %d", r);
1503 BUG();
1504 }
1505}
1506
1507struct clone_info {
1508 struct mapped_device *md;
1509 struct dm_table *map;
1510 struct bio *bio;
1511 struct dm_io *io;
1512 sector_t sector;
1513 unsigned sector_count;
1514};
1515
1516static void bio_setup_sector(struct bio *bio, sector_t sector, unsigned len)
1517{
1518 bio->bi_iter.bi_sector = sector;
1519 bio->bi_iter.bi_size = to_bytes(len);
1520}
1521
1522
1523
1524
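/*
 * Creates a bio that consists of range of complete bvecs.
 */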
1525static void clone_bio(struct dm_target_io *tio, struct bio *bio,
1526 sector_t sector, unsigned len)
1527{
1528 struct bio *clone = &tio->clone;
1529
1530 __bio_clone_fast(clone, bio);
1531
1532 if (bio_integrity(bio))
1533 bio_integrity_clone(clone, bio, GFP_NOIO);
1534
1535 bio_advance(clone, to_bytes(sector - clone->bi_iter.bi_sector));
1536 clone->bi_iter.bi_size = to_bytes(len);
1537
1538 if (bio_integrity(bio))
1539 bio_integrity_trim(clone, 0, len);
1540}
1541
1542static struct dm_target_io *alloc_tio(struct clone_info *ci,
1543 struct dm_target *ti,
1544 unsigned target_bio_nr)
1545{
1546 struct dm_target_io *tio;
1547 struct bio *clone;
1548
1549 clone = bio_alloc_bioset(GFP_NOIO, 0, ci->md->bs);
1550 tio = container_of(clone, struct dm_target_io, clone);
1551
1552 tio->io = ci->io;
1553 tio->ti = ti;
1554 tio->target_bio_nr = target_bio_nr;
1555
1556 return tio;
1557}
1558
1559static void __clone_and_map_simple_bio(struct clone_info *ci,
1560 struct dm_target *ti,
1561 unsigned target_bio_nr, unsigned *len)
1562{
1563 struct dm_target_io *tio = alloc_tio(ci, ti, target_bio_nr);
1564 struct bio *clone = &tio->clone;
1565
1566 tio->len_ptr = len;
1567
1568 __bio_clone_fast(clone, ci->bio);
1569 if (len)
1570 bio_setup_sector(clone, ci->sector, *len);
1571
1572 __map_bio(tio);
1573}
1574
1575static void __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti,
1576 unsigned num_bios, unsigned *len)
1577{
1578 unsigned target_bio_nr;
1579
1580 for (target_bio_nr = 0; target_bio_nr < num_bios; target_bio_nr++)
1581 __clone_and_map_simple_bio(ci, ti, target_bio_nr, len);
1582}
1583
1584static int __send_empty_flush(struct clone_info *ci)
1585{
1586 unsigned target_nr = 0;
1587 struct dm_target *ti;
1588
1589 BUG_ON(bio_has_data(ci->bio));
1590 while ((ti = dm_table_get_target(ci->map, target_nr++)))
1591 __send_duplicate_bios(ci, ti, ti->num_flush_bios, NULL);
1592
1593 return 0;
1594}
1595
1596static void __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti,
1597 sector_t sector, unsigned *len)
1598{
1599 struct bio *bio = ci->bio;
1600 struct dm_target_io *tio;
1601 unsigned target_bio_nr;
1602 unsigned num_target_bios = 1;
1603
1604
1605
1606
1607 if (bio_data_dir(bio) == WRITE && ti->num_write_bios)
1608 num_target_bios = ti->num_write_bios(ti, bio);
1609
1610 for (target_bio_nr = 0; target_bio_nr < num_target_bios; target_bio_nr++) {
1611 tio = alloc_tio(ci, ti, target_bio_nr);
1612 tio->len_ptr = len;
1613 clone_bio(tio, bio, sector, *len);
1614 __map_bio(tio);
1615 }
1616}
1617
1618typedef unsigned (*get_num_bios_fn)(struct dm_target *ti);
1619
1620static unsigned get_num_discard_bios(struct dm_target *ti)
1621{
1622 return ti->num_discard_bios;
1623}
1624
1625static unsigned get_num_write_same_bios(struct dm_target *ti)
1626{
1627 return ti->num_write_same_bios;
1628}
1629
1630typedef bool (*is_split_required_fn)(struct dm_target *ti);
1631
1632static bool is_split_required_for_discard(struct dm_target *ti)
1633{
1634 return ti->split_discard_bios;
1635}
1636
1637static int __send_changing_extent_only(struct clone_info *ci,
1638 get_num_bios_fn get_num_bios,
1639 is_split_required_fn is_split_required)
1640{
1641 struct dm_target *ti;
1642 unsigned len;
1643 unsigned num_bios;
1644
1645 do {
1646 ti = dm_table_find_target(ci->map, ci->sector);
1647 if (!dm_target_is_valid(ti))
1648 return -EIO;
1649
1650
1651
1652
1653
1654
1655
1656 num_bios = get_num_bios ? get_num_bios(ti) : 0;
1657 if (!num_bios)
1658 return -EOPNOTSUPP;
1659
1660 if (is_split_required && !is_split_required(ti))
1661 len = min((sector_t)ci->sector_count, max_io_len_target_boundary(ci->sector, ti));
1662 else
1663 len = min((sector_t)ci->sector_count, max_io_len(ci->sector, ti));
1664
1665 __send_duplicate_bios(ci, ti, num_bios, &len);
1666
1667 ci->sector += len;
1668 } while (ci->sector_count -= len);
1669
1670 return 0;
1671}
1672
1673static int __send_discard(struct clone_info *ci)
1674{
1675 return __send_changing_extent_only(ci, get_num_discard_bios,
1676 is_split_required_for_discard);
1677}
1678
1679static int __send_write_same(struct clone_info *ci)
1680{
1681 return __send_changing_extent_only(ci, get_num_write_same_bios, NULL);
1682}
1683
1684
1685
1686
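/*
 * Select the correct strategy for processing a non-flush bio.
 */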
1687static int __split_and_process_non_flush(struct clone_info *ci)
1688{
1689 struct bio *bio = ci->bio;
1690 struct dm_target *ti;
1691 unsigned len;
1692
1693 if (unlikely(bio->bi_rw & REQ_DISCARD))
1694 return __send_discard(ci);
1695 else if (unlikely(bio->bi_rw & REQ_WRITE_SAME))
1696 return __send_write_same(ci);
1697
1698 ti = dm_table_find_target(ci->map, ci->sector);
1699 if (!dm_target_is_valid(ti))
1700 return -EIO;
1701
1702 len = min_t(sector_t, max_io_len(ci->sector, ti), ci->sector_count);
1703
1704 __clone_and_map_data_bio(ci, ti, ci->sector, &len);
1705
1706 ci->sector += len;
1707 ci->sector_count -= len;
1708
1709 return 0;
1710}
1711
1712
1713
1714
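/*
 * Entry point to split a bio into clones and submit them to the targets.
 */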
1715static void __split_and_process_bio(struct mapped_device *md,
1716 struct dm_table *map, struct bio *bio)
1717{
1718 struct clone_info ci;
1719 int error = 0;
1720
1721 if (unlikely(!map)) {
1722 bio_io_error(bio);
1723 return;
1724 }
1725
1726 ci.map = map;
1727 ci.md = md;
1728 ci.io = alloc_io(md);
1729 ci.io->error = 0;
1730 atomic_set(&ci.io->io_count, 1);
1731 ci.io->bio = bio;
1732 ci.io->md = md;
1733 spin_lock_init(&ci.io->endio_lock);
1734 ci.sector = bio->bi_iter.bi_sector;
1735
1736 start_io_acct(ci.io);
1737
1738 if (bio->bi_rw & REQ_FLUSH) {
1739 ci.bio = &ci.md->flush_bio;
1740 ci.sector_count = 0;
1741 error = __send_empty_flush(&ci);
1742
1743 } else {
1744 ci.bio = bio;
1745 ci.sector_count = bio_sectors(bio);
1746 while (ci.sector_count && !error)
1747 error = __split_and_process_non_flush(&ci);
1748 }
1749
1750
1751 dec_pending(ci.io, error);
1752}
1753
1754
1755
1756
1757
1758
1759
1760
1761static blk_qc_t dm_make_request(struct request_queue *q, struct bio *bio)
1762{
1763 int rw = bio_data_dir(bio);
1764 struct mapped_device *md = q->queuedata;
1765 int srcu_idx;
1766 struct dm_table *map;
1767
1768 map = dm_get_live_table(md, &srcu_idx);
1769
1770 generic_start_io_acct(rw, bio_sectors(bio), &dm_disk(md)->part0);
1771
1772
1773 if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))) {
1774 dm_put_live_table(md, srcu_idx);
1775
1776 if (bio_rw(bio) != READA)
1777 queue_io(md, bio);
1778 else
1779 bio_io_error(bio);
1780 return BLK_QC_T_NONE;
1781 }
1782
1783 __split_and_process_bio(md, map, bio);
1784 dm_put_live_table(md, srcu_idx);
1785 return BLK_QC_T_NONE;
1786}
1787
1788int dm_request_based(struct mapped_device *md)
1789{
1790 return blk_queue_stackable(md->queue);
1791}
1792
1793static void dm_dispatch_clone_request(struct request *clone, struct request *rq)
1794{
1795 int r;
1796
1797 if (blk_queue_io_stat(clone->q))
1798 clone->cmd_flags |= REQ_IO_STAT;
1799
1800 clone->start_time = jiffies;
1801 r = blk_insert_cloned_request(clone->q, clone);
1802 if (r)
1803
1804 dm_complete_request(rq, r);
1805}
1806
1807static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig,
1808 void *data)
1809{
1810 struct dm_rq_target_io *tio = data;
1811 struct dm_rq_clone_bio_info *info =
1812 container_of(bio, struct dm_rq_clone_bio_info, clone);
1813
1814 info->orig = bio_orig;
1815 info->tio = tio;
1816 bio->bi_end_io = end_clone_bio;
1817
1818 return 0;
1819}
1820
1821static int setup_clone(struct request *clone, struct request *rq,
1822 struct dm_rq_target_io *tio, gfp_t gfp_mask)
1823{
1824 int r;
1825
1826 r = blk_rq_prep_clone(clone, rq, tio->md->bs, gfp_mask,
1827 dm_rq_bio_constructor, tio);
1828 if (r)
1829 return r;
1830
1831 clone->cmd = rq->cmd;
1832 clone->cmd_len = rq->cmd_len;
1833 clone->sense = rq->sense;
1834 clone->end_io = end_clone_request;
1835 clone->end_io_data = tio;
1836
1837 tio->clone = clone;
1838
1839 return 0;
1840}
1841
1842static struct request *clone_rq(struct request *rq, struct mapped_device *md,
1843 struct dm_rq_target_io *tio, gfp_t gfp_mask)
1844{
1845
1846
1847
1848
1849 bool alloc_clone = !tio->clone;
1850 struct request *clone;
1851
1852 if (alloc_clone) {
1853 clone = alloc_clone_request(md, gfp_mask);
1854 if (!clone)
1855 return NULL;
1856 } else
1857 clone = tio->clone;
1858
1859 blk_rq_init(NULL, clone);
1860 if (setup_clone(clone, rq, tio, gfp_mask)) {
1861
1862 if (alloc_clone)
1863 free_clone_request(md, clone);
1864 return NULL;
1865 }
1866
1867 return clone;
1868}
1869
1870static void map_tio_request(struct kthread_work *work);
1871
1872static void init_tio(struct dm_rq_target_io *tio, struct request *rq,
1873 struct mapped_device *md)
1874{
1875 tio->md = md;
1876 tio->ti = NULL;
1877 tio->clone = NULL;
1878 tio->orig = rq;
1879 tio->error = 0;
1880 memset(&tio->info, 0, sizeof(tio->info));
1881 if (md->kworker_task)
1882 init_kthread_work(&tio->work, map_tio_request);
1883}
1884
1885static struct dm_rq_target_io *prep_tio(struct request *rq,
1886 struct mapped_device *md, gfp_t gfp_mask)
1887{
1888 struct dm_rq_target_io *tio;
1889 int srcu_idx;
1890 struct dm_table *table;
1891
1892 tio = alloc_rq_tio(md, gfp_mask);
1893 if (!tio)
1894 return NULL;
1895
1896 init_tio(tio, rq, md);
1897
1898 table = dm_get_live_table(md, &srcu_idx);
1899 if (!dm_table_mq_request_based(table)) {
1900 if (!clone_rq(rq, md, tio, gfp_mask)) {
1901 dm_put_live_table(md, srcu_idx);
1902 free_rq_tio(tio);
1903 return NULL;
1904 }
1905 }
1906 dm_put_live_table(md, srcu_idx);
1907
1908 return tio;
1909}
1910
1911
1912
1913
1914static int dm_prep_fn(struct request_queue *q, struct request *rq)
1915{
1916 struct mapped_device *md = q->queuedata;
1917 struct dm_rq_target_io *tio;
1918
1919 if (unlikely(rq->special)) {
1920 DMWARN("Already has something in rq->special.");
1921 return BLKPREP_KILL;
1922 }
1923
1924 tio = prep_tio(rq, md, GFP_ATOMIC);
1925 if (!tio)
1926 return BLKPREP_DEFER;
1927
1928 rq->special = tio;
1929 rq->cmd_flags |= REQ_DONTPREP;
1930
1931 return BLKPREP_OK;
1932}
1933
1934
1935
1936
1937
1938
1939
1940static int map_request(struct dm_rq_target_io *tio, struct request *rq,
1941 struct mapped_device *md)
1942{
1943 int r;
1944 struct dm_target *ti = tio->ti;
1945 struct request *clone = NULL;
1946
1947 if (tio->clone) {
1948 clone = tio->clone;
1949 r = ti->type->map_rq(ti, clone, &tio->info);
1950 } else {
1951 r = ti->type->clone_and_map_rq(ti, rq, &tio->info, &clone);
1952 if (r < 0) {
1953
1954 dm_kill_unmapped_request(rq, r);
1955 return r;
1956 }
1957 if (r != DM_MAPIO_REMAPPED)
1958 return r;
1959 if (setup_clone(clone, rq, tio, GFP_ATOMIC)) {
1960
1961 ti->type->release_clone_rq(clone);
1962 return DM_MAPIO_REQUEUE;
1963 }
1964 }
1965
1966 switch (r) {
1967 case DM_MAPIO_SUBMITTED:
1968
1969 break;
1970 case DM_MAPIO_REMAPPED:
1971
1972 trace_block_rq_remap(clone->q, clone, disk_devt(dm_disk(md)),
1973 blk_rq_pos(rq));
1974 dm_dispatch_clone_request(clone, rq);
1975 break;
1976 case DM_MAPIO_REQUEUE:
1977
1978 dm_requeue_original_request(md, tio->orig);
1979 break;
1980 default:
1981 if (r > 0) {
1982 DMWARN("unimplemented target map return value: %d", r);
1983 BUG();
1984 }
1985
1986
1987 dm_kill_unmapped_request(rq, r);
1988 return r;
1989 }
1990
1991 return 0;
1992}
1993
1994static void map_tio_request(struct kthread_work *work)
1995{
1996 struct dm_rq_target_io *tio = container_of(work, struct dm_rq_target_io, work);
1997 struct request *rq = tio->orig;
1998 struct mapped_device *md = tio->md;
1999
2000 if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE)
2001 dm_requeue_original_request(md, rq);
2002}
2003
2004static void dm_start_request(struct mapped_device *md, struct request *orig)
2005{
2006 if (!orig->q->mq_ops)
2007 blk_start_request(orig);
2008 else
2009 blk_mq_start_request(orig);
2010 atomic_inc(&md->pending[rq_data_dir(orig)]);
2011
2012 if (md->seq_rq_merge_deadline_usecs) {
2013 md->last_rq_pos = rq_end_sector(orig);
2014 md->last_rq_rw = rq_data_dir(orig);
2015 md->last_rq_start_time = ktime_get();
2016 }
2017
2018 if (unlikely(dm_stats_used(&md->stats))) {
2019 struct dm_rq_target_io *tio = tio_from_request(orig);
2020 tio->duration_jiffies = jiffies;
2021 tio->n_sectors = blk_rq_sectors(orig);
2022 dm_stats_account_io(&md->stats, orig->cmd_flags, blk_rq_pos(orig),
2023 tio->n_sectors, false, 0, &tio->stats_aux);
2024 }
2025
2026
2027
2028
2029
2030
2031
2032
2033 dm_get(md);
2034}
2035
2036#define MAX_SEQ_RQ_MERGE_DEADLINE_USECS 100000
2037
2038ssize_t dm_attr_rq_based_seq_io_merge_deadline_show(struct mapped_device *md, char *buf)
2039{
2040 return sprintf(buf, "%u\n", md->seq_rq_merge_deadline_usecs);
2041}
2042
2043ssize_t dm_attr_rq_based_seq_io_merge_deadline_store(struct mapped_device *md,
2044 const char *buf, size_t count)
2045{
2046 unsigned deadline;
2047
2048 if (!dm_request_based(md) || md->use_blk_mq)
2049 return count;
2050
2051 if (kstrtouint(buf, 10, &deadline))
2052 return -EINVAL;
2053
2054 if (deadline > MAX_SEQ_RQ_MERGE_DEADLINE_USECS)
2055 deadline = MAX_SEQ_RQ_MERGE_DEADLINE_USECS;
2056
2057 md->seq_rq_merge_deadline_usecs = deadline;
2058
2059 return count;
2060}
2061
2062static bool dm_request_peeked_before_merge_deadline(struct mapped_device *md)
2063{
2064 ktime_t kt_deadline;
2065
2066 if (!md->seq_rq_merge_deadline_usecs)
2067 return false;
2068
2069 kt_deadline = ns_to_ktime((u64)md->seq_rq_merge_deadline_usecs * NSEC_PER_USEC);
2070 kt_deadline = ktime_add_safe(md->last_rq_start_time, kt_deadline);
2071
2072 return !ktime_after(ktime_get(), kt_deadline);
2073}
2074
2075
2076
2077
2078
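/*
 * q->request_fn for request-based dm.
 * Called with the queue lock held.
 */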
2079static void dm_request_fn(struct request_queue *q)
2080{
2081 struct mapped_device *md = q->queuedata;
2082 int srcu_idx;
2083 struct dm_table *map = dm_get_live_table(md, &srcu_idx);
2084 struct dm_target *ti;
2085 struct request *rq;
2086 struct dm_rq_target_io *tio;
2087 sector_t pos;
2088
2089
2090
2091
2092
2093
2094
2095 while (!blk_queue_stopped(q)) {
2096 rq = blk_peek_request(q);
2097 if (!rq)
2098 goto out;
2099
2100
2101 pos = 0;
2102 if (!(rq->cmd_flags & REQ_FLUSH))
2103 pos = blk_rq_pos(rq);
2104
2105 ti = dm_table_find_target(map, pos);
2106 if (!dm_target_is_valid(ti)) {
2107
2108
2109
2110
2111 DMERR_LIMIT("request attempted access beyond the end of device");
2112 dm_start_request(md, rq);
2113 dm_kill_unmapped_request(rq, -EIO);
2114 continue;
2115 }
2116
2117 if (dm_request_peeked_before_merge_deadline(md) &&
2118 md_in_flight(md) && rq->bio && rq->bio->bi_vcnt == 1 &&
2119 md->last_rq_pos == pos && md->last_rq_rw == rq_data_dir(rq))
2120 goto delay_and_out;
2121
2122 if (ti->type->busy && ti->type->busy(ti))
2123 goto delay_and_out;
2124
2125 dm_start_request(md, rq);
2126
2127 tio = tio_from_request(rq);
2128
2129 tio->ti = ti;
2130 queue_kthread_work(&md->kworker, &tio->work);
2131 BUG_ON(!irqs_disabled());
2132 }
2133
2134 goto out;
2135
2136delay_and_out:
2137 blk_delay_queue(q, HZ / 100);
2138out:
2139 dm_put_live_table(md, srcu_idx);
2140}
2141
2142static int dm_any_congested(void *congested_data, int bdi_bits)
2143{
2144 int r = bdi_bits;
2145 struct mapped_device *md = congested_data;
2146 struct dm_table *map;
2147
2148 if (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) {
2149 map = dm_get_live_table_fast(md);
2150 if (map) {
2151
2152
2153
2154
2155 if (dm_request_based(md))
2156 r = md->queue->backing_dev_info.wb.state &
2157 bdi_bits;
2158 else
2159 r = dm_table_any_congested(map, bdi_bits);
2160 }
2161 dm_put_live_table_fast(md);
2162 }
2163
2164 return r;
2165}
2166
2167
2168
2169
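/*
 * An IDR is used to keep track of allocated minor numbers.
 */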
2170static void free_minor(int minor)
2171{
2172 spin_lock(&_minor_lock);
2173 idr_remove(&_minor_idr, minor);
2174 spin_unlock(&_minor_lock);
2175}
2176
2177
2178
2179
2180static int specific_minor(int minor)
2181{
2182 int r;
2183
2184 if (minor >= (1 << MINORBITS))
2185 return -EINVAL;
2186
2187 idr_preload(GFP_KERNEL);
2188 spin_lock(&_minor_lock);
2189
2190 r = idr_alloc(&_minor_idr, MINOR_ALLOCED, minor, minor + 1, GFP_NOWAIT);
2191
2192 spin_unlock(&_minor_lock);
2193 idr_preload_end();
2194 if (r < 0)
2195 return r == -ENOSPC ? -EBUSY : r;
2196 return 0;
2197}
2198
2199static int next_free_minor(int *minor)
2200{
2201 int r;
2202
2203 idr_preload(GFP_KERNEL);
2204 spin_lock(&_minor_lock);
2205
2206 r = idr_alloc(&_minor_idr, MINOR_ALLOCED, 0, 1 << MINORBITS, GFP_NOWAIT);
2207
2208 spin_unlock(&_minor_lock);
2209 idr_preload_end();
2210 if (r < 0)
2211 return r;
2212 *minor = r;
2213 return 0;
2214}
2215
2216static const struct block_device_operations dm_blk_dops;
2217
2218static void dm_wq_work(struct work_struct *work);
2219
2220static void dm_init_md_queue(struct mapped_device *md)
2221{
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231 queue_flag_clear_unlocked(QUEUE_FLAG_STACKABLE, md->queue);
2232
2233
2234
2235
2236
2237 md->queue->queuedata = md;
2238 md->queue->backing_dev_info.congested_data = md;
2239}
2240
2241static void dm_init_old_md_queue(struct mapped_device *md)
2242{
2243 md->use_blk_mq = false;
2244 dm_init_md_queue(md);
2245
2246
2247
2248
2249 md->queue->backing_dev_info.congested_fn = dm_any_congested;
2250 blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
2251}
2252
2253static void cleanup_mapped_device(struct mapped_device *md)
2254{
2255 if (md->wq)
2256 destroy_workqueue(md->wq);
2257 if (md->kworker_task)
2258 kthread_stop(md->kworker_task);
2259 mempool_destroy(md->io_pool);
2260 mempool_destroy(md->rq_pool);
2261 if (md->bs)
2262 bioset_free(md->bs);
2263
2264 cleanup_srcu_struct(&md->io_barrier);
2265
2266 if (md->disk) {
2267 spin_lock(&_minor_lock);
2268 md->disk->private_data = NULL;
2269 spin_unlock(&_minor_lock);
2270 del_gendisk(md->disk);
2271 put_disk(md->disk);
2272 }
2273
2274 if (md->queue)
2275 blk_cleanup_queue(md->queue);
2276
2277 if (md->bdev) {
2278 bdput(md->bdev);
2279 md->bdev = NULL;
2280 }
2281}
2282
2283
2284
2285
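/*
 * Allocate and initialise a blank device with a given minor.
 */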
2286static struct mapped_device *alloc_dev(int minor)
2287{
2288 int r;
2289 struct mapped_device *md = kzalloc(sizeof(*md), GFP_KERNEL);
2290 void *old_md;
2291
2292 if (!md) {
2293 DMWARN("unable to allocate device, out of memory.");
2294 return NULL;
2295 }
2296
2297 if (!try_module_get(THIS_MODULE))
2298 goto bad_module_get;
2299
2300
2301 if (minor == DM_ANY_MINOR)
2302 r = next_free_minor(&minor);
2303 else
2304 r = specific_minor(minor);
2305 if (r < 0)
2306 goto bad_minor;
2307
2308 r = init_srcu_struct(&md->io_barrier);
2309 if (r < 0)
2310 goto bad_io_barrier;
2311
2312 md->use_blk_mq = use_blk_mq;
2313 md->type = DM_TYPE_NONE;
2314 mutex_init(&md->suspend_lock);
2315 mutex_init(&md->type_lock);
2316 mutex_init(&md->table_devices_lock);
2317 spin_lock_init(&md->deferred_lock);
2318 atomic_set(&md->holders, 1);
2319 atomic_set(&md->open_count, 0);
2320 atomic_set(&md->event_nr, 0);
2321 atomic_set(&md->uevent_seq, 0);
2322 INIT_LIST_HEAD(&md->uevent_list);
2323 INIT_LIST_HEAD(&md->table_devices);
2324 spin_lock_init(&md->uevent_lock);
2325
2326 md->queue = blk_alloc_queue(GFP_KERNEL);
2327 if (!md->queue)
2328 goto bad;
2329
2330 dm_init_md_queue(md);
2331
2332 md->disk = alloc_disk(1);
2333 if (!md->disk)
2334 goto bad;
2335
2336 atomic_set(&md->pending[0], 0);
2337 atomic_set(&md->pending[1], 0);
2338 init_waitqueue_head(&md->wait);
2339 INIT_WORK(&md->work, dm_wq_work);
2340 init_waitqueue_head(&md->eventq);
2341 init_completion(&md->kobj_holder.completion);
2342 md->kworker_task = NULL;
2343
2344 md->disk->major = _major;
2345 md->disk->first_minor = minor;
2346 md->disk->fops = &dm_blk_dops;
2347 md->disk->queue = md->queue;
2348 md->disk->private_data = md;
2349 sprintf(md->disk->disk_name, "dm-%d", minor);
2350 add_disk(md->disk);
2351 format_dev_t(md->name, MKDEV(_major, minor));
2352
2353 md->wq = alloc_workqueue("kdmflush", WQ_MEM_RECLAIM, 0);
2354 if (!md->wq)
2355 goto bad;
2356
2357 md->bdev = bdget_disk(md->disk, 0);
2358 if (!md->bdev)
2359 goto bad;
2360
2361 bio_init(&md->flush_bio);
2362 md->flush_bio.bi_bdev = md->bdev;
2363 md->flush_bio.bi_rw = WRITE_FLUSH;
2364
2365 dm_stats_init(&md->stats);
2366
2367
2368 spin_lock(&_minor_lock);
2369 old_md = idr_replace(&_minor_idr, md, minor);
2370 spin_unlock(&_minor_lock);
2371
2372 BUG_ON(old_md != MINOR_ALLOCED);
2373
2374 return md;
2375
2376bad:
2377 cleanup_mapped_device(md);
2378bad_io_barrier:
2379 free_minor(minor);
2380bad_minor:
2381 module_put(THIS_MODULE);
2382bad_module_get:
2383 kfree(md);
2384 return NULL;
2385}
2386
2387static void unlock_fs(struct mapped_device *md);
2388
2389static void free_dev(struct mapped_device *md)
2390{
2391 int minor = MINOR(disk_devt(md->disk));
2392
2393 unlock_fs(md);
2394
2395 cleanup_mapped_device(md);
2396 if (md->use_blk_mq)
2397 blk_mq_free_tag_set(&md->tag_set);
2398
2399 free_table_devices(&md->table_devices);
2400 dm_stats_cleanup(&md->stats);
2401 free_minor(minor);
2402
2403 module_put(THIS_MODULE);
2404 kfree(md);
2405}
2406
2407static void __bind_mempools(struct mapped_device *md, struct dm_table *t)
2408{
2409 struct dm_md_mempools *p = dm_table_get_md_mempools(t);
2410
2411 if (md->bs) {
2412
2413 if (dm_table_get_type(t) == DM_TYPE_BIO_BASED) {
2414
2415
2416
2417
2418 bioset_free(md->bs);
2419 md->bs = p->bs;
2420 p->bs = NULL;
2421 }
2422
2423
2424
2425
2426
2427
2428
2429
2430 goto out;
2431 }
2432
2433 BUG_ON(!p || md->io_pool || md->rq_pool || md->bs);
2434
2435 md->io_pool = p->io_pool;
2436 p->io_pool = NULL;
2437 md->rq_pool = p->rq_pool;
2438 p->rq_pool = NULL;
2439 md->bs = p->bs;
2440 p->bs = NULL;
2441
2442out:
2443
2444 dm_table_free_md_mempools(t);
2445}
2446
2447
2448
2449
2450static void event_callback(void *context)
2451{
2452 unsigned long flags;
2453 LIST_HEAD(uevents);
2454 struct mapped_device *md = (struct mapped_device *) context;
2455
2456 spin_lock_irqsave(&md->uevent_lock, flags);
2457 list_splice_init(&md->uevent_list, &uevents);
2458 spin_unlock_irqrestore(&md->uevent_lock, flags);
2459
2460 dm_send_uevents(&uevents, &disk_to_dev(md->disk)->kobj);
2461
2462 atomic_inc(&md->event_nr);
2463 wake_up(&md->eventq);
2464}
2465
2466
2467
2468
2469static void __set_size(struct mapped_device *md, sector_t size)
2470{
2471 set_capacity(md->disk, size);
2472
2473 i_size_write(md->bdev->bd_inode, (loff_t)size << SECTOR_SHIFT);
2474}
2475
2476
2477
2478
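/*
 * Returns old map, which caller must destroy.
 */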
2479static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
2480 struct queue_limits *limits)
2481{
2482 struct dm_table *old_map;
2483 struct request_queue *q = md->queue;
2484 sector_t size;
2485
2486 size = dm_table_get_size(t);
2487
2488
2489
2490
2491 if (size != dm_get_size(md))
2492 memset(&md->geometry, 0, sizeof(md->geometry));
2493
2494 __set_size(md, size);
2495
2496 dm_table_event_callback(t, event_callback, md);
2497
2498
2499
2500
2501
2502
2503
2504
2505 if (dm_table_request_based(t))
2506 stop_queue(q);
2507
2508 __bind_mempools(md, t);
2509
2510 old_map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock));
2511 rcu_assign_pointer(md->map, t);
2512 md->immutable_target_type = dm_table_get_immutable_target_type(t);
2513
2514 dm_table_set_restrictions(t, q, limits);
2515 if (old_map)
2516 dm_sync_table(md);
2517
2518 return old_map;
2519}
2520
2521
2522
2523
2524static struct dm_table *__unbind(struct mapped_device *md)
2525{
2526 struct dm_table *map = rcu_dereference_protected(md->map, 1);
2527
2528 if (!map)
2529 return NULL;
2530
2531 dm_table_event_callback(map, NULL, NULL);
2532 RCU_INIT_POINTER(md->map, NULL);
2533 dm_sync_table(md);
2534
2535 return map;
2536}
2537
2538
2539
2540
2541int dm_create(int minor, struct mapped_device **result)
2542{
2543 struct mapped_device *md;
2544
2545 md = alloc_dev(minor);
2546 if (!md)
2547 return -ENXIO;
2548
2549 dm_sysfs_init(md);
2550
2551 *result = md;
2552 return 0;
2553}
2554
2555
2556
2557
2558
2559void dm_lock_md_type(struct mapped_device *md)
2560{
2561 mutex_lock(&md->type_lock);
2562}
2563
2564void dm_unlock_md_type(struct mapped_device *md)
2565{
2566 mutex_unlock(&md->type_lock);
2567}
2568
2569void dm_set_md_type(struct mapped_device *md, unsigned type)
2570{
2571 BUG_ON(!mutex_is_locked(&md->type_lock));
2572 md->type = type;
2573}
2574
2575unsigned dm_get_md_type(struct mapped_device *md)
2576{
2577 BUG_ON(!mutex_is_locked(&md->type_lock));
2578 return md->type;
2579}
2580
2581struct target_type *dm_get_immutable_target_type(struct mapped_device *md)
2582{
2583 return md->immutable_target_type;
2584}
2585
2586
2587
2588
2589
2590struct queue_limits *dm_get_queue_limits(struct mapped_device *md)
2591{
2592 BUG_ON(!atomic_read(&md->holders));
2593 return &md->queue->limits;
2594}
2595EXPORT_SYMBOL_GPL(dm_get_queue_limits);
2596
2597static void init_rq_based_worker_thread(struct mapped_device *md)
2598{
2599
2600 init_kthread_worker(&md->kworker);
2601 md->kworker_task = kthread_run(kthread_worker_fn, &md->kworker,
2602 "kdmwork-%s", dm_device_name(md));
2603}
2604
2605
2606
2607
2608static int dm_init_request_based_queue(struct mapped_device *md)
2609{
2610 struct request_queue *q = NULL;
2611
2612
2613 q = blk_init_allocated_queue(md->queue, dm_request_fn, NULL);
2614 if (!q)
2615 return -EINVAL;
2616
2617
2618 md->seq_rq_merge_deadline_usecs = 0;
2619
2620 md->queue = q;
2621 dm_init_old_md_queue(md);
2622 blk_queue_softirq_done(md->queue, dm_softirq_done);
2623 blk_queue_prep_rq(md->queue, dm_prep_fn);
2624
2625 init_rq_based_worker_thread(md);
2626
2627 elv_register_queue(md->queue);
2628
2629 return 0;
2630}

static int dm_mq_init_request(void *data, struct request *rq,
			      unsigned int hctx_idx, unsigned int request_idx,
			      unsigned int numa_node)
{
	struct mapped_device *md = data;
	struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq);

	/*
	 * Must initialize md member of tio, otherwise it won't
	 * be available in dm_mq_queue_rq.
	 */
	tio->md = md;

	return 0;
}

static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
			  const struct blk_mq_queue_data *bd)
{
	struct request *rq = bd->rq;
	struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq);
	struct mapped_device *md = tio->md;
	int srcu_idx;
	struct dm_table *map = dm_get_live_table(md, &srcu_idx);
	struct dm_target *ti;
	sector_t pos;

	/* always use block 0 to find the target for flushes for now */
	pos = 0;
	if (!(rq->cmd_flags & REQ_FLUSH))
		pos = blk_rq_pos(rq);

	ti = dm_table_find_target(map, pos);
	if (!dm_target_is_valid(ti)) {
		dm_put_live_table(md, srcu_idx);
		DMERR_LIMIT("request attempted access beyond the end of device");
		/*
		 * Must perform setup, that rq_completed() requires,
		 * before returning BLK_MQ_RQ_QUEUE_ERROR
		 */
		dm_start_request(md, rq);
		return BLK_MQ_RQ_QUEUE_ERROR;
	}
	dm_put_live_table(md, srcu_idx);

	if (ti->type->busy && ti->type->busy(ti))
		return BLK_MQ_RQ_QUEUE_BUSY;

	dm_start_request(md, rq);

	/* Init tio using md established in .init_request */
	init_tio(tio, rq, md);

	/*
	 * Establish tio->ti before queuing work (map_tio_request)
	 * or making direct call to map_request().
	 */
	tio->ti = ti;

	/* Clone the request if underlying devices aren't blk-mq */
	if (dm_table_get_type(map) == DM_TYPE_REQUEST_BASED) {
		/* clone request is allocated at the end of the pdu */
		tio->clone = (void *)blk_mq_rq_to_pdu(rq) + sizeof(struct dm_rq_target_io);
		(void) clone_rq(rq, md, tio, GFP_ATOMIC);
		queue_kthread_work(&md->kworker, &tio->work);
	} else {
		/* Direct call is fine since .queue_rq allows allocations */
		if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE) {
			/* Undo dm_start_request() before requeuing */
			rq_end_stats(md, rq);
			rq_completed(md, rq_data_dir(rq), false);
			return BLK_MQ_RQ_QUEUE_BUSY;
		}
	}

	return BLK_MQ_RQ_QUEUE_OK;
}

static struct blk_mq_ops dm_mq_ops = {
	.queue_rq = dm_mq_queue_rq,
	.map_queue = blk_mq_map_queue,
	.complete = dm_softirq_done,
	.init_request = dm_mq_init_request,
};
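
/*
 * Contract recap: blk-mq invokes .init_request once per preallocated
 * request when the tag set is allocated (seeding the per-request pdu),
 * .queue_rq for every dispatched request, and funnels completions
 * through .complete (dm_softirq_done).
 */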

static int dm_init_request_based_blk_mq_queue(struct mapped_device *md)
{
	unsigned md_type = dm_get_md_type(md);
	struct request_queue *q;
	int err;

	memset(&md->tag_set, 0, sizeof(md->tag_set));
	md->tag_set.ops = &dm_mq_ops;
	md->tag_set.queue_depth = BLKDEV_MAX_RQ;
	md->tag_set.numa_node = NUMA_NO_NODE;
	md->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
	md->tag_set.nr_hw_queues = 1;
	if (md_type == DM_TYPE_REQUEST_BASED) {
		/* make the memory for non-blk-mq clone part of the pdu */
		md->tag_set.cmd_size = sizeof(struct dm_rq_target_io) + sizeof(struct request);
	} else
		md->tag_set.cmd_size = sizeof(struct dm_rq_target_io);
	md->tag_set.driver_data = md;

	err = blk_mq_alloc_tag_set(&md->tag_set);
	if (err)
		return err;

	q = blk_mq_init_allocated_queue(&md->tag_set, md->queue);
	if (IS_ERR(q)) {
		err = PTR_ERR(q);
		goto out_tag_set;
	}
	md->queue = q;
	dm_init_md_queue(md);

	/* backfill 'mq' sysfs registration normally done in blk_register_queue */
	blk_mq_register_disk(md->disk);

	if (md_type == DM_TYPE_REQUEST_BASED)
		init_rq_based_worker_thread(md);

	return 0;

out_tag_set:
	blk_mq_free_tag_set(&md->tag_set);
	return err;
}

static unsigned filter_md_type(unsigned type, struct mapped_device *md)
{
	if (type == DM_TYPE_BIO_BASED)
		return type;

	return !md->use_blk_mq ? DM_TYPE_REQUEST_BASED : DM_TYPE_MQ_REQUEST_BASED;
}
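
/*
 * In short: bio-based stays bio-based; any request-based type resolves
 * to the legacy .request_fn flavour or the blk-mq flavour depending on
 * md->use_blk_mq, so the rest of the code only ever sees one of the
 * two concrete request-based types.
 */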

/*
 * Setup the DM device's queue based on md's type.
 */
int dm_setup_md_queue(struct mapped_device *md)
{
	int r;
	unsigned md_type = filter_md_type(dm_get_md_type(md), md);

	switch (md_type) {
	case DM_TYPE_REQUEST_BASED:
		r = dm_init_request_based_queue(md);
		if (r) {
			DMWARN("Cannot initialize queue for request-based mapped device");
			return r;
		}
		break;
	case DM_TYPE_MQ_REQUEST_BASED:
		r = dm_init_request_based_blk_mq_queue(md);
		if (r) {
			DMWARN("Cannot initialize queue for request-based blk-mq mapped device");
			return r;
		}
		break;
	case DM_TYPE_BIO_BASED:
		dm_init_old_md_queue(md);
		blk_queue_make_request(md->queue, dm_make_request);
		/*
		 * DM handles splitting bios as needed.  Free the bio_split bioset
		 * since it won't be used (saves 1 process per bio-based DM device).
		 */
		bioset_free(md->queue->bio_split);
		md->queue->bio_split = NULL;
		break;
	}

	return 0;
}

struct mapped_device *dm_get_md(dev_t dev)
{
	struct mapped_device *md;
	unsigned minor = MINOR(dev);

	if (MAJOR(dev) != _major || minor >= (1 << MINORBITS))
		return NULL;

	spin_lock(&_minor_lock);

	md = idr_find(&_minor_idr, minor);
	if (md) {
		if (md == MINOR_ALLOCED ||
		    MINOR(disk_devt(dm_disk(md))) != minor ||
		    dm_deleting_md(md) ||
		    test_bit(DMF_FREEING, &md->flags)) {
			md = NULL;
			goto out;
		}
		dm_get(md);
	}

out:
	spin_unlock(&_minor_lock);

	return md;
}
EXPORT_SYMBOL_GPL(dm_get_md);
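
/*
 * Typical usage (sketch): resolve a dev_t to its mapped_device and
 * drop the reference when done.
 *
 *	struct mapped_device *md = dm_get_md(dev);
 *	if (!md)
 *		return -ENODEV;
 *	...
 *	dm_put(md);
 */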

void *dm_get_mdptr(struct mapped_device *md)
{
	return md->interface_ptr;
}

void dm_set_mdptr(struct mapped_device *md, void *ptr)
{
	md->interface_ptr = ptr;
}

void dm_get(struct mapped_device *md)
{
	atomic_inc(&md->holders);
	BUG_ON(test_bit(DMF_FREEING, &md->flags));
}

int dm_hold(struct mapped_device *md)
{
	spin_lock(&_minor_lock);
	if (test_bit(DMF_FREEING, &md->flags)) {
		spin_unlock(&_minor_lock);
		return -EBUSY;
	}
	dm_get(md);
	spin_unlock(&_minor_lock);
	return 0;
}
EXPORT_SYMBOL_GPL(dm_hold);
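
/*
 * Unlike dm_get(), which BUGs if the device is already being freed,
 * dm_hold() fails gracefully (sketch):
 *
 *	if (dm_hold(md))
 *		return -EBUSY;	(the device is going away)
 *	...
 *	dm_put(md);
 */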

const char *dm_device_name(struct mapped_device *md)
{
	return md->name;
}
EXPORT_SYMBOL_GPL(dm_device_name);

static void __dm_destroy(struct mapped_device *md, bool wait)
{
	struct dm_table *map;
	int srcu_idx;

	might_sleep();

	spin_lock(&_minor_lock);
	idr_replace(&_minor_idr, MINOR_ALLOCED, MINOR(disk_devt(dm_disk(md))));
	set_bit(DMF_FREEING, &md->flags);
	spin_unlock(&_minor_lock);

	if (dm_request_based(md) && md->kworker_task)
		flush_kthread_worker(&md->kworker);

	/*
	 * Take suspend_lock so that presuspend and postsuspend methods
	 * do not race with internal suspend.
	 */
	mutex_lock(&md->suspend_lock);
	map = dm_get_live_table(md, &srcu_idx);
	if (!dm_suspended_md(md)) {
		dm_table_presuspend_targets(map);
		dm_table_postsuspend_targets(map);
	}
	/* dm_put_live_table must be before msleep, otherwise deadlock is possible */
	dm_put_live_table(md, srcu_idx);
	mutex_unlock(&md->suspend_lock);

	/*
	 * Rare, but there may be I/O requests still going to complete,
	 * for example.  Wait for all references to disappear.
	 * No one should increment the reference count of the mapped_device,
	 * after the mapped_device state becomes DMF_FREEING.
	 */
	if (wait)
		while (atomic_read(&md->holders))
			msleep(1);
	else if (atomic_read(&md->holders))
		DMWARN("%s: Forcibly removing mapped_device still in use! (%d users)",
		       dm_device_name(md), atomic_read(&md->holders));

	dm_sysfs_exit(md);
	dm_table_destroy(__unbind(md));
	free_dev(md);
}

void dm_destroy(struct mapped_device *md)
{
	__dm_destroy(md, true);
}

void dm_destroy_immediate(struct mapped_device *md)
{
	__dm_destroy(md, false);
}

void dm_put(struct mapped_device *md)
{
	atomic_dec(&md->holders);
}
EXPORT_SYMBOL_GPL(dm_put);

static int dm_wait_for_completion(struct mapped_device *md, int interruptible)
{
	int r = 0;
	DECLARE_WAITQUEUE(wait, current);

	add_wait_queue(&md->wait, &wait);

	while (1) {
		set_current_state(interruptible);

		if (!md_in_flight(md))
			break;

		if (interruptible == TASK_INTERRUPTIBLE &&
		    signal_pending(current)) {
			r = -EINTR;
			break;
		}

		io_schedule();
	}
	set_current_state(TASK_RUNNING);

	remove_wait_queue(&md->wait, &wait);

	return r;
}

/*
 * Process the deferred bios.
 */
static void dm_wq_work(struct work_struct *work)
{
	struct mapped_device *md = container_of(work, struct mapped_device,
						work);
	struct bio *c;
	int srcu_idx;
	struct dm_table *map;

	map = dm_get_live_table(md, &srcu_idx);

	while (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) {
		spin_lock_irq(&md->deferred_lock);
		c = bio_list_pop(&md->deferred);
		spin_unlock_irq(&md->deferred_lock);

		if (!c)
			break;

		if (dm_request_based(md))
			generic_make_request(c);
		else
			__split_and_process_bio(md, map, c);
	}

	dm_put_live_table(md, srcu_idx);
}

static void dm_queue_flush(struct mapped_device *md)
{
	clear_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
	smp_mb__after_atomic();
	queue_work(md->wq, &md->work);
}

/*
 * Swap in a new table, returning the old one for the caller to destroy.
 */
struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table)
{
	struct dm_table *live_map = NULL, *map = ERR_PTR(-EINVAL);
	struct queue_limits limits;
	int r;

	mutex_lock(&md->suspend_lock);

	/* device must be suspended */
	if (!dm_suspended_md(md))
		goto out;

	/*
	 * If the new table has no data devices, retain the existing limits.
	 * This helps multipath with queue_if_no_path if all paths disappear,
	 * then new I/O is queued based on these limits, and then some paths
	 * reappear.
	 */
	if (dm_table_has_no_data_devices(table)) {
		live_map = dm_get_live_table_fast(md);
		if (live_map)
			limits = md->queue->limits;
		dm_put_live_table_fast(md);
	}

	if (!live_map) {
		r = dm_calculate_queue_limits(table, &limits);
		if (r) {
			map = ERR_PTR(r);
			goto out;
		}
	}

	map = __bind(md, table, &limits);

out:
	mutex_unlock(&md->suspend_lock);
	return map;
}
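
/*
 * Sketch of the expected calling sequence (dm-ioctl's resume path is
 * the real user; this is illustrative only):
 *
 *	dm_suspend(md, DM_SUSPEND_LOCKFS_FLAG);
 *	old_map = dm_swap_table(md, new_table);
 *	if (!IS_ERR(old_map))
 *		dm_table_destroy(old_map);
 *	dm_resume(md);
 */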

/*
 * Functions to lock and unlock any filesystem running on the
 * device.
 */
static int lock_fs(struct mapped_device *md)
{
	int r;

	WARN_ON(md->frozen_sb);

	md->frozen_sb = freeze_bdev(md->bdev);
	if (IS_ERR(md->frozen_sb)) {
		r = PTR_ERR(md->frozen_sb);
		md->frozen_sb = NULL;
		return r;
	}

	set_bit(DMF_FROZEN, &md->flags);

	return 0;
}

static void unlock_fs(struct mapped_device *md)
{
	if (!test_bit(DMF_FROZEN, &md->flags))
		return;

	thaw_bdev(md->bdev, md->frozen_sb);
	md->frozen_sb = NULL;
	clear_bit(DMF_FROZEN, &md->flags);
}

/*
 * If __dm_suspend returns 0, the device is completely quiescent
 * now. There is no request-processing activity. All new requests
 * are being added to md->deferred list.
 *
 * Caller must hold md->suspend_lock
 */
static int __dm_suspend(struct mapped_device *md, struct dm_table *map,
			unsigned suspend_flags, int interruptible)
{
	bool do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG;
	bool noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG;
	int r;

	/*
	 * DMF_NOFLUSH_SUSPENDING must be set before presuspend.
	 * This flag is cleared before dm_suspend returns.
	 */
	if (noflush)
		set_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);

	/*
	 * This gets reverted if there's an error later and the targets
	 * provide the .presuspend_undo hook.
	 */
	dm_table_presuspend_targets(map);

	/*
	 * Flush I/O to the device.
	 * Any I/O submitted after lock_fs() may not be flushed.
	 * noflush takes precedence over do_lockfs.
	 * (lock_fs() flushes I/Os and waits for them to complete.)
	 */
	if (!noflush && do_lockfs) {
		r = lock_fs(md);
		if (r) {
			dm_table_presuspend_undo_targets(map);
			return r;
		}
	}

	/*
	 * Here we must make sure that no processes are submitting requests
	 * to target drivers i.e. no one may be executing
	 * __split_and_process_bio. This is called from dm_request and
	 * dm_wq_work.
	 *
	 * To get all processes out of __split_and_process_bio in dm_request,
	 * we take the write lock. To prevent any process from reentering
	 * __split_and_process_bio from dm_request and quiesce the thread
	 * (dm_wq_work), we set DMF_BLOCK_IO_FOR_SUSPEND and call
	 * flush_workqueue(md->wq).
	 */
	set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
	if (map)
		synchronize_srcu(&md->io_barrier);

	/*
	 * Stop md->queue before flushing md->wq in case request-based
	 * dm defers requests to md->wq from md->queue.
	 */
	if (dm_request_based(md)) {
		stop_queue(md->queue);
		if (md->kworker_task)
			flush_kthread_worker(&md->kworker);
	}

	flush_workqueue(md->wq);

	/*
	 * At this point no more requests are entering target request
	 * routines. We call dm_wait_for_completion to wait for all existing
	 * requests to finish.
	 */
	r = dm_wait_for_completion(md, interruptible);

	if (noflush)
		clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
	if (map)
		synchronize_srcu(&md->io_barrier);

	/* were we interrupted ? */
	if (r < 0) {
		dm_queue_flush(md);

		if (dm_request_based(md))
			start_queue(md->queue);

		unlock_fs(md);
		dm_table_presuspend_undo_targets(map);
		/* pushback list is already flushed, so skip flush */
	}

	return r;
}

/*
 * We need to be able to change a mapping table under a mounted
 * filesystem.  For example we might want to move some data in
 * the background.  Before the table can be swapped with
 * dm_bind_table, dm_suspend must be called to flush any in
 * flight bios and ensure that any further io gets deferred.
 */
/*
 * Suspend mechanism in request-based dm.
 *
 * 1. Flush all I/Os by lock_fs() if needed.
 * 2. Stop dispatching any I/O by stopping the request_queue.
 * 3. Wait for all in-flight I/Os to be completed or requeued.
 *
 * To abort suspend, start the request_queue.
 */
int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
{
	struct dm_table *map = NULL;
	int r = 0;

retry:
	mutex_lock_nested(&md->suspend_lock, SINGLE_DEPTH_NESTING);

	if (dm_suspended_md(md)) {
		r = -EINVAL;
		goto out_unlock;
	}

	if (dm_suspended_internally_md(md)) {
		/* already internally suspended, wait for internal resume */
		mutex_unlock(&md->suspend_lock);
		r = wait_on_bit(&md->flags, DMF_SUSPENDED_INTERNALLY, TASK_INTERRUPTIBLE);
		if (r)
			return r;
		goto retry;
	}

	map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock));

	r = __dm_suspend(md, map, suspend_flags, TASK_INTERRUPTIBLE);
	if (r)
		goto out_unlock;

	set_bit(DMF_SUSPENDED, &md->flags);

	dm_table_postsuspend_targets(map);

out_unlock:
	mutex_unlock(&md->suspend_lock);
	return r;
}

static int __dm_resume(struct mapped_device *md, struct dm_table *map)
{
	if (map) {
		int r = dm_table_resume_targets(map);
		if (r)
			return r;
	}

	dm_queue_flush(md);

	/*
	 * Flushing deferred I/Os must be done after targets are resumed
	 * so that mapping of targets can work correctly.
	 * Request-based dm is queueing the deferred I/Os in its request_queue.
	 */
	if (dm_request_based(md))
		start_queue(md->queue);

	unlock_fs(md);

	return 0;
}

int dm_resume(struct mapped_device *md)
{
	int r = -EINVAL;
	struct dm_table *map = NULL;

retry:
	mutex_lock_nested(&md->suspend_lock, SINGLE_DEPTH_NESTING);

	if (!dm_suspended_md(md))
		goto out;

	if (dm_suspended_internally_md(md)) {
		/* already internally suspended, wait for internal resume */
		mutex_unlock(&md->suspend_lock);
		r = wait_on_bit(&md->flags, DMF_SUSPENDED_INTERNALLY, TASK_INTERRUPTIBLE);
		if (r)
			return r;
		goto retry;
	}

	map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock));
	if (!map || !dm_table_get_size(map))
		goto out;

	r = __dm_resume(md, map);
	if (r)
		goto out;

	clear_bit(DMF_SUSPENDED, &md->flags);

	r = 0;
out:
	mutex_unlock(&md->suspend_lock);

	return r;
}
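
/*
 * dm_suspend()/dm_resume() pair up around any table manipulation.
 * Flag semantics (sketch):
 *
 *	dm_suspend(md, DM_SUSPEND_LOCKFS_FLAG);	(freeze fs, flush I/O)
 *	dm_suspend(md, DM_SUSPEND_NOFLUSH_FLAG);	(queue I/O instead)
 *
 * noflush takes precedence over lockfs, as __dm_suspend() documents.
 */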

/*
 * Internal suspend/resume works like userspace-driven suspend. It waits
 * until all bios finish and prevents issuing new bios to the target drivers.
 * It may be used only from the kernel.
 */
static void __dm_internal_suspend(struct mapped_device *md, unsigned suspend_flags)
{
	struct dm_table *map = NULL;

	if (md->internal_suspend_count++)
		return; /* nested internal suspend */

	if (dm_suspended_md(md)) {
		set_bit(DMF_SUSPENDED_INTERNALLY, &md->flags);
		return; /* nest suspend */
	}

	map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock));

	/*
	 * Using TASK_UNINTERRUPTIBLE because only NOFLUSH internal suspend is
	 * supported.  Properly supporting a TASK_INTERRUPTIBLE internal suspend
	 * would require changing .presuspend to return an error -- avoid this
	 * until there is a need for more elaborate variants of internal suspend.
	 */
	(void) __dm_suspend(md, map, suspend_flags, TASK_UNINTERRUPTIBLE);

	set_bit(DMF_SUSPENDED_INTERNALLY, &md->flags);

	dm_table_postsuspend_targets(map);
}

static void __dm_internal_resume(struct mapped_device *md)
{
	BUG_ON(!md->internal_suspend_count);

	if (--md->internal_suspend_count)
		return; /* resume from nested internal suspend */

	if (dm_suspended_md(md))
		goto done; /* resume from nested suspend */

	/*
	 * NOTE: existing callers don't need to call dm_table_resume_targets
	 * (which may fail -- so best to avoid it for now by passing NULL map)
	 */
	(void) __dm_resume(md, NULL);

done:
	clear_bit(DMF_SUSPENDED_INTERNALLY, &md->flags);
	smp_mb__after_atomic();
	wake_up_bit(&md->flags, DMF_SUSPENDED_INTERNALLY);
}

void dm_internal_suspend_noflush(struct mapped_device *md)
{
	mutex_lock(&md->suspend_lock);
	__dm_internal_suspend(md, DM_SUSPEND_NOFLUSH_FLAG);
	mutex_unlock(&md->suspend_lock);
}
EXPORT_SYMBOL_GPL(dm_internal_suspend_noflush);

void dm_internal_resume(struct mapped_device *md)
{
	mutex_lock(&md->suspend_lock);
	__dm_internal_resume(md);
	mutex_unlock(&md->suspend_lock);
}
EXPORT_SYMBOL_GPL(dm_internal_resume);

/*
 * Fast variants of internal suspend/resume hold md->suspend_lock,
 * which prevents interaction with userspace-driven suspend.
 */
void dm_internal_suspend_fast(struct mapped_device *md)
{
	mutex_lock(&md->suspend_lock);
	if (dm_suspended_md(md) || dm_suspended_internally_md(md))
		return; /* nest suspend */

	set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
	synchronize_srcu(&md->io_barrier);
	flush_workqueue(md->wq);
	dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL_GPL(dm_internal_suspend_fast);

void dm_internal_resume_fast(struct mapped_device *md)
{
	if (dm_suspended_md(md) || dm_suspended_internally_md(md))
		goto done;

	dm_queue_flush(md);

done:
	mutex_unlock(&md->suspend_lock);
}
EXPORT_SYMBOL_GPL(dm_internal_resume_fast);
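
/*
 * Note the asymmetric locking of the fast variants:
 * dm_internal_suspend_fast() takes md->suspend_lock and deliberately
 * keeps it held (including on the early "nest suspend" return) until
 * dm_internal_resume_fast() releases it:
 *
 *	dm_internal_suspend_fast(md);
 *	...	(device quiesced, suspend_lock held)
 *	dm_internal_resume_fast(md);
 */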

/*-----------------------------------------------------------------
 * Event notification.
 *---------------------------------------------------------------*/
int dm_kobject_uevent(struct mapped_device *md, enum kobject_action action,
		      unsigned cookie)
{
	char udev_cookie[DM_COOKIE_LENGTH];
	char *envp[] = { udev_cookie, NULL };

	if (!cookie)
		return kobject_uevent(&disk_to_dev(md->disk)->kobj, action);
	else {
		snprintf(udev_cookie, DM_COOKIE_LENGTH, "%s=%u",
			 DM_COOKIE_ENV_VAR_NAME, cookie);
		return kobject_uevent_env(&disk_to_dev(md->disk)->kobj,
					  action, envp);
	}
}

uint32_t dm_next_uevent_seq(struct mapped_device *md)
{
	return atomic_add_return(1, &md->uevent_seq);
}

uint32_t dm_get_event_nr(struct mapped_device *md)
{
	return atomic_read(&md->event_nr);
}

int dm_wait_event(struct mapped_device *md, int event_nr)
{
	return wait_event_interruptible(md->eventq,
			(event_nr != atomic_read(&md->event_nr)));
}
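
/*
 * Event counter usage (sketch): sample the counter, act, then wait for
 * it to move on.  -ERESTARTSYS is returned if a signal interrupts the
 * wait.
 *
 *	uint32_t nr = dm_get_event_nr(md);
 *	...
 *	if (dm_wait_event(md, nr))
 *		return -ERESTARTSYS;
 */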

void dm_uevent_add(struct mapped_device *md, struct list_head *elist)
{
	unsigned long flags;

	spin_lock_irqsave(&md->uevent_lock, flags);
	list_add(elist, &md->uevent_list);
	spin_unlock_irqrestore(&md->uevent_lock, flags);
}

/*
 * The gendisk is only valid as long as you have a reference
 * count on 'md'.
 */
struct gendisk *dm_disk(struct mapped_device *md)
{
	return md->disk;
}
EXPORT_SYMBOL_GPL(dm_disk);

struct kobject *dm_kobject(struct mapped_device *md)
{
	return &md->kobj_holder.kobj;
}

struct mapped_device *dm_get_from_kobject(struct kobject *kobj)
{
	struct mapped_device *md;

	md = container_of(kobj, struct mapped_device, kobj_holder.kobj);

	if (test_bit(DMF_FREEING, &md->flags) ||
	    dm_deleting_md(md))
		return NULL;

	dm_get(md);
	return md;
}

int dm_suspended_md(struct mapped_device *md)
{
	return test_bit(DMF_SUSPENDED, &md->flags);
}

int dm_suspended_internally_md(struct mapped_device *md)
{
	return test_bit(DMF_SUSPENDED_INTERNALLY, &md->flags);
}

int dm_test_deferred_remove_flag(struct mapped_device *md)
{
	return test_bit(DMF_DEFERRED_REMOVE, &md->flags);
}

int dm_suspended(struct dm_target *ti)
{
	return dm_suspended_md(dm_table_get_md(ti->table));
}
EXPORT_SYMBOL_GPL(dm_suspended);

int dm_noflush_suspending(struct dm_target *ti)
{
	return __noflush_suspending(dm_table_get_md(ti->table));
}
EXPORT_SYMBOL_GPL(dm_noflush_suspending);

struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned type,
					    unsigned integrity, unsigned per_bio_data_size)
{
	struct dm_md_mempools *pools = kzalloc(sizeof(*pools), GFP_KERNEL);
	struct kmem_cache *cachep = NULL;
	unsigned int pool_size = 0;
	unsigned int front_pad;

	if (!pools)
		return NULL;

	type = filter_md_type(type, md);

	switch (type) {
	case DM_TYPE_BIO_BASED:
		cachep = _io_cache;
		pool_size = dm_get_reserved_bio_based_ios();
		front_pad = roundup(per_bio_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone);
		break;
	case DM_TYPE_REQUEST_BASED:
		cachep = _rq_tio_cache;
		pool_size = dm_get_reserved_rq_based_ios();
		pools->rq_pool = mempool_create_slab_pool(pool_size, _rq_cache);
		if (!pools->rq_pool)
			goto out;
		/* fall through to setup remaining rq-based pools */
	case DM_TYPE_MQ_REQUEST_BASED:
		if (!pool_size)
			pool_size = dm_get_reserved_rq_based_ios();
		front_pad = offsetof(struct dm_rq_clone_bio_info, clone);
		/* per_bio_data_size is not used. See __bind_mempools(). */
		WARN_ON(per_bio_data_size != 0);
		break;
	default:
		BUG();
	}

	if (cachep) {
		pools->io_pool = mempool_create_slab_pool(pool_size, cachep);
		if (!pools->io_pool)
			goto out;
	}

	pools->bs = bioset_create_nobvec(pool_size, front_pad);
	if (!pools->bs)
		goto out;

	if (integrity && bioset_integrity_create(pools->bs, pool_size))
		goto out;

	return pools;

out:
	dm_free_md_mempools(pools);

	return NULL;
}

void dm_free_md_mempools(struct dm_md_mempools *pools)
{
	if (!pools)
		return;

	mempool_destroy(pools->io_pool);
	mempool_destroy(pools->rq_pool);

	if (pools->bs)
		bioset_free(pools->bs);

	kfree(pools);
}
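
/*
 * Pairing sketch: pools returned by dm_alloc_md_mempools() must be
 * released with dm_free_md_mempools(), which tolerates partially
 * constructed pools (the out: path above relies on this).
 *
 *	pools = dm_alloc_md_mempools(md, type, integrity, per_bio_data_size);
 *	if (!pools)
 *		return -ENOMEM;
 *	...
 *	dm_free_md_mempools(pools);
 */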

static int dm_pr_register(struct block_device *bdev, u64 old_key, u64 new_key,
			  u32 flags)
{
	struct mapped_device *md = bdev->bd_disk->private_data;
	const struct pr_ops *ops;
	struct dm_target *tgt;
	fmode_t mode;
	int srcu_idx, r;

	r = dm_get_live_table_for_ioctl(md, &tgt, &bdev, &mode, &srcu_idx);
	if (r < 0)
		return r;

	ops = bdev->bd_disk->fops->pr_ops;
	if (ops && ops->pr_register)
		r = ops->pr_register(bdev, old_key, new_key, flags);
	else
		r = -EOPNOTSUPP;

	dm_put_live_table(md, srcu_idx);
	return r;
}

static int dm_pr_reserve(struct block_device *bdev, u64 key, enum pr_type type,
			 u32 flags)
{
	struct mapped_device *md = bdev->bd_disk->private_data;
	const struct pr_ops *ops;
	struct dm_target *tgt;
	fmode_t mode;
	int srcu_idx, r;

	r = dm_get_live_table_for_ioctl(md, &tgt, &bdev, &mode, &srcu_idx);
	if (r < 0)
		return r;

	ops = bdev->bd_disk->fops->pr_ops;
	if (ops && ops->pr_reserve)
		r = ops->pr_reserve(bdev, key, type, flags);
	else
		r = -EOPNOTSUPP;

	dm_put_live_table(md, srcu_idx);
	return r;
}

static int dm_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
{
	struct mapped_device *md = bdev->bd_disk->private_data;
	const struct pr_ops *ops;
	struct dm_target *tgt;
	fmode_t mode;
	int srcu_idx, r;

	r = dm_get_live_table_for_ioctl(md, &tgt, &bdev, &mode, &srcu_idx);
	if (r < 0)
		return r;

	ops = bdev->bd_disk->fops->pr_ops;
	if (ops && ops->pr_release)
		r = ops->pr_release(bdev, key, type);
	else
		r = -EOPNOTSUPP;

	dm_put_live_table(md, srcu_idx);
	return r;
}

static int dm_pr_preempt(struct block_device *bdev, u64 old_key, u64 new_key,
			 enum pr_type type, bool abort)
{
	struct mapped_device *md = bdev->bd_disk->private_data;
	const struct pr_ops *ops;
	struct dm_target *tgt;
	fmode_t mode;
	int srcu_idx, r;

	r = dm_get_live_table_for_ioctl(md, &tgt, &bdev, &mode, &srcu_idx);
	if (r < 0)
		return r;

	ops = bdev->bd_disk->fops->pr_ops;
	if (ops && ops->pr_preempt)
		r = ops->pr_preempt(bdev, old_key, new_key, type, abort);
	else
		r = -EOPNOTSUPP;

	dm_put_live_table(md, srcu_idx);
	return r;
}

static int dm_pr_clear(struct block_device *bdev, u64 key)
{
	struct mapped_device *md = bdev->bd_disk->private_data;
	const struct pr_ops *ops;
	struct dm_target *tgt;
	fmode_t mode;
	int srcu_idx, r;

	r = dm_get_live_table_for_ioctl(md, &tgt, &bdev, &mode, &srcu_idx);
	if (r < 0)
		return r;

	ops = bdev->bd_disk->fops->pr_ops;
	if (ops && ops->pr_clear)
		r = ops->pr_clear(bdev, key);
	else
		r = -EOPNOTSUPP;

	dm_put_live_table(md, srcu_idx);
	return r;
}

static const struct pr_ops dm_pr_ops = {
	.pr_register = dm_pr_register,
	.pr_reserve = dm_pr_reserve,
	.pr_release = dm_pr_release,
	.pr_preempt = dm_pr_preempt,
	.pr_clear = dm_pr_clear,
};
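
/*
 * All five handlers above share one passthrough pattern: resolve the
 * single underlying device via dm_get_live_table_for_ioctl(), forward
 * the persistent-reservation call to its pr_ops if present, else
 * return -EOPNOTSUPP, then drop the live-table reference.  In outline:
 *
 *	ops = bdev->bd_disk->fops->pr_ops;
 *	r = (ops && ops->pr_X) ? ops->pr_X(bdev, ...) : -EOPNOTSUPP;
 *	dm_put_live_table(md, srcu_idx);
 */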

static const struct block_device_operations dm_blk_dops = {
	.open = dm_blk_open,
	.release = dm_blk_close,
	.ioctl = dm_blk_ioctl,
	.getgeo = dm_blk_getgeo,
	.pr_ops = &dm_pr_ops,
	.owner = THIS_MODULE
};

/*
 * Module hooks
 */
module_init(dm_init);
module_exit(dm_exit);

module_param(major, uint, 0);
MODULE_PARM_DESC(major, "The major number of the device mapper");

module_param(reserved_bio_based_ios, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(reserved_bio_based_ios, "Reserved IOs in bio-based mempools");

module_param(reserved_rq_based_ios, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(reserved_rq_based_ios, "Reserved IOs in request-based mempools");

module_param(use_blk_mq, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(use_blk_mq, "Use block multiqueue for request-based DM devices");

MODULE_DESCRIPTION(DM_NAME " driver");
MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
MODULE_LICENSE("GPL");