/*
 * Device-mapper core: mapped_device lifecycle, bio and request cloning,
 * suspend/resume handling and the block-layer glue shared by all targets.
 *
 * This file is released under the GPL.
 */
#include "dm.h"
#include "dm-uevent.h"

#include <linux/init.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/moduleparam.h>
#include <linux/blkpg.h>
#include <linux/bio.h>
#include <linux/mempool.h>
#include <linux/slab.h>
#include <linux/idr.h>
#include <linux/hdreg.h>
#include <linux/delay.h>
#include <linux/wait.h>
#include <linux/kthread.h>

#include <trace/events/block.h>

#define DM_MSG_PREFIX "core"
#ifdef CONFIG_PRINTK
/*
 * ratelimit state to be used in DMXXX_LIMIT().
 */
DEFINE_RATELIMIT_STATE(dm_ratelimit_state,
		       DEFAULT_RATELIMIT_INTERVAL,
		       DEFAULT_RATELIMIT_BURST);
EXPORT_SYMBOL(dm_ratelimit_state);
#endif

/*
 * Cookies are numeric values sent with CHANGE and REMOVE
 * uevents while resuming, removing or renaming the device.
 */
#define DM_COOKIE_ENV_VAR_NAME "DM_COOKIE"
#define DM_COOKIE_LENGTH 24

static const char *_name = DM_NAME;

static unsigned int major = 0;
static unsigned int _major = 0;

static DEFINE_IDR(_minor_idr);

static DEFINE_SPINLOCK(_minor_lock);

static void do_deferred_remove(struct work_struct *w);

static DECLARE_WORK(deferred_remove_work, do_deferred_remove);

static struct workqueue_struct *deferred_remove_workqueue;

/*
 * For bio-based dm.
 * One of these is allocated per bio.
 */
struct dm_io {
	struct mapped_device *md;
	int error;
	atomic_t io_count;
	struct bio *bio;
	unsigned long start_time;
	spinlock_t endio_lock;
	struct dm_stats_aux stats_aux;
};

/*
 * For request-based dm.
 * One of these is allocated per request.
 */
struct dm_rq_target_io {
	struct mapped_device *md;
	struct dm_target *ti;
	struct request *orig, *clone;
	struct kthread_work work;
	int error;
	union map_info info;
};

/*
 * For request-based dm - the bio clones we allocate are embedded in these
 * structs.
 *
 * info->orig points back at the original bio so that clone completion
 * (end_clone_bio) can update the right portion of the original request.
 */
struct dm_rq_clone_bio_info {
	struct bio *orig;
	struct dm_rq_target_io *tio;
	struct bio clone;
};

union map_info *dm_get_rq_mapinfo(struct request *rq)
{
	if (rq && rq->end_io_data)
		return &((struct dm_rq_target_io *)rq->end_io_data)->info;
	return NULL;
}
EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo);
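
/*
 * Illustrative use (a sketch, not code from this file): a request-based
 * target's rq_end_io hook could recover its per-I/O context like this:
 *
 *	union map_info *info = dm_get_rq_mapinfo(clone);
 *	struct my_per_io *pio = info ? info->ptr : NULL;
 *
 * "my_per_io" is a hypothetical target-private structure; the only
 * contract provided here is the map_info pointer itself.
 */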

#define MINOR_ALLOCED ((void *)-1)

/*
 * Bits for the md->flags field.
 */
#define DMF_BLOCK_IO_FOR_SUSPEND 0
#define DMF_SUSPENDED 1
#define DMF_FROZEN 2
#define DMF_FREEING 3
#define DMF_DELETING 4
#define DMF_NOFLUSH_SUSPENDING 5
#define DMF_MERGE_IS_OPTIONAL 6
#define DMF_DEFERRED_REMOVE 7
#define DMF_SUSPENDED_INTERNALLY 8

/*
 * A dummy definition to make RCU happy.
 * struct dm_table should never be dereferenced in this file.
 */
struct dm_table {
	int undefined__;
};

/*
 * Work processed by per-device workqueue.
 */
struct mapped_device {
	struct srcu_struct io_barrier;
	struct mutex suspend_lock;
	atomic_t holders;
	atomic_t open_count;

	/*
	 * The current mapping.
	 * Use dm_get_live_table{_fast} or take suspend_lock for
	 * dereference.
	 */
	struct dm_table __rcu *map;

	struct list_head table_devices;
	struct mutex table_devices_lock;

	unsigned long flags;

	struct request_queue *queue;
	unsigned type;
	/* Protect queue and type against concurrent access. */
	struct mutex type_lock;

	struct target_type *immutable_target_type;

	struct gendisk *disk;
	char name[16];

	void *interface_ptr;

	/*
	 * A list of ios that arrived while we were suspended.
	 */
	atomic_t pending[2];
	wait_queue_head_t wait;
	struct work_struct work;
	struct bio_list deferred;
	spinlock_t deferred_lock;

	/*
	 * Processing queue (flush)
	 */
	struct workqueue_struct *wq;

	/*
	 * io objects are allocated from here.
	 */
	mempool_t *io_pool;
	mempool_t *rq_pool;

	struct bio_set *bs;

	/*
	 * Event handling.
	 */
	atomic_t event_nr;
	wait_queue_head_t eventq;
	atomic_t uevent_seq;
	struct list_head uevent_list;
	spinlock_t uevent_lock; /* Protect access to uevent_list */

	/*
	 * freeze/thaw support require holding onto a super block
	 */
	struct super_block *frozen_sb;
	struct block_device *bdev;

	/* forced geometry settings */
	struct hd_geometry geometry;

	/* kobject and completion */
	struct dm_kobject_holder kobj_holder;

	/* zero-length flush that will be cloned and submitted to targets */
	struct bio flush_bio;

	/* the number of internal suspends */
	unsigned internal_suspend_count;

	struct dm_stats stats;

	struct kthread_worker kworker;
	struct task_struct *kworker_task;
};

/*
 * For mempools pre-allocation at the table loading time.
 */
struct dm_md_mempools {
	mempool_t *io_pool;
	mempool_t *rq_pool;
	struct bio_set *bs;
};

struct table_device {
	struct list_head list;
	atomic_t count;
	struct dm_dev dm_dev;
};

#define RESERVED_BIO_BASED_IOS		16
#define RESERVED_REQUEST_BASED_IOS	256
#define RESERVED_MAX_IOS		1024
static struct kmem_cache *_io_cache;
static struct kmem_cache *_rq_tio_cache;
static struct kmem_cache *_rq_cache;

/*
 * Bio-based DM's mempools' reserved IOs set by the user.
 */
static unsigned reserved_bio_based_ios = RESERVED_BIO_BASED_IOS;

/*
 * Request-based DM's mempools' reserved IOs set by the user.
 */
static unsigned reserved_rq_based_ios = RESERVED_REQUEST_BASED_IOS;

static unsigned __dm_get_reserved_ios(unsigned *reserved_ios,
				      unsigned def, unsigned max)
{
	unsigned ios = ACCESS_ONCE(*reserved_ios);
	unsigned modified_ios = 0;

	if (!ios)
		modified_ios = def;
	else if (ios > max)
		modified_ios = max;

	if (modified_ios) {
		(void)cmpxchg(reserved_ios, ios, modified_ios);
		ios = modified_ios;
	}

	return ios;
}
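
/*
 * Clamping examples: with def = 16 and max = 1024, a user-supplied value
 * of 0 is replaced by 16, 5000 is capped to 1024, and 256 is returned
 * unchanged.  The cmpxchg() writes the corrected value back so later
 * readers of the variable see the effective setting; if a racing update
 * changed the value in the meantime, that update simply wins.
 */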

unsigned dm_get_reserved_bio_based_ios(void)
{
	return __dm_get_reserved_ios(&reserved_bio_based_ios,
				     RESERVED_BIO_BASED_IOS, RESERVED_MAX_IOS);
}
EXPORT_SYMBOL_GPL(dm_get_reserved_bio_based_ios);

unsigned dm_get_reserved_rq_based_ios(void)
{
	return __dm_get_reserved_ios(&reserved_rq_based_ios,
				     RESERVED_REQUEST_BASED_IOS, RESERVED_MAX_IOS);
}
EXPORT_SYMBOL_GPL(dm_get_reserved_rq_based_ios);

static int __init local_init(void)
{
	int r = -ENOMEM;

	/* allocate a slab for the dm_ios */
	_io_cache = KMEM_CACHE(dm_io, 0);
	if (!_io_cache)
		return r;

	_rq_tio_cache = KMEM_CACHE(dm_rq_target_io, 0);
	if (!_rq_tio_cache)
		goto out_free_io_cache;

	_rq_cache = kmem_cache_create("dm_clone_request", sizeof(struct request),
				      __alignof__(struct request), 0, NULL);
	if (!_rq_cache)
		goto out_free_rq_tio_cache;

	r = dm_uevent_init();
	if (r)
		goto out_free_rq_cache;

	deferred_remove_workqueue = alloc_workqueue("kdmremove", WQ_UNBOUND, 1);
	if (!deferred_remove_workqueue) {
		r = -ENOMEM;
		goto out_uevent_exit;
	}

	_major = major;
	r = register_blkdev(_major, _name);
	if (r < 0)
		goto out_free_workqueue;

	if (!_major)
		_major = r;

	return 0;

out_free_workqueue:
	destroy_workqueue(deferred_remove_workqueue);
out_uevent_exit:
	dm_uevent_exit();
out_free_rq_cache:
	kmem_cache_destroy(_rq_cache);
out_free_rq_tio_cache:
	kmem_cache_destroy(_rq_tio_cache);
out_free_io_cache:
	kmem_cache_destroy(_io_cache);

	return r;
}

static void local_exit(void)
{
	flush_scheduled_work();
	destroy_workqueue(deferred_remove_workqueue);

	kmem_cache_destroy(_rq_cache);
	kmem_cache_destroy(_rq_tio_cache);
	kmem_cache_destroy(_io_cache);
	unregister_blkdev(_major, _name);
	dm_uevent_exit();

	_major = 0;

	DMINFO("cleaned up");
}

static int (*_inits[])(void) __initdata = {
	local_init,
	dm_target_init,
	dm_linear_init,
	dm_stripe_init,
	dm_io_init,
	dm_kcopyd_init,
	dm_interface_init,
	dm_statistics_init,
};

static void (*_exits[])(void) = {
	local_exit,
	dm_target_exit,
	dm_linear_exit,
	dm_stripe_exit,
	dm_io_exit,
	dm_kcopyd_exit,
	dm_interface_exit,
	dm_statistics_exit,
};

static int __init dm_init(void)
{
	const int count = ARRAY_SIZE(_inits);

	int r, i;

	for (i = 0; i < count; i++) {
		r = _inits[i]();
		if (r)
			goto bad;
	}

	return 0;

bad:
	while (i--)
		_exits[i]();

	return r;
}

static void __exit dm_exit(void)
{
	int i = ARRAY_SIZE(_exits);

	while (i--)
		_exits[i]();

	/*
	 * Should be empty by this point.
	 */
	idr_destroy(&_minor_idr);
}

/*
 * Block device functions
 */
int dm_deleting_md(struct mapped_device *md)
{
	return test_bit(DMF_DELETING, &md->flags);
}

static int dm_blk_open(struct block_device *bdev, fmode_t mode)
{
	struct mapped_device *md;

	spin_lock(&_minor_lock);

	md = bdev->bd_disk->private_data;
	if (!md)
		goto out;

	if (test_bit(DMF_FREEING, &md->flags) ||
	    dm_deleting_md(md)) {
		md = NULL;
		goto out;
	}

	dm_get(md);
	atomic_inc(&md->open_count);
out:
	spin_unlock(&_minor_lock);

	return md ? 0 : -ENXIO;
}

static void dm_blk_close(struct gendisk *disk, fmode_t mode)
{
	struct mapped_device *md;

	spin_lock(&_minor_lock);

	md = disk->private_data;
	if (WARN_ON(!md))
		goto out;

	if (atomic_dec_and_test(&md->open_count) &&
	    (test_bit(DMF_DEFERRED_REMOVE, &md->flags)))
		queue_work(deferred_remove_workqueue, &deferred_remove_work);

	dm_put(md);
out:
	spin_unlock(&_minor_lock);
}

int dm_open_count(struct mapped_device *md)
{
	return atomic_read(&md->open_count);
}

/*
 * Guarantees nothing is using the device before it's deleted.
 */
int dm_lock_for_deletion(struct mapped_device *md, bool mark_deferred, bool only_deferred)
{
	int r = 0;

	spin_lock(&_minor_lock);

	if (dm_open_count(md)) {
		r = -EBUSY;
		if (mark_deferred)
			set_bit(DMF_DEFERRED_REMOVE, &md->flags);
	} else if (only_deferred && !test_bit(DMF_DEFERRED_REMOVE, &md->flags))
		r = -EEXIST;
	else
		set_bit(DMF_DELETING, &md->flags);

	spin_unlock(&_minor_lock);

	return r;
}

int dm_cancel_deferred_remove(struct mapped_device *md)
{
	int r = 0;

	spin_lock(&_minor_lock);

	if (test_bit(DMF_DELETING, &md->flags))
		r = -EBUSY;
	else
		clear_bit(DMF_DEFERRED_REMOVE, &md->flags);

	spin_unlock(&_minor_lock);

	return r;
}

static void do_deferred_remove(struct work_struct *w)
{
	dm_deferred_remove();
}

sector_t dm_get_size(struct mapped_device *md)
{
	return get_capacity(md->disk);
}

struct request_queue *dm_get_md_queue(struct mapped_device *md)
{
	return md->queue;
}

struct dm_stats *dm_get_stats(struct mapped_device *md)
{
	return &md->stats;
}

static int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
{
	struct mapped_device *md = bdev->bd_disk->private_data;

	return dm_get_geometry(md, geo);
}

static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
			unsigned int cmd, unsigned long arg)
{
	struct mapped_device *md = bdev->bd_disk->private_data;
	int srcu_idx;
	struct dm_table *map;
	struct dm_target *tgt;
	int r = -ENOTTY;

retry:
	map = dm_get_live_table(md, &srcu_idx);

	if (!map || !dm_table_get_size(map))
		goto out;

	/* We only support devices that have a single target */
	if (dm_table_get_num_targets(map) != 1)
		goto out;

	tgt = dm_table_get_target(map, 0);
	if (!tgt->type->ioctl)
		goto out;

	if (dm_suspended_md(md)) {
		r = -EAGAIN;
		goto out;
	}

	r = tgt->type->ioctl(tgt, cmd, arg);

out:
	dm_put_live_table(md, srcu_idx);

	if (r == -ENOTCONN) {
		msleep(10);
		goto retry;
	}

	return r;
}

static struct dm_io *alloc_io(struct mapped_device *md)
{
	return mempool_alloc(md->io_pool, GFP_NOIO);
}

static void free_io(struct mapped_device *md, struct dm_io *io)
{
	mempool_free(io, md->io_pool);
}

static void free_tio(struct mapped_device *md, struct dm_target_io *tio)
{
	bio_put(&tio->clone);
}

static struct dm_rq_target_io *alloc_rq_tio(struct mapped_device *md,
					    gfp_t gfp_mask)
{
	return mempool_alloc(md->io_pool, gfp_mask);
}

static void free_rq_tio(struct dm_rq_target_io *tio)
{
	mempool_free(tio, tio->md->io_pool);
}

static struct request *alloc_clone_request(struct mapped_device *md,
					   gfp_t gfp_mask)
{
	return mempool_alloc(md->rq_pool, gfp_mask);
}

static void free_clone_request(struct mapped_device *md, struct request *rq)
{
	mempool_free(rq, md->rq_pool);
}

static int md_in_flight(struct mapped_device *md)
{
	return atomic_read(&md->pending[READ]) +
	       atomic_read(&md->pending[WRITE]);
}

static void start_io_acct(struct dm_io *io)
{
	struct mapped_device *md = io->md;
	struct bio *bio = io->bio;
	int cpu;
	int rw = bio_data_dir(bio);

	io->start_time = jiffies;

	cpu = part_stat_lock();
	part_round_stats(cpu, &dm_disk(md)->part0);
	part_stat_unlock();
	atomic_set(&dm_disk(md)->part0.in_flight[rw],
		   atomic_inc_return(&md->pending[rw]));

	if (unlikely(dm_stats_used(&md->stats)))
		dm_stats_account_io(&md->stats, bio->bi_rw, bio->bi_iter.bi_sector,
				    bio_sectors(bio), false, 0, &io->stats_aux);
}

static void end_io_acct(struct dm_io *io)
{
	struct mapped_device *md = io->md;
	struct bio *bio = io->bio;
	unsigned long duration = jiffies - io->start_time;
	int pending;
	int rw = bio_data_dir(bio);

	generic_end_io_acct(rw, &dm_disk(md)->part0, io->start_time);

	if (unlikely(dm_stats_used(&md->stats)))
		dm_stats_account_io(&md->stats, bio->bi_rw, bio->bi_iter.bi_sector,
				    bio_sectors(bio), true, duration, &io->stats_aux);

	/*
	 * After this is decremented the bio must not be touched if it is
	 * a flush.
	 */
	pending = atomic_dec_return(&md->pending[rw]);
	atomic_set(&dm_disk(md)->part0.in_flight[rw], pending);
	pending += atomic_read(&md->pending[rw^0x1]);

	/* nudge anyone waiting on suspend queue */
	if (!pending)
		wake_up(&md->wait);
}

/*
 * Add the bio to the list of deferred io.
 */
static void queue_io(struct mapped_device *md, struct bio *bio)
{
	unsigned long flags;

	spin_lock_irqsave(&md->deferred_lock, flags);
	bio_list_add(&md->deferred, bio);
	spin_unlock_irqrestore(&md->deferred_lock, flags);
	queue_work(md->wq, &md->work);
}

/*
 * Everyone (including functions in this file), should use this
 * function to access the md->map field, and make sure they call
 * dm_put_live_table() when finished.
 */
struct dm_table *dm_get_live_table(struct mapped_device *md, int *srcu_idx) __acquires(md->io_barrier)
{
	*srcu_idx = srcu_read_lock(&md->io_barrier);

	return srcu_dereference(md->map, &md->io_barrier);
}

void dm_put_live_table(struct mapped_device *md, int srcu_idx) __releases(md->io_barrier)
{
	srcu_read_unlock(&md->io_barrier, srcu_idx);
}

void dm_sync_table(struct mapped_device *md)
{
	synchronize_srcu(&md->io_barrier);
	synchronize_rcu_expedited();
}
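
/*
 * Writers pair this with rcu_assign_pointer(): __bind() publishes a new
 * table with rcu_assign_pointer(md->map, t) and then, if there was an old
 * map, calls dm_sync_table() so that no SRCU or RCU reader can still hold
 * a reference to the table being retired (see __bind()/__unbind() below).
 */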

/*
 * A fast alternative to dm_get_live_table/dm_put_live_table.
 * The caller must not block between these two functions.
 */
static struct dm_table *dm_get_live_table_fast(struct mapped_device *md) __acquires(RCU)
{
	rcu_read_lock();
	return rcu_dereference(md->map);
}

static void dm_put_live_table_fast(struct mapped_device *md) __releases(RCU)
{
	rcu_read_unlock();
}
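
/*
 * Typical pairing (as used by dm_lld_busy() and dm_any_congested() below;
 * the caller must not sleep in between):
 *
 *	map = dm_get_live_table_fast(md);
 *	if (map)
 *		... inspect the table, no blocking ...
 *	dm_put_live_table_fast(md);
 */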

/*
 * Open a table device so we can use it as a map destination.
 */
static int open_table_device(struct table_device *td, dev_t dev,
			     struct mapped_device *md)
{
	static char *_claim_ptr = "I belong to device-mapper";
	struct block_device *bdev;

	int r;

	BUG_ON(td->dm_dev.bdev);

	bdev = blkdev_get_by_dev(dev, td->dm_dev.mode | FMODE_EXCL, _claim_ptr);
	if (IS_ERR(bdev))
		return PTR_ERR(bdev);

	r = bd_link_disk_holder(bdev, dm_disk(md));
	if (r) {
		blkdev_put(bdev, td->dm_dev.mode | FMODE_EXCL);
		return r;
	}

	td->dm_dev.bdev = bdev;
	return 0;
}

/*
 * Close a table device that we've been using.
 */
static void close_table_device(struct table_device *td, struct mapped_device *md)
{
	if (!td->dm_dev.bdev)
		return;

	bd_unlink_disk_holder(td->dm_dev.bdev, dm_disk(md));
	blkdev_put(td->dm_dev.bdev, td->dm_dev.mode | FMODE_EXCL);
	td->dm_dev.bdev = NULL;
}

static struct table_device *find_table_device(struct list_head *l, dev_t dev,
					      fmode_t mode) {
	struct table_device *td;

	list_for_each_entry(td, l, list)
		if (td->dm_dev.bdev->bd_dev == dev && td->dm_dev.mode == mode)
			return td;

	return NULL;
}

int dm_get_table_device(struct mapped_device *md, dev_t dev, fmode_t mode,
			struct dm_dev **result) {
	int r;
	struct table_device *td;

	mutex_lock(&md->table_devices_lock);
	td = find_table_device(&md->table_devices, dev, mode);
	if (!td) {
		td = kmalloc(sizeof(*td), GFP_KERNEL);
		if (!td) {
			mutex_unlock(&md->table_devices_lock);
			return -ENOMEM;
		}

		td->dm_dev.mode = mode;
		td->dm_dev.bdev = NULL;

		if ((r = open_table_device(td, dev, md))) {
			mutex_unlock(&md->table_devices_lock);
			kfree(td);
			return r;
		}

		format_dev_t(td->dm_dev.name, dev);

		atomic_set(&td->count, 0);
		list_add(&td->list, &md->table_devices);
	}
	atomic_inc(&td->count);
	mutex_unlock(&md->table_devices_lock);

	*result = &td->dm_dev;
	return 0;
}
EXPORT_SYMBOL_GPL(dm_get_table_device);

void dm_put_table_device(struct mapped_device *md, struct dm_dev *d)
{
	struct table_device *td = container_of(d, struct table_device, dm_dev);

	mutex_lock(&md->table_devices_lock);
	if (atomic_dec_and_test(&td->count)) {
		close_table_device(td, md);
		list_del(&td->list);
		kfree(td);
	}
	mutex_unlock(&md->table_devices_lock);
}
EXPORT_SYMBOL(dm_put_table_device);

static void free_table_devices(struct list_head *devices)
{
	struct list_head *tmp, *next;

	list_for_each_safe(tmp, next, devices) {
		struct table_device *td = list_entry(tmp, struct table_device, list);

		DMWARN("dm_destroy: %s still exists with %d references",
		       td->dm_dev.name, atomic_read(&td->count));
		kfree(td);
	}
}

/*
 * Get the geometry associated with a dm device
 */
int dm_get_geometry(struct mapped_device *md, struct hd_geometry *geo)
{
	*geo = md->geometry;

	return 0;
}

/*
 * Set the geometry of a device.
 */
int dm_set_geometry(struct mapped_device *md, struct hd_geometry *geo)
{
	sector_t sz = (sector_t)geo->cylinders * geo->heads * geo->sectors;

	if (geo->start > sz) {
		DMWARN("Start sector is beyond the geometry limits.");
		return -EINVAL;
	}

	md->geometry = *geo;

	return 0;
}
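
/*
 * Example of the validity check above: a geometry of 100 cylinders,
 * 4 heads and 32 sectors/track describes 100 * 4 * 32 = 12800 sectors,
 * so any requested start sector greater than 12800 is rejected with
 * -EINVAL before the geometry is stored.
 */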

/*-----------------------------------------------------------------
 * CRUD START:
 *   A more elegant soln is in the works that uses the queue
 *   merge fn, unfortunately there are a couple of changes to
 *   the block layer that I want to make for this.  So in the
 *   interests of getting something for people to use I give
 *   you this clearly demarcated crap.
 *---------------------------------------------------------------*/

static int __noflush_suspending(struct mapped_device *md)
{
	return test_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
}

/*
 * Decrements the number of outstanding ios that a bio has been
 * cloned into, completing the original io if necc.
 */
static void dec_pending(struct dm_io *io, int error)
{
	unsigned long flags;
	int io_error;
	struct bio *bio;
	struct mapped_device *md = io->md;

	/* Push-back supersedes any I/O errors */
	if (unlikely(error)) {
		spin_lock_irqsave(&io->endio_lock, flags);
		if (!(io->error > 0 && __noflush_suspending(md)))
			io->error = error;
		spin_unlock_irqrestore(&io->endio_lock, flags);
	}

	if (atomic_dec_and_test(&io->io_count)) {
		if (io->error == DM_ENDIO_REQUEUE) {
			/*
			 * Target requested pushing back the I/O.
			 */
			spin_lock_irqsave(&md->deferred_lock, flags);
			if (__noflush_suspending(md))
				bio_list_add_head(&md->deferred, io->bio);
			else
				/* noflush suspend was interrupted */
				io->error = -EIO;
			spin_unlock_irqrestore(&md->deferred_lock, flags);
		}

		io_error = io->error;
		bio = io->bio;
		end_io_acct(io);
		free_io(md, io);

		if (io_error == DM_ENDIO_REQUEUE)
			return;

		if ((bio->bi_rw & REQ_FLUSH) && bio->bi_iter.bi_size) {
			/*
			 * Preflush done for flush with data, reissue
			 * without REQ_FLUSH.
			 */
			bio->bi_rw &= ~REQ_FLUSH;
			queue_io(md, bio);
		} else {
			/* done with normal IO or empty flush */
			trace_block_bio_complete(md->queue, bio, io_error);
			bio_endio(bio, io_error);
		}
	}
}

static void disable_write_same(struct mapped_device *md)
{
	struct queue_limits *limits = dm_get_queue_limits(md);

	/* device doesn't really support WRITE SAME, disable it */
	limits->max_write_same_sectors = 0;
}

static void clone_endio(struct bio *bio, int error)
{
	int r = error;
	struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
	struct dm_io *io = tio->io;
	struct mapped_device *md = tio->io->md;
	dm_endio_fn endio = tio->ti->type->end_io;

	if (!bio_flagged(bio, BIO_UPTODATE) && !error)
		error = -EIO;

	if (endio) {
		r = endio(tio->ti, bio, error);
		if (r < 0 || r == DM_ENDIO_REQUEUE)
			/*
			 * error and requeue request are handled
			 * in dec_pending().
			 */
			error = r;
		else if (r == DM_ENDIO_INCOMPLETE)
			/* The target will handle the io */
			return;
		else if (r) {
			DMWARN("unimplemented target endio return value: %d", r);
			BUG();
		}
	}

	if (unlikely(r == -EREMOTEIO && (bio->bi_rw & REQ_WRITE_SAME) &&
		     !bdev_get_queue(bio->bi_bdev)->limits.max_write_same_sectors))
		disable_write_same(md);

	free_tio(md, tio);
	dec_pending(io, error);
}

/*
 * Partial completion handling for request-based dm
 */
static void end_clone_bio(struct bio *clone, int error)
{
	struct dm_rq_clone_bio_info *info =
		container_of(clone, struct dm_rq_clone_bio_info, clone);
	struct dm_rq_target_io *tio = info->tio;
	struct bio *bio = info->orig;
	unsigned int nr_bytes = info->orig->bi_iter.bi_size;

	bio_put(clone);

	if (tio->error)
		/*
		 * An error has already been detected on the request.
		 * Once error occurred, just let clone->end_io() handle
		 * the remainder.
		 */
		return;
	else if (error) {
		/*
		 * Don't notice the error to the upper layer yet.
		 * The error handling decision is made by the target driver,
		 * when the request is completed.
		 */
		tio->error = error;
		return;
	}

	/*
	 * I/O for the bio successfully completed.
	 * Notice the data completion to the upper layer.
	 */

	/*
	 * bios are processed from the head of the list.
	 * So the completing bio should always be rq->bio.
	 * If it's not, something wrong is happening.
	 */
	if (tio->orig->bio != bio)
		DMERR("bio completion is going in the middle of the request");

	/*
	 * Update the original request.
	 * Do not use blk_end_request() here, because it may complete
	 * the original request before the clone, and break the ordering.
	 */
	blk_update_request(tio->orig, 0, nr_bytes);
}

/*
 * Don't touch any member of the md after calling this function because
 * the md may be freed in dm_put() at the end of this function.
 * Or do dm_get() before calling this function and dm_put() later.
 */
static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
{
	atomic_dec(&md->pending[rw]);

	/* nudge anyone waiting on suspend queue */
	if (!md_in_flight(md))
		wake_up(&md->wait);

	/*
	 * Run this off this callpath, as drivers could invoke end_io while
	 * inside their request_fn (and holding the queue lock). Calling
	 * back into ->request_fn() could deadlock attempting to grab the
	 * queue lock again.
	 */
	if (run_queue)
		blk_run_queue_async(md->queue);

	/*
	 * dm_put() must be at the end of this function. See the comment above.
	 */
	dm_put(md);
}

static void free_rq_clone(struct request *clone)
{
	struct dm_rq_target_io *tio = clone->end_io_data;

	blk_rq_unprep_clone(clone);
	if (clone->q && clone->q->mq_ops)
		tio->ti->type->release_clone_rq(clone);
	else
		free_clone_request(tio->md, clone);
	free_rq_tio(tio);
}

/*
 * Complete the clone and the original request.
 * Must be called without clone's queue lock held,
 * see end_clone_request() for more details.
 */
static void dm_end_request(struct request *clone, int error)
{
	int rw = rq_data_dir(clone);
	struct dm_rq_target_io *tio = clone->end_io_data;
	struct mapped_device *md = tio->md;
	struct request *rq = tio->orig;

	if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
		rq->errors = clone->errors;
		rq->resid_len = clone->resid_len;

		if (rq->sense)
			/*
			 * We are using the sense buffer of the original
			 * request.
			 * So setting the length of the sense data is enough.
			 */
			rq->sense_len = clone->sense_len;
	}

	free_rq_clone(clone);
	blk_end_request_all(rq, error);
	rq_completed(md, rw, true);
}

static void dm_unprep_request(struct request *rq)
{
	struct dm_rq_target_io *tio = rq->special;
	struct request *clone = tio->clone;

	rq->special = NULL;
	rq->cmd_flags &= ~REQ_DONTPREP;

	if (clone)
		free_rq_clone(clone);
}

/*
 * Requeue the original request of a clone.
 */
static void dm_requeue_unmapped_original_request(struct mapped_device *md,
						 struct request *rq)
{
	int rw = rq_data_dir(rq);
	struct request_queue *q = rq->q;
	unsigned long flags;

	dm_unprep_request(rq);

	spin_lock_irqsave(q->queue_lock, flags);
	blk_requeue_request(q, rq);
	spin_unlock_irqrestore(q->queue_lock, flags);

	rq_completed(md, rw, false);
}

static void dm_requeue_unmapped_request(struct request *clone)
{
	struct dm_rq_target_io *tio = clone->end_io_data;

	dm_requeue_unmapped_original_request(tio->md, tio->orig);
}

static void __stop_queue(struct request_queue *q)
{
	blk_stop_queue(q);
}

static void stop_queue(struct request_queue *q)
{
	unsigned long flags;

	spin_lock_irqsave(q->queue_lock, flags);
	__stop_queue(q);
	spin_unlock_irqrestore(q->queue_lock, flags);
}

static void __start_queue(struct request_queue *q)
{
	if (blk_queue_stopped(q))
		blk_start_queue(q);
}

static void start_queue(struct request_queue *q)
{
	unsigned long flags;

	spin_lock_irqsave(q->queue_lock, flags);
	__start_queue(q);
	spin_unlock_irqrestore(q->queue_lock, flags);
}

static void dm_done(struct request *clone, int error, bool mapped)
{
	int r = error;
	struct dm_rq_target_io *tio = clone->end_io_data;
	dm_request_endio_fn rq_end_io = NULL;

	if (tio->ti) {
		rq_end_io = tio->ti->type->rq_end_io;

		if (mapped && rq_end_io)
			r = rq_end_io(tio->ti, clone, error, &tio->info);
	}

	if (unlikely(r == -EREMOTEIO && (clone->cmd_flags & REQ_WRITE_SAME) &&
		     !clone->q->limits.max_write_same_sectors))
		disable_write_same(tio->md);

	if (r <= 0)
		/* The target wants to complete the I/O */
		dm_end_request(clone, r);
	else if (r == DM_ENDIO_INCOMPLETE)
		/* The target will handle the I/O */
		return;
	else if (r == DM_ENDIO_REQUEUE)
		/* The target wants to requeue the I/O */
		dm_requeue_unmapped_request(clone);
	else {
		DMWARN("unimplemented target endio return value: %d", r);
		BUG();
	}
}

/*
 * Request completion handler for request-based dm
 */
static void dm_softirq_done(struct request *rq)
{
	bool mapped = true;
	struct dm_rq_target_io *tio = rq->special;
	struct request *clone = tio->clone;

	if (!clone) {
		blk_end_request_all(rq, tio->error);
		rq_completed(tio->md, rq_data_dir(rq), false);
		free_rq_tio(tio);
		return;
	}

	if (rq->cmd_flags & REQ_FAILED)
		mapped = false;

	dm_done(clone, tio->error, mapped);
}

/*
 * Complete the clone and the original request with the error status
 * through softirq context.
 */
static void dm_complete_request(struct request *rq, int error)
{
	struct dm_rq_target_io *tio = rq->special;

	tio->error = error;
	blk_complete_request(rq);
}

/*
 * Complete the not-mapped clone and the original request with the error status
 * through softirq context.
 * Target's rq_end_io() function isn't called.
 * This may be used when the target's map_rq() or clone_and_map_rq() functions fail.
 */
static void dm_kill_unmapped_request(struct request *rq, int error)
{
	rq->cmd_flags |= REQ_FAILED;
	dm_complete_request(rq, error);
}

/*
 * Called with the clone's queue lock held (for non-blk-mq request-based dm)
 */
static void end_clone_request(struct request *clone, int error)
{
	struct dm_rq_target_io *tio = clone->end_io_data;

	if (!clone->q->mq_ops) {
		/*
		 * For just cleaning up the information of the queue in which
		 * the clone was dispatched.
		 * The clone is *NOT* freed actually here because it is alloced
		 * from dm own mempool (REQ_ALLOCED isn't set).
		 */
		__blk_put_request(clone->q, clone);
	}

	/*
	 * Actual request completion is done in a softirq context which doesn't
	 * hold the clone's queue lock.  Otherwise, deadlock could occur because:
	 *     - another request may be submitted by the upper level driver
	 *       of the stacking during the completion
	 *     - the submission which requires queue lock may be done
	 *       against this clone's queue
	 */
	dm_complete_request(tio->orig, error);
}

/*
 * Return maximum size of I/O possible at the supplied sector up to the
 * current target boundary.
 */
static sector_t max_io_len_target_boundary(sector_t sector, struct dm_target *ti)
{
	sector_t target_offset = dm_target_offset(ti, sector);

	return ti->len - target_offset;
}

static sector_t max_io_len(sector_t sector, struct dm_target *ti)
{
	sector_t len = max_io_len_target_boundary(sector, ti);
	sector_t offset, max_len;

	/*
	 * Does the target need to split even further?
	 */
	if (ti->max_io_len) {
		offset = dm_target_offset(ti, sector);
		if (unlikely(ti->max_io_len & (ti->max_io_len - 1)))
			max_len = sector_div(offset, ti->max_io_len);
		else
			max_len = offset & (ti->max_io_len - 1);
		max_len = ti->max_io_len - max_len;

		if (len > max_len)
			len = max_len;
	}

	return len;
}
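
/*
 * Worked example: for a target with ti->len = 1000 sectors and
 * max_io_len = 128 (a power of two), an I/O at target offset 200 may span
 * at most min(1000 - 200, 128 - (200 & 127)) = min(800, 56) = 56 sectors
 * before hitting the next max_io_len boundary.
 */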

int dm_set_target_max_io_len(struct dm_target *ti, sector_t len)
{
	if (len > UINT_MAX) {
		DMERR("Specified maximum size of target IO (%llu) exceeds limit (%u)",
		      (unsigned long long)len, UINT_MAX);
		ti->error = "Maximum size of target IO is too large";
		return -EINVAL;
	}

	ti->max_io_len = (uint32_t) len;

	return 0;
}
EXPORT_SYMBOL_GPL(dm_set_target_max_io_len);
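
/*
 * A target constructor would typically call this while parsing its
 * arguments, e.g. (illustrative sketch only):
 *
 *	if (dm_set_target_max_io_len(ti, chunk_size))
 *		return -EINVAL;
 *
 * where "chunk_size" is a hypothetical target parameter in sectors and
 * must fit in 32 bits.
 */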

/*
 * A target may call dm_accept_partial_bio only from the map routine.  It is
 * allowed for all bio types except REQ_FLUSH.
 *
 * dm_accept_partial_bio informs the dm that the target only wants to process
 * additional n_sectors sectors of the bio and the rest of the data should be
 * sent in a next bio.
 *
 * A diagram that explains the arithmetics:
 * +--------------------+---------------+-------+
 * |         1          |       2       |   3   |
 * +--------------------+---------------+-------+
 *
 * <-------------- *tio->len_ptr --------------->
 *                      <------- bi_size ------->
 *                      <-- n_sectors -->
 *
 * Region 1 was already iterated over with bio_advance or similar function.
 *	(it may be empty if the target doesn't use bio_advance)
 * Region 2 is the remaining bio size that the target wants to process.
 *	(it may be empty if region 1 is non-empty, although there is no reason
 *	 to make it empty)
 * The target requires that region 3 is to be sent in the next bio.
 *
 * If the target wants to receive multiple copies of the bio (via num_*bios, etc),
 * the partially processed part (the sum of regions 1+2) must be the same for all
 * copies of the bio.
 */
void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors)
{
	struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
	unsigned bi_size = bio->bi_iter.bi_size >> SECTOR_SHIFT;
	BUG_ON(bio->bi_rw & REQ_FLUSH);
	BUG_ON(bi_size > *tio->len_ptr);
	BUG_ON(n_sectors > bi_size);
	*tio->len_ptr -= bi_size - n_sectors;
	bio->bi_iter.bi_size = n_sectors << SECTOR_SHIFT;
}
EXPORT_SYMBOL_GPL(dm_accept_partial_bio);
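
/*
 * Illustrative use from a target's map function, the only context in
 * which this call is allowed (a sketch; "my_map" and "my_max" are
 * hypothetical):
 *
 *	static int my_map(struct dm_target *ti, struct bio *bio)
 *	{
 *		if (bio_sectors(bio) > my_max)
 *			dm_accept_partial_bio(bio, my_max);
 *		...
 *		return DM_MAPIO_REMAPPED;
 *	}
 *
 * dm core then resubmits the unprocessed tail as a new bio.
 */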

static void __map_bio(struct dm_target_io *tio)
{
	int r;
	sector_t sector;
	struct mapped_device *md;
	struct bio *clone = &tio->clone;
	struct dm_target *ti = tio->ti;

	clone->bi_end_io = clone_endio;

	/*
	 * Map the clone.  If r == 0 we don't need to do
	 * anything, the target has assumed ownership of
	 * this io.
	 */
	atomic_inc(&tio->io->io_count);
	sector = clone->bi_iter.bi_sector;
	r = ti->type->map(ti, clone);
	if (r == DM_MAPIO_REMAPPED) {
		/* the bio has been remapped so dispatch it */

		trace_block_bio_remap(bdev_get_queue(clone->bi_bdev), clone,
				      tio->io->bio->bi_bdev->bd_dev, sector);

		generic_make_request(clone);
	} else if (r < 0 || r == DM_MAPIO_REQUEUE) {
		/* error the io and bail out, or requeue it if needed */
		md = tio->io->md;
		dec_pending(tio->io, r);
		free_tio(md, tio);
	} else if (r) {
		DMWARN("unimplemented target map return value: %d", r);
		BUG();
	}
}

struct clone_info {
	struct mapped_device *md;
	struct dm_table *map;
	struct bio *bio;
	struct dm_io *io;
	sector_t sector;
	unsigned sector_count;
};

static void bio_setup_sector(struct bio *bio, sector_t sector, unsigned len)
{
	bio->bi_iter.bi_sector = sector;
	bio->bi_iter.bi_size = to_bytes(len);
}

/*
 * Creates a bio that consists of range of complete bvecs.
 */
static void clone_bio(struct dm_target_io *tio, struct bio *bio,
		      sector_t sector, unsigned len)
{
	struct bio *clone = &tio->clone;

	__bio_clone_fast(clone, bio);

	if (bio_integrity(bio))
		bio_integrity_clone(clone, bio, GFP_NOIO);

	bio_advance(clone, to_bytes(sector - clone->bi_iter.bi_sector));
	clone->bi_iter.bi_size = to_bytes(len);

	if (bio_integrity(bio))
		bio_integrity_trim(clone, 0, len);
}

static struct dm_target_io *alloc_tio(struct clone_info *ci,
				      struct dm_target *ti,
				      unsigned target_bio_nr)
{
	struct dm_target_io *tio;
	struct bio *clone;

	clone = bio_alloc_bioset(GFP_NOIO, 0, ci->md->bs);
	tio = container_of(clone, struct dm_target_io, clone);

	tio->io = ci->io;
	tio->ti = ti;
	tio->target_bio_nr = target_bio_nr;

	return tio;
}

static void __clone_and_map_simple_bio(struct clone_info *ci,
				       struct dm_target *ti,
				       unsigned target_bio_nr, unsigned *len)
{
	struct dm_target_io *tio = alloc_tio(ci, ti, target_bio_nr);
	struct bio *clone = &tio->clone;

	tio->len_ptr = len;

	__bio_clone_fast(clone, ci->bio);
	if (len)
		bio_setup_sector(clone, ci->sector, *len);

	__map_bio(tio);
}

static void __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti,
				  unsigned num_bios, unsigned *len)
{
	unsigned target_bio_nr;

	for (target_bio_nr = 0; target_bio_nr < num_bios; target_bio_nr++)
		__clone_and_map_simple_bio(ci, ti, target_bio_nr, len);
}

static int __send_empty_flush(struct clone_info *ci)
{
	unsigned target_nr = 0;
	struct dm_target *ti;

	BUG_ON(bio_has_data(ci->bio));
	while ((ti = dm_table_get_target(ci->map, target_nr++)))
		__send_duplicate_bios(ci, ti, ti->num_flush_bios, NULL);

	return 0;
}
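
/*
 * An empty flush is duplicated ti->num_flush_bios times for every target
 * in the table; with ci->sector_count = 0 the clones carry no data and
 * convey only the flush semantics (the md->flush_bio template is set up
 * with WRITE_FLUSH in alloc_dev()).
 */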

static void __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti,
				     sector_t sector, unsigned *len)
{
	struct bio *bio = ci->bio;
	struct dm_target_io *tio;
	unsigned target_bio_nr;
	unsigned num_target_bios = 1;

	/*
	 * Does the target want to receive duplicate copies of the bio?
	 */
	if (bio_data_dir(bio) == WRITE && ti->num_write_bios)
		num_target_bios = ti->num_write_bios(ti, bio);

	for (target_bio_nr = 0; target_bio_nr < num_target_bios; target_bio_nr++) {
		tio = alloc_tio(ci, ti, target_bio_nr);
		tio->len_ptr = len;
		clone_bio(tio, bio, sector, *len);
		__map_bio(tio);
	}
}

typedef unsigned (*get_num_bios_fn)(struct dm_target *ti);

static unsigned get_num_discard_bios(struct dm_target *ti)
{
	return ti->num_discard_bios;
}

static unsigned get_num_write_same_bios(struct dm_target *ti)
{
	return ti->num_write_same_bios;
}

typedef bool (*is_split_required_fn)(struct dm_target *ti);

static bool is_split_required_for_discard(struct dm_target *ti)
{
	return ti->split_discard_bios;
}

static int __send_changing_extent_only(struct clone_info *ci,
				       get_num_bios_fn get_num_bios,
				       is_split_required_fn is_split_required)
{
	struct dm_target *ti;
	unsigned len;
	unsigned num_bios;

	do {
		ti = dm_table_find_target(ci->map, ci->sector);
		if (!dm_target_is_valid(ti))
			return -EIO;

		/*
		 * Even though the device advertised support for this type of
		 * request, that does not mean every target supports it, and
		 * reconfiguration might also have changed that since the
		 * check was performed.
		 */
		num_bios = get_num_bios ? get_num_bios(ti) : 0;
		if (!num_bios)
			return -EOPNOTSUPP;

		if (is_split_required && !is_split_required(ti))
			len = min((sector_t)ci->sector_count, max_io_len_target_boundary(ci->sector, ti));
		else
			len = min((sector_t)ci->sector_count, max_io_len(ci->sector, ti));

		__send_duplicate_bios(ci, ti, num_bios, &len);

		ci->sector += len;
	} while (ci->sector_count -= len);

	return 0;
}

static int __send_discard(struct clone_info *ci)
{
	return __send_changing_extent_only(ci, get_num_discard_bios,
					   is_split_required_for_discard);
}

static int __send_write_same(struct clone_info *ci)
{
	return __send_changing_extent_only(ci, get_num_write_same_bios, NULL);
}

/*
 * Select the correct strategy for processing a non-flush bio.
 */
static int __split_and_process_non_flush(struct clone_info *ci)
{
	struct bio *bio = ci->bio;
	struct dm_target *ti;
	unsigned len;

	if (unlikely(bio->bi_rw & REQ_DISCARD))
		return __send_discard(ci);
	else if (unlikely(bio->bi_rw & REQ_WRITE_SAME))
		return __send_write_same(ci);

	ti = dm_table_find_target(ci->map, ci->sector);
	if (!dm_target_is_valid(ti))
		return -EIO;

	len = min_t(sector_t, max_io_len(ci->sector, ti), ci->sector_count);

	__clone_and_map_data_bio(ci, ti, ci->sector, &len);

	ci->sector += len;
	ci->sector_count -= len;

	return 0;
}

/*
 * Entry point to split a bio into clones and submit them to the targets.
 */
static void __split_and_process_bio(struct mapped_device *md,
				    struct dm_table *map, struct bio *bio)
{
	struct clone_info ci;
	int error = 0;

	if (unlikely(!map)) {
		bio_io_error(bio);
		return;
	}

	ci.map = map;
	ci.md = md;
	ci.io = alloc_io(md);
	ci.io->error = 0;
	atomic_set(&ci.io->io_count, 1);
	ci.io->bio = bio;
	ci.io->md = md;
	spin_lock_init(&ci.io->endio_lock);
	ci.sector = bio->bi_iter.bi_sector;

	start_io_acct(ci.io);

	if (bio->bi_rw & REQ_FLUSH) {
		ci.bio = &ci.md->flush_bio;
		ci.sector_count = 0;
		error = __send_empty_flush(&ci);
		/* dec_pending submits any data associated with flush */
	} else {
		ci.bio = bio;
		ci.sector_count = bio_sectors(bio);
		while (ci.sector_count && !error)
			error = __split_and_process_non_flush(&ci);
	}

	/* drop the extra reference count */
	dec_pending(ci.io, error);
}

/*-----------------------------------------------------------------
 * CRUD END
 *---------------------------------------------------------------*/

static int dm_merge_bvec(struct request_queue *q,
			 struct bvec_merge_data *bvm,
			 struct bio_vec *biovec)
{
	struct mapped_device *md = q->queuedata;
	struct dm_table *map = dm_get_live_table_fast(md);
	struct dm_target *ti;
	sector_t max_sectors;
	int max_size = 0;

	if (unlikely(!map))
		goto out;

	ti = dm_table_find_target(map, bvm->bi_sector);
	if (!dm_target_is_valid(ti))
		goto out;

	/*
	 * Find maximum amount of I/O that won't need splitting
	 */
	max_sectors = min(max_io_len(bvm->bi_sector, ti),
			  (sector_t) queue_max_sectors(q));
	max_size = (max_sectors << SECTOR_SHIFT) - bvm->bi_size;
	if (unlikely(max_size < 0)) /* this shouldn't _ever_ happen */
		max_size = 0;

	/*
	 * merge_bvec_fn() returns number of bytes
	 * it can accept at this offset
	 * max is precomputed maximal io size
	 */
	if (max_size && ti->type->merge)
		max_size = ti->type->merge(ti, bvm, biovec, max_size);
	/*
	 * If the target doesn't support merge method and some of the devices
	 * provided their merge_bvec method (we know this by looking at
	 * queue_max_hw_sectors), then we can't allow bios with multiple vector
	 * entries.  So always set max_size to 0, and the code below allows
	 * just one page.
	 */
	else if (queue_max_hw_sectors(q) <= PAGE_SIZE >> 9)
		max_size = 0;

out:
	dm_put_live_table_fast(md);
	/*
	 * Always allow an entire first page
	 */
	if (max_size <= biovec->bv_len && !(bvm->bi_size >> SECTOR_SHIFT))
		max_size = biovec->bv_len;

	return max_size;
}
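
/*
 * Worked example: with queue_max_sectors() = 1024, a bvm->bi_sector that
 * leaves 56 sectors to the target boundary and bvm->bi_size = 4096 bytes
 * already queued, max_size = (56 << 9) - 4096 = 24576 bytes, which may
 * then be reduced further by the target's own merge method.
 */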

/*
 * The request function that just remaps the bio built up by
 * dm_merge_bvec.
 */
static void _dm_request(struct request_queue *q, struct bio *bio)
{
	int rw = bio_data_dir(bio);
	struct mapped_device *md = q->queuedata;
	int srcu_idx;
	struct dm_table *map;

	map = dm_get_live_table(md, &srcu_idx);

	generic_start_io_acct(rw, bio_sectors(bio), &dm_disk(md)->part0);

	/* if we're suspended, we have to queue this io for later */
	if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))) {
		dm_put_live_table(md, srcu_idx);

		if (bio_rw(bio) != READA)
			queue_io(md, bio);
		else
			bio_io_error(bio);
		return;
	}

	__split_and_process_bio(md, map, bio);
	dm_put_live_table(md, srcu_idx);
	return;
}

int dm_request_based(struct mapped_device *md)
{
	return blk_queue_stackable(md->queue);
}

static void dm_request(struct request_queue *q, struct bio *bio)
{
	struct mapped_device *md = q->queuedata;

	if (dm_request_based(md))
		blk_queue_bio(q, bio);
	else
		_dm_request(q, bio);
}

static void dm_dispatch_clone_request(struct request *clone, struct request *rq)
{
	int r;

	if (blk_queue_io_stat(clone->q))
		clone->cmd_flags |= REQ_IO_STAT;

	clone->start_time = jiffies;
	r = blk_insert_cloned_request(clone->q, clone);
	if (r)
		/* must complete clone in terms of original request */
		dm_complete_request(rq, r);
}

static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig,
				 void *data)
{
	struct dm_rq_target_io *tio = data;
	struct dm_rq_clone_bio_info *info =
		container_of(bio, struct dm_rq_clone_bio_info, clone);

	info->orig = bio_orig;
	info->tio = tio;
	bio->bi_end_io = end_clone_bio;

	return 0;
}

static int setup_clone(struct request *clone, struct request *rq,
		       struct dm_rq_target_io *tio, gfp_t gfp_mask)
{
	int r;

	r = blk_rq_prep_clone(clone, rq, tio->md->bs, gfp_mask,
			      dm_rq_bio_constructor, tio);
	if (r)
		return r;

	clone->cmd = rq->cmd;
	clone->cmd_len = rq->cmd_len;
	clone->sense = rq->sense;
	clone->end_io = end_clone_request;
	clone->end_io_data = tio;

	tio->clone = clone;

	return 0;
}

static struct request *clone_rq(struct request *rq, struct mapped_device *md,
				struct dm_rq_target_io *tio, gfp_t gfp_mask)
{
	struct request *clone = alloc_clone_request(md, gfp_mask);

	if (!clone)
		return NULL;

	blk_rq_init(NULL, clone);
	if (setup_clone(clone, rq, tio, gfp_mask)) {
		/* -ENOMEM */
		free_clone_request(md, clone);
		return NULL;
	}

	return clone;
}

static void map_tio_request(struct kthread_work *work);

static struct dm_rq_target_io *prep_tio(struct request *rq,
					struct mapped_device *md, gfp_t gfp_mask)
{
	struct dm_rq_target_io *tio;
	int srcu_idx;
	struct dm_table *table;

	tio = alloc_rq_tio(md, gfp_mask);
	if (!tio)
		return NULL;

	tio->md = md;
	tio->ti = NULL;
	tio->clone = NULL;
	tio->orig = rq;
	tio->error = 0;
	memset(&tio->info, 0, sizeof(tio->info));
	init_kthread_work(&tio->work, map_tio_request);

	table = dm_get_live_table(md, &srcu_idx);
	if (!dm_table_mq_request_based(table)) {
		if (!clone_rq(rq, md, tio, gfp_mask)) {
			dm_put_live_table(md, srcu_idx);
			free_rq_tio(tio);
			return NULL;
		}
	}
	dm_put_live_table(md, srcu_idx);

	return tio;
}

/*
 * Called with the queue lock held.
 */
static int dm_prep_fn(struct request_queue *q, struct request *rq)
{
	struct mapped_device *md = q->queuedata;
	struct dm_rq_target_io *tio;

	if (unlikely(rq->special)) {
		DMWARN("Already has something in rq->special.");
		return BLKPREP_KILL;
	}

	tio = prep_tio(rq, md, GFP_ATOMIC);
	if (!tio)
		return BLKPREP_DEFER;

	rq->special = tio;
	rq->cmd_flags |= REQ_DONTPREP;

	return BLKPREP_OK;
}

/*
 * Returns:
 * 0                : the request has been processed
 * DM_MAPIO_REQUEUE : the original request needs to be requeued
 * < 0              : the request was completed due to failure
 */
static int map_request(struct dm_target *ti, struct request *rq,
		       struct mapped_device *md)
{
	int r;
	struct dm_rq_target_io *tio = rq->special;
	struct request *clone = NULL;

	if (tio->clone) {
		clone = tio->clone;
		r = ti->type->map_rq(ti, clone, &tio->info);
	} else {
		r = ti->type->clone_and_map_rq(ti, rq, &tio->info, &clone);
		if (r < 0) {
			/* The target wants to complete the I/O */
			dm_kill_unmapped_request(rq, r);
			return r;
		}
		if (IS_ERR(clone))
			return DM_MAPIO_REQUEUE;
		if (setup_clone(clone, rq, tio, GFP_KERNEL)) {
			/* -ENOMEM */
			ti->type->release_clone_rq(clone);
			return DM_MAPIO_REQUEUE;
		}
	}

	switch (r) {
	case DM_MAPIO_SUBMITTED:
		/* The target has taken the I/O to submit by itself later */
		break;
	case DM_MAPIO_REMAPPED:
		/* The target has remapped the I/O so dispatch it */
		trace_block_rq_remap(clone->q, clone, disk_devt(dm_disk(md)),
				     blk_rq_pos(rq));
		dm_dispatch_clone_request(clone, rq);
		break;
	case DM_MAPIO_REQUEUE:
		/* The target wants to requeue the I/O */
		dm_requeue_unmapped_request(clone);
		break;
	default:
		if (r > 0) {
			DMWARN("unimplemented target map return value: %d", r);
			BUG();
		}

		/* The target wants to complete the I/O */
		dm_kill_unmapped_request(rq, r);
		return r;
	}

	return 0;
}

static void map_tio_request(struct kthread_work *work)
{
	struct dm_rq_target_io *tio = container_of(work, struct dm_rq_target_io, work);
	struct request *rq = tio->orig;
	struct mapped_device *md = tio->md;

	if (map_request(tio->ti, rq, md) == DM_MAPIO_REQUEUE)
		dm_requeue_unmapped_original_request(md, rq);
}

static void dm_start_request(struct mapped_device *md, struct request *orig)
{
	blk_start_request(orig);
	atomic_inc(&md->pending[rq_data_dir(orig)]);

	/*
	 * Hold the md reference here for the in-flight I/O.
	 * We can't rely on the reference count by device opener,
	 * because the device may be closed during the request completion
	 * when all bios are completed.
	 * See the comment in rq_completed() as well.
	 */
	dm_get(md);
}

/*
 * q->request_fn for request-based dm.
 * Called with the queue lock held.
 */
static void dm_request_fn(struct request_queue *q)
{
	struct mapped_device *md = q->queuedata;
	int srcu_idx;
	struct dm_table *map = dm_get_live_table(md, &srcu_idx);
	struct dm_target *ti;
	struct request *rq;
	struct dm_rq_target_io *tio;
	sector_t pos;

	/*
	 * For suspend, check blk_queue_stopped() and increment
	 * ->pending within a single queue_lock not to increment the
	 * number of in-flight I/Os after the queue is stopped in
	 * dm_suspend().
	 */
	while (!blk_queue_stopped(q)) {
		rq = blk_peek_request(q);
		if (!rq)
			goto delay_and_out;

		/* always use block 0 to find the target for flushes for now */
		pos = 0;
		if (!(rq->cmd_flags & REQ_FLUSH))
			pos = blk_rq_pos(rq);

		ti = dm_table_find_target(map, pos);
		if (!dm_target_is_valid(ti)) {
			/*
			 * Must perform setup, that rq_completed() requires,
			 * before calling dm_kill_unmapped_request
			 */
			DMERR_LIMIT("request attempted access beyond the end of device");
			dm_start_request(md, rq);
			dm_kill_unmapped_request(rq, -EIO);
			continue;
		}

		if (ti->type->busy && ti->type->busy(ti))
			goto delay_and_out;

		dm_start_request(md, rq);

		tio = rq->special;
		/* Establish tio->ti before queuing work (map_tio_request) */
		tio->ti = ti;
		queue_kthread_work(&md->kworker, &tio->work);
		BUG_ON(!irqs_disabled());
	}

	goto out;

delay_and_out:
	blk_delay_queue(q, HZ / 10);
out:
	dm_put_live_table(md, srcu_idx);
}

int dm_underlying_device_busy(struct request_queue *q)
{
	return blk_lld_busy(q);
}
EXPORT_SYMBOL_GPL(dm_underlying_device_busy);

static int dm_lld_busy(struct request_queue *q)
{
	int r;
	struct mapped_device *md = q->queuedata;
	struct dm_table *map = dm_get_live_table_fast(md);

	if (!map || test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))
		r = 1;
	else
		r = dm_table_any_busy_target(map);

	dm_put_live_table_fast(md);

	return r;
}

static int dm_any_congested(void *congested_data, int bdi_bits)
{
	int r = bdi_bits;
	struct mapped_device *md = congested_data;
	struct dm_table *map;

	if (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) {
		map = dm_get_live_table_fast(md);
		if (map) {
			/*
			 * Request-based dm cares about only own queue for
			 * the query about congestion status of request_queue
			 */
			if (dm_request_based(md))
				r = md->queue->backing_dev_info.state &
				    bdi_bits;
			else
				r = dm_table_any_congested(map, bdi_bits);
		}
		dm_put_live_table_fast(md);
	}

	return r;
}

/*-----------------------------------------------------------------
 * An IDR is used to keep track of allocated minor numbers.
 *---------------------------------------------------------------*/
static void free_minor(int minor)
{
	spin_lock(&_minor_lock);
	idr_remove(&_minor_idr, minor);
	spin_unlock(&_minor_lock);
}

/*
 * See if the device with a specific minor # is free.
 */
static int specific_minor(int minor)
{
	int r;

	if (minor >= (1 << MINORBITS))
		return -EINVAL;

	idr_preload(GFP_KERNEL);
	spin_lock(&_minor_lock);

	r = idr_alloc(&_minor_idr, MINOR_ALLOCED, minor, minor + 1, GFP_NOWAIT);

	spin_unlock(&_minor_lock);
	idr_preload_end();
	if (r < 0)
		return r == -ENOSPC ? -EBUSY : r;
	return 0;
}

static int next_free_minor(int *minor)
{
	int r;

	idr_preload(GFP_KERNEL);
	spin_lock(&_minor_lock);

	r = idr_alloc(&_minor_idr, MINOR_ALLOCED, 0, 1 << MINORBITS, GFP_NOWAIT);

	spin_unlock(&_minor_lock);
	idr_preload_end();
	if (r < 0)
		return r;
	*minor = r;
	return 0;
}
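
/*
 * Both allocators store the MINOR_ALLOCED sentinel rather than the
 * mapped_device itself: alloc_dev() only swaps in the real pointer (via
 * idr_replace()) once the device is fully constructed, so a concurrent
 * dm_get_md() can never hand out a half-initialised md.
 */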

static const struct block_device_operations dm_blk_dops;

static void dm_wq_work(struct work_struct *work);

static void dm_init_md_queue(struct mapped_device *md)
{
	/*
	 * Request-based dm devices cannot be stacked on top of bio-based dm
	 * devices.  The type of this dm device has not been decided yet.
	 * The type is decided at the first table loading time.
	 * To prevent problematic device stacking, clear the queue flag
	 * for request stacking support until then.
	 *
	 * This queue is new, so no concurrency on the queue_flags.
	 */
	queue_flag_clear_unlocked(QUEUE_FLAG_STACKABLE, md->queue);

	md->queue->queuedata = md;
	md->queue->backing_dev_info.congested_fn = dm_any_congested;
	md->queue->backing_dev_info.congested_data = md;
	blk_queue_make_request(md->queue, dm_request);
	blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
	blk_queue_merge_bvec(md->queue, dm_merge_bvec);
}

/*
 * Allocate and initialise a blank device with a given minor.
 */
static struct mapped_device *alloc_dev(int minor)
{
	int r;
	struct mapped_device *md = kzalloc(sizeof(*md), GFP_KERNEL);
	void *old_md;

	if (!md) {
		DMWARN("unable to allocate device, out of memory.");
		return NULL;
	}

	if (!try_module_get(THIS_MODULE))
		goto bad_module_get;

	/* get a minor number for the dev */
	if (minor == DM_ANY_MINOR)
		r = next_free_minor(&minor);
	else
		r = specific_minor(minor);
	if (r < 0)
		goto bad_minor;

	r = init_srcu_struct(&md->io_barrier);
	if (r < 0)
		goto bad_io_barrier;

	md->type = DM_TYPE_NONE;
	mutex_init(&md->suspend_lock);
	mutex_init(&md->type_lock);
	mutex_init(&md->table_devices_lock);
	spin_lock_init(&md->deferred_lock);
	atomic_set(&md->holders, 1);
	atomic_set(&md->open_count, 0);
	atomic_set(&md->event_nr, 0);
	atomic_set(&md->uevent_seq, 0);
	INIT_LIST_HEAD(&md->uevent_list);
	INIT_LIST_HEAD(&md->table_devices);
	spin_lock_init(&md->uevent_lock);

	md->queue = blk_alloc_queue(GFP_KERNEL);
	if (!md->queue)
		goto bad_queue;

	dm_init_md_queue(md);

	md->disk = alloc_disk(1);
	if (!md->disk)
		goto bad_disk;

	atomic_set(&md->pending[0], 0);
	atomic_set(&md->pending[1], 0);
	init_waitqueue_head(&md->wait);
	INIT_WORK(&md->work, dm_wq_work);
	init_waitqueue_head(&md->eventq);
	init_completion(&md->kobj_holder.completion);
	md->kworker_task = NULL;

	md->disk->major = _major;
	md->disk->first_minor = minor;
	md->disk->fops = &dm_blk_dops;
	md->disk->queue = md->queue;
	md->disk->private_data = md;
	sprintf(md->disk->disk_name, "dm-%d", minor);
	add_disk(md->disk);
	format_dev_t(md->name, MKDEV(_major, minor));

	md->wq = alloc_workqueue("kdmflush", WQ_MEM_RECLAIM, 0);
	if (!md->wq)
		goto bad_thread;

	md->bdev = bdget_disk(md->disk, 0);
	if (!md->bdev)
		goto bad_bdev;

	bio_init(&md->flush_bio);
	md->flush_bio.bi_bdev = md->bdev;
	md->flush_bio.bi_rw = WRITE_FLUSH;

	dm_stats_init(&md->stats);

	/* Populate the mapping, nobody knows we exist yet */
	spin_lock(&_minor_lock);
	old_md = idr_replace(&_minor_idr, md, minor);
	spin_unlock(&_minor_lock);

	BUG_ON(old_md != MINOR_ALLOCED);

	return md;

bad_bdev:
	destroy_workqueue(md->wq);
bad_thread:
	del_gendisk(md->disk);
	put_disk(md->disk);
bad_disk:
	blk_cleanup_queue(md->queue);
bad_queue:
	cleanup_srcu_struct(&md->io_barrier);
bad_io_barrier:
	free_minor(minor);
bad_minor:
	module_put(THIS_MODULE);
bad_module_get:
	kfree(md);
	return NULL;
}

static void unlock_fs(struct mapped_device *md);

static void free_dev(struct mapped_device *md)
{
	int minor = MINOR(disk_devt(md->disk));

	unlock_fs(md);
	destroy_workqueue(md->wq);

	if (md->kworker_task)
		kthread_stop(md->kworker_task);
	if (md->io_pool)
		mempool_destroy(md->io_pool);
	if (md->rq_pool)
		mempool_destroy(md->rq_pool);
	if (md->bs)
		bioset_free(md->bs);

	cleanup_srcu_struct(&md->io_barrier);
	free_table_devices(&md->table_devices);
	dm_stats_cleanup(&md->stats);

	spin_lock(&_minor_lock);
	md->disk->private_data = NULL;
	spin_unlock(&_minor_lock);
	if (blk_get_integrity(md->disk))
		blk_integrity_unregister(md->disk);
	del_gendisk(md->disk);
	put_disk(md->disk);
	blk_cleanup_queue(md->queue);
	bdput(md->bdev);
	free_minor(minor);

	module_put(THIS_MODULE);
	kfree(md);
}

static void __bind_mempools(struct mapped_device *md, struct dm_table *t)
{
	struct dm_md_mempools *p = dm_table_get_md_mempools(t);

	if (md->io_pool && md->bs) {
		/* The md already has necessary mempools. */
		if (dm_table_get_type(t) == DM_TYPE_BIO_BASED) {
			/*
			 * Reload bioset because front_pad may have changed
			 * because a different table was loaded.
			 */
			bioset_free(md->bs);
			md->bs = p->bs;
			p->bs = NULL;
		}
		/*
		 * There's no need to reload with request-based dm
		 * because the size of front_pad doesn't change.
		 * Note for future: If you are to reload bioset,
		 * prep-ed requests in the queue may refer
		 * to bio from the old bioset, so you must walk
		 * through the queue to unprep.
		 */
		goto out;
	}

	BUG_ON(!p || md->io_pool || md->rq_pool || md->bs);

	md->io_pool = p->io_pool;
	p->io_pool = NULL;
	md->rq_pool = p->rq_pool;
	p->rq_pool = NULL;
	md->bs = p->bs;
	p->bs = NULL;

out:
	/* mempool bind completed, now no need any mempools in the table */
	dm_table_free_md_mempools(t);
}

/*
 * Bind a table to the device.
 */
static void event_callback(void *context)
{
	unsigned long flags;
	LIST_HEAD(uevents);
	struct mapped_device *md = (struct mapped_device *) context;

	spin_lock_irqsave(&md->uevent_lock, flags);
	list_splice_init(&md->uevent_list, &uevents);
	spin_unlock_irqrestore(&md->uevent_lock, flags);

	dm_send_uevents(&uevents, &disk_to_dev(md->disk)->kobj);

	atomic_inc(&md->event_nr);
	wake_up(&md->eventq);
}

/*
 * Protected by md->suspend_lock obtained by dm_swap_table().
 */
static void __set_size(struct mapped_device *md, sector_t size)
{
	set_capacity(md->disk, size);

	i_size_write(md->bdev->bd_inode, (loff_t)size << SECTOR_SHIFT);
}

/*
 * Return 1 if the queue has a compulsory merge_bvec_fn function.
 *
 * If this function returns 0, the device is either a non-dm device
 * without a merge_bvec_fn, or a dm device whose merge_bvec_fn can
 * safely be ignored (DMF_MERGE_IS_OPTIONAL is set).
 */
int dm_queue_merge_is_compulsory(struct request_queue *q)
{
	struct mapped_device *dev_md;

	if (!q->merge_bvec_fn)
		return 0;

	if (q->make_request_fn == dm_request) {
		dev_md = q->queuedata;
		if (test_bit(DMF_MERGE_IS_OPTIONAL, &dev_md->flags))
			return 0;
	}

	return 1;
}

static int dm_device_merge_is_compulsory(struct dm_target *ti,
					 struct dm_dev *dev, sector_t start,
					 sector_t len, void *data)
{
	struct block_device *bdev = dev->bdev;
	struct request_queue *q = bdev_get_queue(bdev);

	return dm_queue_merge_is_compulsory(q);
}

/*
 * Return 1 if it is acceptable to ignore merge_bvec_fn based
 * on the properties of the underlying devices.
 */
static int dm_table_merge_is_optional(struct dm_table *table)
{
	unsigned i = 0;
	struct dm_target *ti;

	while (i < dm_table_get_num_targets(table)) {
		ti = dm_table_get_target(table, i++);

		if (ti->type->iterate_devices &&
		    ti->type->iterate_devices(ti, dm_device_merge_is_compulsory, NULL))
			return 0;
	}

	return 1;
}

/*
 * Returns old map, which caller must destroy.
 */
static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
			       struct queue_limits *limits)
{
	struct dm_table *old_map;
	struct request_queue *q = md->queue;
	sector_t size;
	int merge_is_optional;

	size = dm_table_get_size(t);

	/*
	 * Wipe any geometry if the size of the table changed.
	 */
	if (size != dm_get_size(md))
		memset(&md->geometry, 0, sizeof(md->geometry));

	__set_size(md, size);

	dm_table_event_callback(t, event_callback, md);

	/*
	 * The queue hasn't been stopped yet, if the old table type wasn't
	 * for request-based during suspension.  So stop it to prevent
	 * I/O mapping before resume.
	 * This must be done before setting the queue restrictions,
	 * because request-based dm may be run just after the setting.
	 */
	if (dm_table_request_based(t) && !blk_queue_stopped(q))
		stop_queue(q);

	__bind_mempools(md, t);

	merge_is_optional = dm_table_merge_is_optional(t);

	old_map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock));
	rcu_assign_pointer(md->map, t);
	md->immutable_target_type = dm_table_get_immutable_target_type(t);

	dm_table_set_restrictions(t, q, limits);
	if (merge_is_optional)
		set_bit(DMF_MERGE_IS_OPTIONAL, &md->flags);
	else
		clear_bit(DMF_MERGE_IS_OPTIONAL, &md->flags);
	if (old_map)
		dm_sync_table(md);

	return old_map;
}

/*
 * Returns unbound table for the caller to free.
 */
static struct dm_table *__unbind(struct mapped_device *md)
{
	struct dm_table *map = rcu_dereference_protected(md->map, 1);

	if (!map)
		return NULL;

	dm_table_event_callback(map, NULL, NULL);
	RCU_INIT_POINTER(md->map, NULL);
	dm_sync_table(md);

	return map;
}

/*
 * Constructor for a new device.
 */
int dm_create(int minor, struct mapped_device **result)
{
	struct mapped_device *md;

	md = alloc_dev(minor);
	if (!md)
		return -ENXIO;

	dm_sysfs_init(md);

	*result = md;
	return 0;
}

/*
 * Functions to manage md->type.
 * All are required to hold md->type_lock.
 */
void dm_lock_md_type(struct mapped_device *md)
{
	mutex_lock(&md->type_lock);
}

void dm_unlock_md_type(struct mapped_device *md)
{
	mutex_unlock(&md->type_lock);
}

void dm_set_md_type(struct mapped_device *md, unsigned type)
{
	BUG_ON(!mutex_is_locked(&md->type_lock));
	md->type = type;
}

unsigned dm_get_md_type(struct mapped_device *md)
{
	BUG_ON(!mutex_is_locked(&md->type_lock));
	return md->type;
}

static bool dm_md_type_request_based(struct mapped_device *md)
{
	unsigned table_type = dm_get_md_type(md);

	return (table_type == DM_TYPE_REQUEST_BASED ||
		table_type == DM_TYPE_MQ_REQUEST_BASED);
}

struct target_type *dm_get_immutable_target_type(struct mapped_device *md)
{
	return md->immutable_target_type;
}

/*
 * The queue_limits are only valid as long as you have a reference
 * count on 'md'.
 */
struct queue_limits *dm_get_queue_limits(struct mapped_device *md)
{
	BUG_ON(!atomic_read(&md->holders));
	return &md->queue->limits;
}
EXPORT_SYMBOL_GPL(dm_get_queue_limits);

/*
 * Fully initialize a request-based queue (->elevator, ->request_fn, etc).
 */
static int dm_init_request_based_queue(struct mapped_device *md)
{
	struct request_queue *q = NULL;

	if (md->queue->elevator)
		return 1;

	/* Fully initialize the queue */
	q = blk_init_allocated_queue(md->queue, dm_request_fn, NULL);
	if (!q)
		return 0;

	md->queue = q;
	dm_init_md_queue(md);
	blk_queue_softirq_done(md->queue, dm_softirq_done);
	blk_queue_prep_rq(md->queue, dm_prep_fn);
	blk_queue_lld_busy(md->queue, dm_lld_busy);

	/* Also initialize the request-based DM worker thread */
	init_kthread_worker(&md->kworker);
	md->kworker_task = kthread_run(kthread_worker_fn, &md->kworker,
				       "kdmwork-%s", dm_device_name(md));

	elv_register_queue(md->queue);

	return 1;
}

/*
 * Setup the DM device's queue based on md's type
 */
int dm_setup_md_queue(struct mapped_device *md)
{
	if (dm_md_type_request_based(md) && !dm_init_request_based_queue(md)) {
		DMWARN("Cannot initialize queue for request-based mapped device");
		return -EINVAL;
	}

	return 0;
}

struct mapped_device *dm_get_md(dev_t dev)
{
	struct mapped_device *md;
	unsigned minor = MINOR(dev);

	if (MAJOR(dev) != _major || minor >= (1 << MINORBITS))
		return NULL;

	spin_lock(&_minor_lock);

	md = idr_find(&_minor_idr, minor);
	if (md) {
		if ((md == MINOR_ALLOCED ||
		     (MINOR(disk_devt(dm_disk(md))) != minor) ||
		     dm_deleting_md(md) ||
		     test_bit(DMF_FREEING, &md->flags))) {
			md = NULL;
			goto out;
		}
		dm_get(md);
	}

out:
	spin_unlock(&_minor_lock);

	return md;
}
EXPORT_SYMBOL_GPL(dm_get_md);

void *dm_get_mdptr(struct mapped_device *md)
{
	return md->interface_ptr;
}

void dm_set_mdptr(struct mapped_device *md, void *ptr)
{
	md->interface_ptr = ptr;
}

void dm_get(struct mapped_device *md)
{
	atomic_inc(&md->holders);
	BUG_ON(test_bit(DMF_FREEING, &md->flags));
}

int dm_hold(struct mapped_device *md)
{
	spin_lock(&_minor_lock);
	if (test_bit(DMF_FREEING, &md->flags)) {
		spin_unlock(&_minor_lock);
		return -EBUSY;
	}
	dm_get(md);
	spin_unlock(&_minor_lock);
	return 0;
}
EXPORT_SYMBOL_GPL(dm_hold);

const char *dm_device_name(struct mapped_device *md)
{
	return md->name;
}
EXPORT_SYMBOL_GPL(dm_device_name);
2642
static void __dm_destroy(struct mapped_device *md, bool wait)
{
	struct dm_table *map;
	int srcu_idx;

	might_sleep();

	map = dm_get_live_table(md, &srcu_idx);

	spin_lock(&_minor_lock);
	idr_replace(&_minor_idr, MINOR_ALLOCED, MINOR(disk_devt(dm_disk(md))));
	set_bit(DMF_FREEING, &md->flags);
	spin_unlock(&_minor_lock);

	if (dm_request_based(md))
		flush_kthread_worker(&md->kworker);

	/*
	 * Take suspend_lock so that presuspend and postsuspend methods
	 * do not race with internal suspend.
	 */
	mutex_lock(&md->suspend_lock);
	if (!dm_suspended_md(md)) {
		dm_table_presuspend_targets(map);
		dm_table_postsuspend_targets(map);
	}
	mutex_unlock(&md->suspend_lock);

	/* dm_put_live_table must be before msleep, otherwise deadlock is possible */
	dm_put_live_table(md, srcu_idx);

	/*
	 * Rare, but there may be I/O requests still going to complete,
	 * for example.  Wait for all references to disappear.
	 * No one should increment the reference count of the mapped_device,
	 * after the mapped_device state becomes DMF_FREEING.
	 */
	if (wait)
		while (atomic_read(&md->holders))
			msleep(1);
	else if (atomic_read(&md->holders))
		DMWARN("%s: Forcibly removing mapped_device still in use! (%d users)",
		       dm_device_name(md), atomic_read(&md->holders));

	dm_sysfs_exit(md);
	dm_table_destroy(__unbind(md));
	free_dev(md);
}

void dm_destroy(struct mapped_device *md)
{
	__dm_destroy(md, true);
}

void dm_destroy_immediate(struct mapped_device *md)
{
	__dm_destroy(md, false);
}

void dm_put(struct mapped_device *md)
{
	atomic_dec(&md->holders);
}
EXPORT_SYMBOL_GPL(dm_put);

static int dm_wait_for_completion(struct mapped_device *md, int interruptible)
{
	int r = 0;
	DECLARE_WAITQUEUE(wait, current);

	add_wait_queue(&md->wait, &wait);

	while (1) {
		set_current_state(interruptible);

		if (!md_in_flight(md))
			break;

		if (interruptible == TASK_INTERRUPTIBLE &&
		    signal_pending(current)) {
			r = -EINTR;
			break;
		}

		io_schedule();
	}
	set_current_state(TASK_RUNNING);

	remove_wait_queue(&md->wait, &wait);

	return r;
}

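/*
 * Process the deferred bios
 */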
static void dm_wq_work(struct work_struct *work)
{
	struct mapped_device *md = container_of(work, struct mapped_device,
						work);
	struct bio *c;
	int srcu_idx;
	struct dm_table *map;

	map = dm_get_live_table(md, &srcu_idx);

	while (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) {
		spin_lock_irq(&md->deferred_lock);
		c = bio_list_pop(&md->deferred);
		spin_unlock_irq(&md->deferred_lock);

		if (!c)
			break;

		if (dm_request_based(md))
			generic_make_request(c);
		else
			__split_and_process_bio(md, map, c);
	}

	dm_put_live_table(md, srcu_idx);
}

static void dm_queue_flush(struct mapped_device *md)
{
	clear_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
	smp_mb__after_atomic();
	queue_work(md->wq, &md->work);
}

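/*
 * Swap in a new table, returning the old one for the caller to destroy.
 */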
struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table)
{
	struct dm_table *live_map = NULL, *map = ERR_PTR(-EINVAL);
	struct queue_limits limits;
	int r;

	mutex_lock(&md->suspend_lock);

	/* device must be suspended */
	if (!dm_suspended_md(md))
		goto out;

	/*
	 * If the new table has no data devices, retain the existing limits.
	 * This helps multipath with queue_if_no_path if all paths disappear,
	 * then new I/O is queued based on these limits, and then some paths
	 * reappear.
	 */
	if (dm_table_has_no_data_devices(table)) {
		live_map = dm_get_live_table_fast(md);
		if (live_map)
			limits = md->queue->limits;
		dm_put_live_table_fast(md);
	}

	if (!live_map) {
		r = dm_calculate_queue_limits(table, &limits);
		if (r) {
			map = ERR_PTR(r);
			goto out;
		}
	}

	map = __bind(md, table, &limits);

out:
	mutex_unlock(&md->suspend_lock);
	return map;
}

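/*
 * Functions to lock and unlock any filesystem running on the
 * device.
 */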
static int lock_fs(struct mapped_device *md)
{
	int r;

	WARN_ON(md->frozen_sb);

	md->frozen_sb = freeze_bdev(md->bdev);
	if (IS_ERR(md->frozen_sb)) {
		r = PTR_ERR(md->frozen_sb);
		md->frozen_sb = NULL;
		return r;
	}

	set_bit(DMF_FROZEN, &md->flags);

	return 0;
}

static void unlock_fs(struct mapped_device *md)
{
	if (!test_bit(DMF_FROZEN, &md->flags))
		return;

	thaw_bdev(md->bdev, md->frozen_sb);
	md->frozen_sb = NULL;
	clear_bit(DMF_FROZEN, &md->flags);
}

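/*
 * If __dm_suspend returns 0, the device is completely quiescent
 * now. There is no request-processing activity. All new requests
 * are being added to md->deferred list.
 *
 * Caller must hold md->suspend_lock
 */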
static int __dm_suspend(struct mapped_device *md, struct dm_table *map,
			unsigned suspend_flags, int interruptible)
{
	bool do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG;
	bool noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG;
	int r;

	/*
	 * DMF_NOFLUSH_SUSPENDING must be set before presuspend.
	 * This flag is cleared before dm_suspend returns.
	 */
	if (noflush)
		set_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);

	/*
	 * This gets reverted if there's an error later and the targets
	 * provide the .presuspend_undo hook.
	 */
	dm_table_presuspend_targets(map);

	/*
	 * Flush I/O to the device.
	 * Any I/O submitted after lock_fs() may not be flushed.
	 * noflush takes precedence over do_lockfs.
	 * (lock_fs() flushes I/Os and waits for them to complete.)
	 */
	if (!noflush && do_lockfs) {
		r = lock_fs(md);
		if (r) {
			dm_table_presuspend_undo_targets(map);
			return r;
		}
	}

	/*
	 * Here we must make sure that no processes are submitting requests
	 * to target drivers i.e. no one may be executing
	 * __split_and_process_bio from dm_request or dm_wq_work.
	 *
	 * Setting DMF_BLOCK_IO_FOR_SUSPEND diverts new bios to md->deferred;
	 * synchronize_srcu() then waits out any mapping still in flight, and
	 * flush_workqueue(md->wq) below quiesces the deferred-bio worker.
	 */
	set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
	if (map)
		synchronize_srcu(&md->io_barrier);

	/*
	 * Stop md->queue before flushing md->wq in case request-based
	 * dm defers requests to md->wq from md->queue.
	 */
	if (dm_request_based(md)) {
		stop_queue(md->queue);
		flush_kthread_worker(&md->kworker);
	}

	flush_workqueue(md->wq);

	/*
	 * At this point no more requests are entering target request routines.
	 * We call dm_wait_for_completion to wait for all existing requests
	 * to finish.
	 */
	r = dm_wait_for_completion(md, interruptible);

	if (noflush)
		clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
	if (map)
		synchronize_srcu(&md->io_barrier);

	/* were we interrupted ? */
	if (r < 0) {
		dm_queue_flush(md);

		if (dm_request_based(md))
			start_queue(md->queue);

		unlock_fs(md);
		dm_table_presuspend_undo_targets(map);
		/* pushback list is already flushed, so skip flush */
	}

	return r;
}

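/*
 * We need to be able to change a mapping table under a mounted
 * filesystem.  For example we might want to move some data in
 * the background.  Before the table can be swapped with
 * dm_bind_table, dm_suspend must be called to flush any in
 * flight bios and ensure that any further io gets deferred.
 */
/*
 * Suspend mechanism in request-based dm.
 *
 * 1. Flush all I/Os by lock_fs() if needed.
 * 2. Stop dispatching any I/O by stopping the request_queue.
 * 3. Wait for all in-flight I/Os to be completed or requeued.
 *
 * To abort suspend, start the request_queue.
 *
 * A typical caller sequence, as driven from the ioctl interface
 * (sketch only -- see dm-ioctl.c for the real error handling):
 *
 *	dm_suspend(md, DM_SUSPEND_LOCKFS_FLAG);
 *	old_map = dm_swap_table(md, new_table);
 *	dm_resume(md);
 *	dm_table_destroy(old_map);
 */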
int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
{
	struct dm_table *map = NULL;
	int r = 0;

retry:
	mutex_lock_nested(&md->suspend_lock, SINGLE_DEPTH_NESTING);

	if (dm_suspended_md(md)) {
		r = -EINVAL;
		goto out_unlock;
	}

	if (dm_suspended_internally_md(md)) {
		/* already internally suspended, wait for internal resume */
		mutex_unlock(&md->suspend_lock);
		r = wait_on_bit(&md->flags, DMF_SUSPENDED_INTERNALLY, TASK_INTERRUPTIBLE);
		if (r)
			return r;
		goto retry;
	}

	map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock));

	r = __dm_suspend(md, map, suspend_flags, TASK_INTERRUPTIBLE);
	if (r)
		goto out_unlock;

	set_bit(DMF_SUSPENDED, &md->flags);

	dm_table_postsuspend_targets(map);

out_unlock:
	mutex_unlock(&md->suspend_lock);
	return r;
}

static int __dm_resume(struct mapped_device *md, struct dm_table *map)
{
	if (map) {
		int r = dm_table_resume_targets(map);
		if (r)
			return r;
	}

	dm_queue_flush(md);

	/*
	 * Flushing deferred I/Os must be done after targets are resumed
	 * so that mapping of targets can work correctly.
	 * Request-based dm is queueing the deferred I/Os in its request_queue.
	 */
	if (dm_request_based(md))
		start_queue(md->queue);

	unlock_fs(md);

	return 0;
}

int dm_resume(struct mapped_device *md)
{
	int r = -EINVAL;
	struct dm_table *map = NULL;

retry:
	mutex_lock_nested(&md->suspend_lock, SINGLE_DEPTH_NESTING);

	if (!dm_suspended_md(md))
		goto out;

	if (dm_suspended_internally_md(md)) {
		/* already internally suspended, wait for internal resume */
		mutex_unlock(&md->suspend_lock);
		r = wait_on_bit(&md->flags, DMF_SUSPENDED_INTERNALLY, TASK_INTERRUPTIBLE);
		if (r)
			return r;
		goto retry;
	}

	map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock));
	if (!map || !dm_table_get_size(map))
		goto out;

	r = __dm_resume(md, map);
	if (r)
		goto out;

	clear_bit(DMF_SUSPENDED, &md->flags);

	r = 0;
out:
	mutex_unlock(&md->suspend_lock);

	return r;
}

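/*
 * Internal suspend/resume works like userspace-driven suspend. It waits
 * until all bios finish and prevents issuing new bios to the target drivers.
 * It may be used only from the kernel.
 */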
static void __dm_internal_suspend(struct mapped_device *md, unsigned suspend_flags)
{
	struct dm_table *map = NULL;

	if (md->internal_suspend_count++)
		return; /* already internally suspended */

	if (dm_suspended_md(md)) {
		set_bit(DMF_SUSPENDED_INTERNALLY, &md->flags);
		return; /* nest suspend */
	}

	map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock));

	/*
	 * Using TASK_UNINTERRUPTIBLE: internal suspend is kernel-initiated,
	 * so there is no userspace process whose signals should be allowed
	 * to abort the wait for quiescence.
	 */
	(void) __dm_suspend(md, map, suspend_flags, TASK_UNINTERRUPTIBLE);

	set_bit(DMF_SUSPENDED_INTERNALLY, &md->flags);

	dm_table_postsuspend_targets(map);
}

static void __dm_internal_resume(struct mapped_device *md)
{
	BUG_ON(!md->internal_suspend_count);

	if (--md->internal_suspend_count)
		return; /* resume from nested internal suspend */

	if (dm_suspended_md(md))
		goto done; /* resume from nested suspend */

	/*
	 * NOTE: existing callers don't need to call dm_table_resume_targets
	 * (which may fail -- so best to avoid it for now by passing NULL map)
	 */
	(void) __dm_resume(md, NULL);

done:
	clear_bit(DMF_SUSPENDED_INTERNALLY, &md->flags);
	smp_mb__after_atomic();
	wake_up_bit(&md->flags, DMF_SUSPENDED_INTERNALLY);
}

void dm_internal_suspend_noflush(struct mapped_device *md)
{
	mutex_lock(&md->suspend_lock);
	__dm_internal_suspend(md, DM_SUSPEND_NOFLUSH_FLAG);
	mutex_unlock(&md->suspend_lock);
}
EXPORT_SYMBOL_GPL(dm_internal_suspend_noflush);

void dm_internal_resume(struct mapped_device *md)
{
	mutex_lock(&md->suspend_lock);
	__dm_internal_resume(md);
	mutex_unlock(&md->suspend_lock);
}
EXPORT_SYMBOL_GPL(dm_internal_resume);

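/*
 * Fast variants of internal suspend/resume hold md->suspend_lock,
 * which prevents interaction with userspace-driven suspend.
 */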
void dm_internal_suspend_fast(struct mapped_device *md)
{
	/*
	 * suspend_lock is intentionally left held on return; it is
	 * released by the paired dm_internal_resume_fast().
	 */
	mutex_lock(&md->suspend_lock);
	if (dm_suspended_md(md) || dm_suspended_internally_md(md))
		return;

	set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
	synchronize_srcu(&md->io_barrier);
	flush_workqueue(md->wq);
	dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL_GPL(dm_internal_suspend_fast);

void dm_internal_resume_fast(struct mapped_device *md)
{
	if (dm_suspended_md(md) || dm_suspended_internally_md(md))
		goto done;

	dm_queue_flush(md);

done:
	/* drops the suspend_lock taken by dm_internal_suspend_fast() */
	mutex_unlock(&md->suspend_lock);
}
EXPORT_SYMBOL_GPL(dm_internal_resume_fast);

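/*-----------------------------------------------------------------
 * Event notification.
 *---------------------------------------------------------------*/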
int dm_kobject_uevent(struct mapped_device *md, enum kobject_action action,
		      unsigned cookie)
{
	char udev_cookie[DM_COOKIE_LENGTH];
	char *envp[] = { udev_cookie, NULL };

	if (!cookie)
		return kobject_uevent(&disk_to_dev(md->disk)->kobj, action);
	else {
		snprintf(udev_cookie, DM_COOKIE_LENGTH, "%s=%u",
			 DM_COOKIE_ENV_VAR_NAME, cookie);
		return kobject_uevent_env(&disk_to_dev(md->disk)->kobj,
					  action, envp);
	}
}

uint32_t dm_next_uevent_seq(struct mapped_device *md)
{
	return atomic_add_return(1, &md->uevent_seq);
}

uint32_t dm_get_event_nr(struct mapped_device *md)
{
	return atomic_read(&md->event_nr);
}

int dm_wait_event(struct mapped_device *md, int event_nr)
{
	return wait_event_interruptible(md->eventq,
			(event_nr != atomic_read(&md->event_nr)));
}

void dm_uevent_add(struct mapped_device *md, struct list_head *elist)
{
	unsigned long flags;

	spin_lock_irqsave(&md->uevent_lock, flags);
	list_add(elist, &md->uevent_list);
	spin_unlock_irqrestore(&md->uevent_lock, flags);
}

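/*
 * The gendisk is only valid as long as you have a reference
 * count on 'md'.
 */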
struct gendisk *dm_disk(struct mapped_device *md)
{
	return md->disk;
}

struct kobject *dm_kobject(struct mapped_device *md)
{
	return &md->kobj_holder.kobj;
}

struct mapped_device *dm_get_from_kobject(struct kobject *kobj)
{
	struct mapped_device *md;

	md = container_of(kobj, struct mapped_device, kobj_holder.kobj);

	if (test_bit(DMF_FREEING, &md->flags) ||
	    dm_deleting_md(md))
		return NULL;

	dm_get(md);
	return md;
}

int dm_suspended_md(struct mapped_device *md)
{
	return test_bit(DMF_SUSPENDED, &md->flags);
}

int dm_suspended_internally_md(struct mapped_device *md)
{
	return test_bit(DMF_SUSPENDED_INTERNALLY, &md->flags);
}

int dm_test_deferred_remove_flag(struct mapped_device *md)
{
	return test_bit(DMF_DEFERRED_REMOVE, &md->flags);
}

int dm_suspended(struct dm_target *ti)
{
	return dm_suspended_md(dm_table_get_md(ti->table));
}
EXPORT_SYMBOL_GPL(dm_suspended);

int dm_noflush_suspending(struct dm_target *ti)
{
	return __noflush_suspending(dm_table_get_md(ti->table));
}
EXPORT_SYMBOL_GPL(dm_noflush_suspending);

struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity, unsigned per_bio_data_size)
{
	struct dm_md_mempools *pools = kzalloc(sizeof(*pools), GFP_KERNEL);
	struct kmem_cache *cachep;
	unsigned int pool_size = 0;
	unsigned int front_pad;

	if (!pools)
		return NULL;

	switch (type) {
	case DM_TYPE_BIO_BASED:
		cachep = _io_cache;
		pool_size = dm_get_reserved_bio_based_ios();
		front_pad = roundup(per_bio_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone);
		break;
	case DM_TYPE_REQUEST_BASED:
		pool_size = dm_get_reserved_rq_based_ios();
		pools->rq_pool = mempool_create_slab_pool(pool_size, _rq_cache);
		if (!pools->rq_pool)
			goto out;
		/* fall through to setup remaining rq-based pools */
	case DM_TYPE_MQ_REQUEST_BASED:
		cachep = _rq_tio_cache;
		if (!pool_size)
			pool_size = dm_get_reserved_rq_based_ios();
		front_pad = offsetof(struct dm_rq_clone_bio_info, clone);
		/* per_bio_data_size is not used. See __bind_mempools(). */
		WARN_ON(per_bio_data_size != 0);
		break;
	default:
		goto out;
	}

	pools->io_pool = mempool_create_slab_pool(pool_size, cachep);
	if (!pools->io_pool)
		goto out;

	pools->bs = bioset_create_nobvec(pool_size, front_pad);
	if (!pools->bs)
		goto out;

	if (integrity && bioset_integrity_create(pools->bs, pool_size))
		goto out;

	return pools;

out:
	dm_free_md_mempools(pools);

	return NULL;
}

void dm_free_md_mempools(struct dm_md_mempools *pools)
{
	if (!pools)
		return;

	if (pools->io_pool)
		mempool_destroy(pools->io_pool);

	if (pools->rq_pool)
		mempool_destroy(pools->rq_pool);

	if (pools->bs)
		bioset_free(pools->bs);

	kfree(pools);
}

static const struct block_device_operations dm_blk_dops = {
	.open = dm_blk_open,
	.release = dm_blk_close,
	.ioctl = dm_blk_ioctl,
	.getgeo = dm_blk_getgeo,
	.owner = THIS_MODULE
};

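/*
 * module hooks
 */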
module_init(dm_init);
module_exit(dm_exit);

module_param(major, uint, 0);
MODULE_PARM_DESC(major, "The major number of the device mapper");

module_param(reserved_bio_based_ios, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(reserved_bio_based_ios, "Reserved IOs in bio-based mempools");

module_param(reserved_rq_based_ios, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(reserved_rq_based_ios, "Reserved IOs in request-based mempools");

MODULE_DESCRIPTION(DM_NAME " driver");
MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
MODULE_LICENSE("GPL");