/*
 * Copyright (C) 2001, 2002 Sistina Software (UK) Limited.
 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include "dm.h"
#include "dm-uevent.h"

#include <linux/init.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/moduleparam.h>
#include <linux/blkpg.h>
#include <linux/bio.h>
#include <linux/buffer_head.h>
#include <linux/mempool.h>
#include <linux/slab.h>
#include <linux/idr.h>
#include <linux/hdreg.h>

#include <trace/events/block.h>

#define DM_MSG_PREFIX "core"

/*
 * Cookies are used for udev synchronization to notify user space when
 * a uevent was generated for a given dm device.
 */
#define DM_COOKIE_ENV_VAR_NAME "DM_COOKIE"
#define DM_COOKIE_LENGTH 24
33
static const char *_name = DM_NAME;

static unsigned int major = 0;
static unsigned int _major = 0;

static DEFINE_SPINLOCK(_minor_lock);
40
/*
 * One of these is allocated per original bio.
 */
struct dm_io {
	struct mapped_device *md;
	int error;
	atomic_t io_count;
	struct bio *bio;
	unsigned long start_time;
	spinlock_t endio_lock;
};
52
/*
 * One of these is allocated per clone bio submitted to a target.
 */
struct dm_target_io {
	struct dm_io *io;
	struct dm_target *ti;
	union map_info info;
};
63
/*
 * For request-based dm.
 * One of these is allocated per request.
 */
struct dm_rq_target_io {
	struct mapped_device *md;
	struct dm_target *ti;
	struct request *orig, clone;
	int error;
	union map_info info;
};
75
/*
 * For request-based dm.
 * One of these is allocated per bio cloned from the original request.
 */
struct dm_rq_clone_bio_info {
	struct bio *orig;
	struct dm_rq_target_io *tio;
};
84
85union map_info *dm_get_mapinfo(struct bio *bio)
86{
87 if (bio && bio->bi_private)
88 return &((struct dm_target_io *)bio->bi_private)->info;
89 return NULL;
90}
91
92union map_info *dm_get_rq_mapinfo(struct request *rq)
93{
94 if (rq && rq->end_io_data)
95 return &((struct dm_rq_target_io *)rq->end_io_data)->info;
96 return NULL;
97}
98EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo);
99
#define MINOR_ALLOCED ((void *)-1)

/*
 * Bits for the md->flags field.
 */
#define DMF_BLOCK_IO_FOR_SUSPEND 0
#define DMF_SUSPENDED 1
#define DMF_FROZEN 2
#define DMF_FREEING 3
#define DMF_DELETING 4
#define DMF_NOFLUSH_SUSPENDING 5
#define DMF_QUEUE_IO_TO_THREAD 6
112
/*
 * Device state and bookkeeping for one mapped device.
 */
struct mapped_device {
	struct rw_semaphore io_lock;
	struct mutex suspend_lock;
	rwlock_t map_lock;
	atomic_t holders;
	atomic_t open_count;

	unsigned long flags;

	struct request_queue *queue;
	struct gendisk *disk;
	char name[16];

	void *interface_ptr;

	/*
	 * A list of ios that arrived while we were suspended.
	 */
	atomic_t pending[2];
	wait_queue_head_t wait;
	struct work_struct work;
	struct bio_list deferred;
	spinlock_t deferred_lock;

	/*
	 * An error from the barrier request currently being processed.
	 */
	int barrier_error;

	/*
	 * Processing queue (flush/barriers)
	 */
	struct workqueue_struct *wq;

	/*
	 * The current mapping.
	 */
	struct dm_table *map;

	/*
	 * io objects are allocated from here.
	 */
	mempool_t *io_pool;
	mempool_t *tio_pool;

	struct bio_set *bs;

	/*
	 * Event handling.
	 */
	atomic_t event_nr;
	wait_queue_head_t eventq;
	atomic_t uevent_seq;
	struct list_head uevent_list;
	spinlock_t uevent_lock; /* Protect access to uevent_list */

	/*
	 * freeze/thaw support require holding onto a super block
	 */
	struct super_block *frozen_sb;
	struct block_device *bdev;

	/* forced geometry settings */
	struct hd_geometry geometry;

	/* marker of flush suspend for request-based dm */
	struct request suspend_rq;

	/* For saving the address of __make_request for request based dm */
	make_request_fn *saved_make_request_fn;

	/* sysfs handle */
	struct kobject kobj;

	/* zero-length barrier that will be cloned and submitted to targets */
	struct bio barrier_bio;
};
193
/*
 * For mempools pre-allocation at the table loading time.
 */
struct dm_md_mempools {
	mempool_t *io_pool;
	mempool_t *tio_pool;
	struct bio_set *bs;
};

#define MIN_IOS 256
static struct kmem_cache *_io_cache;
static struct kmem_cache *_tio_cache;
static struct kmem_cache *_rq_tio_cache;
static struct kmem_cache *_rq_bio_info_cache;
208
209static int __init local_init(void)
210{
211 int r = -ENOMEM;
212
213
214 _io_cache = KMEM_CACHE(dm_io, 0);
215 if (!_io_cache)
216 return r;
217
218
219 _tio_cache = KMEM_CACHE(dm_target_io, 0);
220 if (!_tio_cache)
221 goto out_free_io_cache;
222
223 _rq_tio_cache = KMEM_CACHE(dm_rq_target_io, 0);
224 if (!_rq_tio_cache)
225 goto out_free_tio_cache;
226
227 _rq_bio_info_cache = KMEM_CACHE(dm_rq_clone_bio_info, 0);
228 if (!_rq_bio_info_cache)
229 goto out_free_rq_tio_cache;
230
231 r = dm_uevent_init();
232 if (r)
233 goto out_free_rq_bio_info_cache;
234
235 _major = major;
236 r = register_blkdev(_major, _name);
237 if (r < 0)
238 goto out_uevent_exit;
239
240 if (!_major)
241 _major = r;
242
243 return 0;
244
245out_uevent_exit:
246 dm_uevent_exit();
247out_free_rq_bio_info_cache:
248 kmem_cache_destroy(_rq_bio_info_cache);
249out_free_rq_tio_cache:
250 kmem_cache_destroy(_rq_tio_cache);
251out_free_tio_cache:
252 kmem_cache_destroy(_tio_cache);
253out_free_io_cache:
254 kmem_cache_destroy(_io_cache);
255
256 return r;
257}
258
259static void local_exit(void)
260{
261 kmem_cache_destroy(_rq_bio_info_cache);
262 kmem_cache_destroy(_rq_tio_cache);
263 kmem_cache_destroy(_tio_cache);
264 kmem_cache_destroy(_io_cache);
265 unregister_blkdev(_major, _name);
266 dm_uevent_exit();
267
268 _major = 0;
269
270 DMINFO("cleaned up");
271}
272
273static int (*_inits[])(void) __initdata = {
274 local_init,
275 dm_target_init,
276 dm_linear_init,
277 dm_stripe_init,
278 dm_kcopyd_init,
279 dm_interface_init,
280};
281
282static void (*_exits[])(void) = {
283 local_exit,
284 dm_target_exit,
285 dm_linear_exit,
286 dm_stripe_exit,
287 dm_kcopyd_exit,
288 dm_interface_exit,
289};
290
291static int __init dm_init(void)
292{
293 const int count = ARRAY_SIZE(_inits);
294
295 int r, i;
296
297 for (i = 0; i < count; i++) {
298 r = _inits[i]();
299 if (r)
300 goto bad;
301 }
302
303 return 0;
304
305 bad:
306 while (i--)
307 _exits[i]();
308
309 return r;
310}
311
312static void __exit dm_exit(void)
313{
314 int i = ARRAY_SIZE(_exits);
315
316 while (i--)
317 _exits[i]();
318}
319
/*
 * Block device functions
 */
323static int dm_blk_open(struct block_device *bdev, fmode_t mode)
324{
325 struct mapped_device *md;
326
327 spin_lock(&_minor_lock);
328
329 md = bdev->bd_disk->private_data;
330 if (!md)
331 goto out;
332
333 if (test_bit(DMF_FREEING, &md->flags) ||
334 test_bit(DMF_DELETING, &md->flags)) {
335 md = NULL;
336 goto out;
337 }
338
339 dm_get(md);
340 atomic_inc(&md->open_count);
341
342out:
343 spin_unlock(&_minor_lock);
344
345 return md ? 0 : -ENXIO;
346}
347
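/*
 * Drop the open count and the reference taken in dm_blk_open().
 */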
348static int dm_blk_close(struct gendisk *disk, fmode_t mode)
349{
350 struct mapped_device *md = disk->private_data;
351 atomic_dec(&md->open_count);
352 dm_put(md);
353 return 0;
354}
355
356int dm_open_count(struct mapped_device *md)
357{
358 return atomic_read(&md->open_count);
359}
360
/*
 * Guarantees nothing is using the device before it's deleted.
 */
364int dm_lock_for_deletion(struct mapped_device *md)
365{
366 int r = 0;
367
368 spin_lock(&_minor_lock);
369
370 if (dm_open_count(md))
371 r = -EBUSY;
372 else
373 set_bit(DMF_DELETING, &md->flags);
374
375 spin_unlock(&_minor_lock);
376
377 return r;
378}
379
380static int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
381{
382 struct mapped_device *md = bdev->bd_disk->private_data;
383
384 return dm_get_geometry(md, geo);
385}
386
387static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
388 unsigned int cmd, unsigned long arg)
389{
390 struct mapped_device *md = bdev->bd_disk->private_data;
391 struct dm_table *map = dm_get_table(md);
392 struct dm_target *tgt;
393 int r = -ENOTTY;
394
395 if (!map || !dm_table_get_size(map))
396 goto out;

	/* We only support devices that have a single target */
399 if (dm_table_get_num_targets(map) != 1)
400 goto out;
401
402 tgt = dm_table_get_target(map, 0);
403
404 if (dm_suspended(md)) {
405 r = -EAGAIN;
406 goto out;
407 }
408
409 if (tgt->type->ioctl)
410 r = tgt->type->ioctl(tgt, cmd, arg);
411
412out:
413 dm_table_put(map);
414
415 return r;
416}
417
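/*
 * Per-bio and per-request bookkeeping structures come from the device's
 * private mempools (attached in __bind_mempools) so that forward progress
 * remains possible under memory pressure.
 */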
418static struct dm_io *alloc_io(struct mapped_device *md)
419{
420 return mempool_alloc(md->io_pool, GFP_NOIO);
421}
422
423static void free_io(struct mapped_device *md, struct dm_io *io)
424{
425 mempool_free(io, md->io_pool);
426}
427
428static void free_tio(struct mapped_device *md, struct dm_target_io *tio)
429{
430 mempool_free(tio, md->tio_pool);
431}
432
433static struct dm_rq_target_io *alloc_rq_tio(struct mapped_device *md)
434{
435 return mempool_alloc(md->tio_pool, GFP_ATOMIC);
436}
437
438static void free_rq_tio(struct dm_rq_target_io *tio)
439{
440 mempool_free(tio, tio->md->tio_pool);
441}
442
443static struct dm_rq_clone_bio_info *alloc_bio_info(struct mapped_device *md)
444{
445 return mempool_alloc(md->io_pool, GFP_ATOMIC);
446}
447
448static void free_bio_info(struct dm_rq_clone_bio_info *info)
449{
450 mempool_free(info, info->tio->md->io_pool);
451}
452
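/*
 * Charge the io to the gendisk statistics and bump md->pending[rw];
 * end_io_acct() drops the count and wakes anyone waiting for suspend.
 */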
453static void start_io_acct(struct dm_io *io)
454{
455 struct mapped_device *md = io->md;
456 int cpu;
457 int rw = bio_data_dir(io->bio);
458
459 io->start_time = jiffies;
460
461 cpu = part_stat_lock();
462 part_round_stats(cpu, &dm_disk(md)->part0);
463 part_stat_unlock();
464 dm_disk(md)->part0.in_flight[rw] = atomic_inc_return(&md->pending[rw]);
465}
466
467static void end_io_acct(struct dm_io *io)
468{
469 struct mapped_device *md = io->md;
470 struct bio *bio = io->bio;
471 unsigned long duration = jiffies - io->start_time;
472 int pending, cpu;
473 int rw = bio_data_dir(bio);
474
475 cpu = part_stat_lock();
476 part_round_stats(cpu, &dm_disk(md)->part0);
477 part_stat_add(cpu, &dm_disk(md)->part0, ticks[rw], duration);
478 part_stat_unlock();
479
	/*
	 * After this is decremented the bio must not be touched if it is
	 * a barrier.
	 */
	dm_disk(md)->part0.in_flight[rw] = pending =
		atomic_dec_return(&md->pending[rw]);
	pending += atomic_read(&md->pending[rw^0x1]);

	/* nudge anyone waiting on suspend queue */
489 if (!pending)
490 wake_up(&md->wait);
491}
492
/*
 * Add the bio to the list of deferred io.
 */
496static void queue_io(struct mapped_device *md, struct bio *bio)
497{
498 down_write(&md->io_lock);
499
500 spin_lock_irq(&md->deferred_lock);
501 bio_list_add(&md->deferred, bio);
502 spin_unlock_irq(&md->deferred_lock);
503
504 if (!test_and_set_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags))
505 queue_work(md->wq, &md->work);
506
507 up_write(&md->io_lock);
508}
509
/*
 * Everyone (including functions in this file), should use this
 * function to access the md->map field, and make sure they call
 * dm_table_put() when finished.
 */
515struct dm_table *dm_get_table(struct mapped_device *md)
516{
517 struct dm_table *t;
518 unsigned long flags;
519
520 read_lock_irqsave(&md->map_lock, flags);
521 t = md->map;
522 if (t)
523 dm_table_get(t);
524 read_unlock_irqrestore(&md->map_lock, flags);
525
526 return t;
527}
528
/*
 * Get the geometry associated with a dm device
 */
532int dm_get_geometry(struct mapped_device *md, struct hd_geometry *geo)
533{
534 *geo = md->geometry;
535
536 return 0;
537}
538
/*
 * Set the geometry of a device.
 */
542int dm_set_geometry(struct mapped_device *md, struct hd_geometry *geo)
543{
544 sector_t sz = (sector_t)geo->cylinders * geo->heads * geo->sectors;
545
546 if (geo->start > sz) {
547 DMWARN("Start sector is beyond the geometry limits.");
548 return -EINVAL;
549 }
550
551 md->geometry = *geo;
552
553 return 0;
554}
555
/*-----------------------------------------------------------------
 * CRUD START:
 *   A more elegant soln is in the works that uses the queue
 *   merge fn, unfortunately there are a couple of changes to
 *   the block layer that I want to make for this.  But in the
 *   interim we maintain support for the rolling io.
 *---------------------------------------------------------------*/

565static int __noflush_suspending(struct mapped_device *md)
566{
567 return test_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
568}
569
/*
 * Decrements the number of outstanding ios that a bio has been
 * cloned into, completing the original io if necc.
 */
574static void dec_pending(struct dm_io *io, int error)
575{
576 unsigned long flags;
577 int io_error;
578 struct bio *bio;
579 struct mapped_device *md = io->md;
580
581
582 if (unlikely(error)) {
583 spin_lock_irqsave(&io->endio_lock, flags);
584 if (!(io->error > 0 && __noflush_suspending(md)))
585 io->error = error;
586 spin_unlock_irqrestore(&io->endio_lock, flags);
587 }
588
589 if (atomic_dec_and_test(&io->io_count)) {
590 if (io->error == DM_ENDIO_REQUEUE) {
591
592
593
594 spin_lock_irqsave(&md->deferred_lock, flags);
595 if (__noflush_suspending(md)) {
596 if (!bio_rw_flagged(io->bio, BIO_RW_BARRIER))
597 bio_list_add_head(&md->deferred,
598 io->bio);
599 } else
600
601 io->error = -EIO;
602 spin_unlock_irqrestore(&md->deferred_lock, flags);
603 }
604
605 io_error = io->error;
606 bio = io->bio;
607
608 if (bio_rw_flagged(bio, BIO_RW_BARRIER)) {
609
610
611
612
613
614 if (!md->barrier_error && io_error != -EOPNOTSUPP)
615 md->barrier_error = io_error;
616 end_io_acct(io);
617 } else {
618 end_io_acct(io);
619
620 if (io_error != DM_ENDIO_REQUEUE) {
621 trace_block_bio_complete(md->queue, bio);
622
623 bio_endio(bio, io_error);
624 }
625 }
626
627 free_io(md, io);
628 }
629}
630
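/*
 * Completion of a bio cloned by __map_bio(): give the target's end_io
 * hook a chance to requeue or retry before dec_pending() is called on
 * the original io.
 */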
631static void clone_endio(struct bio *bio, int error)
632{
633 int r = 0;
634 struct dm_target_io *tio = bio->bi_private;
635 struct dm_io *io = tio->io;
636 struct mapped_device *md = tio->io->md;
637 dm_endio_fn endio = tio->ti->type->end_io;
638
639 if (!bio_flagged(bio, BIO_UPTODATE) && !error)
640 error = -EIO;
641
642 if (endio) {
643 r = endio(tio->ti, bio, error, &tio->info);
644 if (r < 0 || r == DM_ENDIO_REQUEUE)
645
646
647
648
649 error = r;
650 else if (r == DM_ENDIO_INCOMPLETE)
651
652 return;
653 else if (r) {
654 DMWARN("unimplemented target endio return value: %d", r);
655 BUG();
656 }
657 }
658
659
660
661
662 bio->bi_private = md->bs;
663
664 free_tio(md, tio);
665 bio_put(bio);
666 dec_pending(io, error);
667}
668
/*
 * Partial completion handling for request-based dm
 */
672static void end_clone_bio(struct bio *clone, int error)
673{
674 struct dm_rq_clone_bio_info *info = clone->bi_private;
675 struct dm_rq_target_io *tio = info->tio;
676 struct bio *bio = info->orig;
677 unsigned int nr_bytes = info->orig->bi_size;
678
679 bio_put(clone);
680
681 if (tio->error)
682
683
684
685
686
687 return;
688 else if (error) {
689
690
691
692
693
694 tio->error = error;
695 return;
696 }
697
698
699
700
701
702
703
704
705
706
707
708 if (tio->orig->bio != bio)
709 DMERR("bio completion is going in the middle of the request");
710
711
712
713
714
715
716 blk_update_request(tio->orig, 0, nr_bytes);
717}
718
/*
 * Don't touch any member of the md after calling this function because
 * the md may be freed in dm_put() at the end of this function.
 * Or do dm_get() before calling this function and dm_put() later.
 */
724static void rq_completed(struct mapped_device *md, int run_queue)
725{
726 int wakeup_waiters = 0;
727 struct request_queue *q = md->queue;
728 unsigned long flags;
729
730 spin_lock_irqsave(q->queue_lock, flags);
731 if (!queue_in_flight(q))
732 wakeup_waiters = 1;
733 spin_unlock_irqrestore(q->queue_lock, flags);
734
735
736 if (wakeup_waiters)
737 wake_up(&md->wait);
738
739 if (run_queue)
740 blk_run_queue(q);
741
742
743
744
745 dm_put(md);
746}
747
748static void free_rq_clone(struct request *clone)
749{
750 struct dm_rq_target_io *tio = clone->end_io_data;
751
752 blk_rq_unprep_clone(clone);
753 free_rq_tio(tio);
754}
755
756static void dm_unprep_request(struct request *rq)
757{
758 struct request *clone = rq->special;
759
760 rq->special = NULL;
761 rq->cmd_flags &= ~REQ_DONTPREP;
762
763 free_rq_clone(clone);
764}
765
/*
 * Requeue the original request of a clone.
 */
769void dm_requeue_unmapped_request(struct request *clone)
770{
771 struct dm_rq_target_io *tio = clone->end_io_data;
772 struct mapped_device *md = tio->md;
773 struct request *rq = tio->orig;
774 struct request_queue *q = rq->q;
775 unsigned long flags;
776
777 dm_unprep_request(rq);
778
779 spin_lock_irqsave(q->queue_lock, flags);
780 if (elv_queue_empty(q))
781 blk_plug_device(q);
782 blk_requeue_request(q, rq);
783 spin_unlock_irqrestore(q->queue_lock, flags);
784
785 rq_completed(md, 0);
786}
787EXPORT_SYMBOL_GPL(dm_requeue_unmapped_request);
788
789static void __stop_queue(struct request_queue *q)
790{
791 blk_stop_queue(q);
792}
793
794static void stop_queue(struct request_queue *q)
795{
796 unsigned long flags;
797
798 spin_lock_irqsave(q->queue_lock, flags);
799 __stop_queue(q);
800 spin_unlock_irqrestore(q->queue_lock, flags);
801}
802
803static void __start_queue(struct request_queue *q)
804{
805 if (blk_queue_stopped(q))
806 blk_start_queue(q);
807}
808
809static void start_queue(struct request_queue *q)
810{
811 unsigned long flags;
812
813 spin_lock_irqsave(q->queue_lock, flags);
814 __start_queue(q);
815 spin_unlock_irqrestore(q->queue_lock, flags);
816}
817
/*
 * Complete the clone and the original request.
 * Must be called without the queue lock held.
 */
822static void dm_end_request(struct request *clone, int error)
823{
824 struct dm_rq_target_io *tio = clone->end_io_data;
825 struct mapped_device *md = tio->md;
826 struct request *rq = tio->orig;
827
828 if (blk_pc_request(rq)) {
829 rq->errors = clone->errors;
830 rq->resid_len = clone->resid_len;
831
832 if (rq->sense)
833
834
835
836
837
838 rq->sense_len = clone->sense_len;
839 }
840
841 free_rq_clone(clone);
842
843 blk_end_request_all(rq, error);
844
845 rq_completed(md, 1);
846}
847
/*
 * Request completion handler for request-based dm
 */
851static void dm_softirq_done(struct request *rq)
852{
853 struct request *clone = rq->completion_data;
854 struct dm_rq_target_io *tio = clone->end_io_data;
855 dm_request_endio_fn rq_end_io = tio->ti->type->rq_end_io;
856 int error = tio->error;
857
858 if (!(rq->cmd_flags & REQ_FAILED) && rq_end_io)
859 error = rq_end_io(tio->ti, clone, error, &tio->info);
860
861 if (error <= 0)
862
863 dm_end_request(clone, error);
864 else if (error == DM_ENDIO_INCOMPLETE)
865
866 return;
867 else if (error == DM_ENDIO_REQUEUE)
868
869 dm_requeue_unmapped_request(clone);
870 else {
871 DMWARN("unimplemented target endio return value: %d", error);
872 BUG();
873 }
874}
875
/*
 * Complete the clone and the original request with the error status
 * through softirq context.
 */
880static void dm_complete_request(struct request *clone, int error)
881{
882 struct dm_rq_target_io *tio = clone->end_io_data;
883 struct request *rq = tio->orig;
884
885 tio->error = error;
886 rq->completion_data = clone;
887 blk_complete_request(rq);
888}
889
/*
 * Complete the not-mapped clone and the original request with the error
 * status through softirq context.
 * The target's rq_end_io() function isn't called.
 * This may be used when the target's map_rq() function fails.
 */
896void dm_kill_unmapped_request(struct request *clone, int error)
897{
898 struct dm_rq_target_io *tio = clone->end_io_data;
899 struct request *rq = tio->orig;
900
901 rq->cmd_flags |= REQ_FAILED;
902 dm_complete_request(clone, error);
903}
904EXPORT_SYMBOL_GPL(dm_kill_unmapped_request);
905
/*
 * Called with the queue lock held
 */
909static void end_clone_request(struct request *clone, int error)
910{
911
912
913
914
915
916
917 __blk_put_request(clone->q, clone);
918
919
920
921
922
923
924
925
926
927 dm_complete_request(clone, error);
928}
929
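/*
 * Return the number of sectors that may be submitted to this target
 * starting at 'sector' without crossing a ti->split_io boundary.
 */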
930static sector_t max_io_len(struct mapped_device *md,
931 sector_t sector, struct dm_target *ti)
932{
933 sector_t offset = sector - ti->begin;
934 sector_t len = ti->len - offset;
935
936
937
938
939 if (ti->split_io) {
940 sector_t boundary;
941 boundary = ((offset + ti->split_io) & ~(ti->split_io - 1))
942 - offset;
943 if (len > boundary)
944 len = boundary;
945 }
946
947 return len;
948}
949
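/*
 * Hand a clone to the target's map function.  DM_MAPIO_REMAPPED clones
 * are submitted with generic_make_request(); errors and DM_MAPIO_REQUEUE
 * are reported through dec_pending() on the original io.
 */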
950static void __map_bio(struct dm_target *ti, struct bio *clone,
951 struct dm_target_io *tio)
952{
953 int r;
954 sector_t sector;
955 struct mapped_device *md;
956
957 clone->bi_end_io = clone_endio;
958 clone->bi_private = tio;
959
960
961
962
963
964
965 atomic_inc(&tio->io->io_count);
966 sector = clone->bi_sector;
967 r = ti->type->map(ti, clone, &tio->info);
968 if (r == DM_MAPIO_REMAPPED) {
969
970
971 trace_block_remap(bdev_get_queue(clone->bi_bdev), clone,
972 tio->io->bio->bi_bdev->bd_dev, sector);
973
974 generic_make_request(clone);
975 } else if (r < 0 || r == DM_MAPIO_REQUEUE) {
976
977 md = tio->io->md;
978 dec_pending(tio->io, r);
979
980
981
982 clone->bi_private = md->bs;
983 bio_put(clone);
984 free_tio(md, tio);
985 } else if (r) {
986 DMWARN("unimplemented target map return value: %d", r);
987 BUG();
988 }
989}
990
991struct clone_info {
992 struct mapped_device *md;
993 struct dm_table *map;
994 struct bio *bio;
995 struct dm_io *io;
996 sector_t sector;
997 sector_t sector_count;
998 unsigned short idx;
999};
1000
1001static void dm_bio_destructor(struct bio *bio)
1002{
1003 struct bio_set *bs = bio->bi_private;
1004
1005 bio_free(bio, bs);
1006}
1007
/*
 * Creates a little bio that just does part of a bvec.
 */
1011static struct bio *split_bvec(struct bio *bio, sector_t sector,
1012 unsigned short idx, unsigned int offset,
1013 unsigned int len, struct bio_set *bs)
1014{
1015 struct bio *clone;
1016 struct bio_vec *bv = bio->bi_io_vec + idx;
1017
1018 clone = bio_alloc_bioset(GFP_NOIO, 1, bs);
1019 clone->bi_destructor = dm_bio_destructor;
1020 *clone->bi_io_vec = *bv;
1021
1022 clone->bi_sector = sector;
1023 clone->bi_bdev = bio->bi_bdev;
1024 clone->bi_rw = bio->bi_rw & ~(1 << BIO_RW_BARRIER);
1025 clone->bi_vcnt = 1;
1026 clone->bi_size = to_bytes(len);
1027 clone->bi_io_vec->bv_offset = offset;
1028 clone->bi_io_vec->bv_len = clone->bi_size;
1029 clone->bi_flags |= 1 << BIO_CLONED;
1030
1031 if (bio_integrity(bio)) {
1032 bio_integrity_clone(clone, bio, GFP_NOIO, bs);
1033 bio_integrity_trim(clone,
1034 bio_sector_offset(bio, idx, offset), len);
1035 }
1036
1037 return clone;
1038}
1039
/*
 * Creates a bio that consists of range of complete bvecs.
 */
1043static struct bio *clone_bio(struct bio *bio, sector_t sector,
1044 unsigned short idx, unsigned short bv_count,
1045 unsigned int len, struct bio_set *bs)
1046{
1047 struct bio *clone;
1048
1049 clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs);
1050 __bio_clone(clone, bio);
1051 clone->bi_rw &= ~(1 << BIO_RW_BARRIER);
1052 clone->bi_destructor = dm_bio_destructor;
1053 clone->bi_sector = sector;
1054 clone->bi_idx = idx;
1055 clone->bi_vcnt = idx + bv_count;
1056 clone->bi_size = to_bytes(len);
1057 clone->bi_flags &= ~(1 << BIO_SEG_VALID);
1058
1059 if (bio_integrity(bio)) {
1060 bio_integrity_clone(clone, bio, GFP_NOIO, bs);
1061
1062 if (idx != bio->bi_idx || clone->bi_size < bio->bi_size)
1063 bio_integrity_trim(clone,
1064 bio_sector_offset(bio, idx, 0), len);
1065 }
1066
1067 return clone;
1068}
1069
1070static struct dm_target_io *alloc_tio(struct clone_info *ci,
1071 struct dm_target *ti)
1072{
1073 struct dm_target_io *tio = mempool_alloc(ci->md->tio_pool, GFP_NOIO);
1074
1075 tio->io = ci->io;
1076 tio->ti = ti;
1077 memset(&tio->info, 0, sizeof(tio->info));
1078
1079 return tio;
1080}
1081
1082static void __flush_target(struct clone_info *ci, struct dm_target *ti,
1083 unsigned flush_nr)
1084{
1085 struct dm_target_io *tio = alloc_tio(ci, ti);
1086 struct bio *clone;
1087
1088 tio->info.flush_request = flush_nr;
1089
1090 clone = bio_alloc_bioset(GFP_NOIO, 0, ci->md->bs);
1091 __bio_clone(clone, ci->bio);
1092 clone->bi_destructor = dm_bio_destructor;
1093
1094 __map_bio(ti, clone, tio);
1095}
1096
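/*
 * An empty barrier is sent to every target, once per num_flush_requests.
 */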
1097static int __clone_and_map_empty_barrier(struct clone_info *ci)
1098{
1099 unsigned target_nr = 0, flush_nr;
1100 struct dm_target *ti;
1101
1102 while ((ti = dm_table_get_target(ci->map, target_nr++)))
1103 for (flush_nr = 0; flush_nr < ti->num_flush_requests;
1104 flush_nr++)
1105 __flush_target(ci, ti, flush_nr);
1106
1107 ci->sector_count = 0;
1108
1109 return 0;
1110}
1111
1112static int __clone_and_map(struct clone_info *ci)
1113{
1114 struct bio *clone, *bio = ci->bio;
1115 struct dm_target *ti;
1116 sector_t len = 0, max;
1117 struct dm_target_io *tio;
1118
1119 if (unlikely(bio_empty_barrier(bio)))
1120 return __clone_and_map_empty_barrier(ci);
1121
1122 ti = dm_table_find_target(ci->map, ci->sector);
1123 if (!dm_target_is_valid(ti))
1124 return -EIO;
1125
1126 max = max_io_len(ci->md, ci->sector, ti);
1127
1128
1129
1130
1131 tio = alloc_tio(ci, ti);
1132
1133 if (ci->sector_count <= max) {
1134
1135
1136
1137
1138 clone = clone_bio(bio, ci->sector, ci->idx,
1139 bio->bi_vcnt - ci->idx, ci->sector_count,
1140 ci->md->bs);
1141 __map_bio(ti, clone, tio);
1142 ci->sector_count = 0;
1143
1144 } else if (to_sector(bio->bi_io_vec[ci->idx].bv_len) <= max) {
1145
1146
1147
1148
1149 int i;
1150 sector_t remaining = max;
1151 sector_t bv_len;
1152
1153 for (i = ci->idx; remaining && (i < bio->bi_vcnt); i++) {
1154 bv_len = to_sector(bio->bi_io_vec[i].bv_len);
1155
1156 if (bv_len > remaining)
1157 break;
1158
1159 remaining -= bv_len;
1160 len += bv_len;
1161 }
1162
1163 clone = clone_bio(bio, ci->sector, ci->idx, i - ci->idx, len,
1164 ci->md->bs);
1165 __map_bio(ti, clone, tio);
1166
1167 ci->sector += len;
1168 ci->sector_count -= len;
1169 ci->idx = i;
1170
1171 } else {
1172
1173
1174
1175 struct bio_vec *bv = bio->bi_io_vec + ci->idx;
1176 sector_t remaining = to_sector(bv->bv_len);
1177 unsigned int offset = 0;
1178
1179 do {
1180 if (offset) {
1181 ti = dm_table_find_target(ci->map, ci->sector);
1182 if (!dm_target_is_valid(ti))
1183 return -EIO;
1184
1185 max = max_io_len(ci->md, ci->sector, ti);
1186
1187 tio = alloc_tio(ci, ti);
1188 }
1189
1190 len = min(remaining, max);
1191
1192 clone = split_bvec(bio, ci->sector, ci->idx,
1193 bv->bv_offset + offset, len,
1194 ci->md->bs);
1195
1196 __map_bio(ti, clone, tio);
1197
1198 ci->sector += len;
1199 ci->sector_count -= len;
1200 offset += to_bytes(len);
1201 } while (remaining -= len);
1202
1203 ci->idx++;
1204 }
1205
1206 return 0;
1207}
1208
/*
 * Split the bio into several clones and submit it to targets.
 */
1212static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
1213{
1214 struct clone_info ci;
1215 int error = 0;
1216
1217 ci.map = dm_get_table(md);
1218 if (unlikely(!ci.map)) {
1219 if (!bio_rw_flagged(bio, BIO_RW_BARRIER))
1220 bio_io_error(bio);
1221 else
1222 if (!md->barrier_error)
1223 md->barrier_error = -EIO;
1224 return;
1225 }
1226
1227 ci.md = md;
1228 ci.bio = bio;
1229 ci.io = alloc_io(md);
1230 ci.io->error = 0;
1231 atomic_set(&ci.io->io_count, 1);
1232 ci.io->bio = bio;
1233 ci.io->md = md;
1234 spin_lock_init(&ci.io->endio_lock);
1235 ci.sector = bio->bi_sector;
1236 ci.sector_count = bio_sectors(bio);
1237 if (unlikely(bio_empty_barrier(bio)))
1238 ci.sector_count = 1;
1239 ci.idx = bio->bi_idx;
1240
1241 start_io_acct(ci.io);
1242 while (ci.sector_count && !error)
1243 error = __clone_and_map(&ci);
1244
1245
1246 dec_pending(ci.io, error);
1247 dm_table_put(ci.map);
1248}
1249
1250
1251
1252
1253static int dm_merge_bvec(struct request_queue *q,
1254 struct bvec_merge_data *bvm,
1255 struct bio_vec *biovec)
1256{
1257 struct mapped_device *md = q->queuedata;
1258 struct dm_table *map = dm_get_table(md);
1259 struct dm_target *ti;
1260 sector_t max_sectors;
1261 int max_size = 0;
1262
1263 if (unlikely(!map))
1264 goto out;
1265
1266 ti = dm_table_find_target(map, bvm->bi_sector);
1267 if (!dm_target_is_valid(ti))
1268 goto out_table;
1269
1270
1271
1272
1273 max_sectors = min(max_io_len(md, bvm->bi_sector, ti),
1274 (sector_t) BIO_MAX_SECTORS);
1275 max_size = (max_sectors << SECTOR_SHIFT) - bvm->bi_size;
1276 if (max_size < 0)
1277 max_size = 0;
1278
1279
1280
1281
1282
1283
1284 if (max_size && ti->type->merge)
1285 max_size = ti->type->merge(ti, bvm, biovec, max_size);
1286
1287
1288
1289
1290
1291
1292
1293 else if (queue_max_hw_sectors(q) <= PAGE_SIZE >> 9)
1294
1295 max_size = 0;
1296
1297out_table:
1298 dm_table_put(map);
1299
1300out:
1301
1302
1303
1304 if (max_size <= biovec->bv_len && !(bvm->bi_size >> SECTOR_SHIFT))
1305 max_size = biovec->bv_len;
1306
1307 return max_size;
1308}
1309
/*
 * The request function that just remaps the bio built up by
 * dm_merge_bvec.
 */
1314static int _dm_request(struct request_queue *q, struct bio *bio)
1315{
1316 int rw = bio_data_dir(bio);
1317 struct mapped_device *md = q->queuedata;
1318 int cpu;
1319
1320 down_read(&md->io_lock);
1321
1322 cpu = part_stat_lock();
1323 part_stat_inc(cpu, &dm_disk(md)->part0, ios[rw]);
1324 part_stat_add(cpu, &dm_disk(md)->part0, sectors[rw], bio_sectors(bio));
1325 part_stat_unlock();
1326
1327
1328
1329
1330
1331 if (unlikely(test_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags)) ||
1332 unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) {
1333 up_read(&md->io_lock);
1334
1335 if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) &&
1336 bio_rw(bio) == READA) {
1337 bio_io_error(bio);
1338 return 0;
1339 }
1340
1341 queue_io(md, bio);
1342
1343 return 0;
1344 }
1345
1346 __split_and_process_bio(md, bio);
1347 up_read(&md->io_lock);
1348 return 0;
1349}
1350
1351static int dm_make_request(struct request_queue *q, struct bio *bio)
1352{
1353 struct mapped_device *md = q->queuedata;
1354
1355 if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) {
1356 bio_endio(bio, -EOPNOTSUPP);
1357 return 0;
1358 }
1359
1360 return md->saved_make_request_fn(q, bio);
1361}
1362
1363static int dm_request_based(struct mapped_device *md)
1364{
1365 return blk_queue_stackable(md->queue);
1366}
1367
1368static int dm_request(struct request_queue *q, struct bio *bio)
1369{
1370 struct mapped_device *md = q->queuedata;
1371
1372 if (dm_request_based(md))
1373 return dm_make_request(q, bio);
1374
1375 return _dm_request(q, bio);
1376}
1377
1378void dm_dispatch_request(struct request *rq)
1379{
1380 int r;
1381
1382 if (blk_queue_io_stat(rq->q))
1383 rq->cmd_flags |= REQ_IO_STAT;
1384
1385 rq->start_time = jiffies;
1386 r = blk_insert_cloned_request(rq->q, rq);
1387 if (r)
1388 dm_complete_request(rq, r);
1389}
1390EXPORT_SYMBOL_GPL(dm_dispatch_request);
1391
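/*
 * Constructor/destructor pair for the bios of a request clone, used by
 * blk_rq_prep_clone() in setup_clone() below.
 */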
1392static void dm_rq_bio_destructor(struct bio *bio)
1393{
1394 struct dm_rq_clone_bio_info *info = bio->bi_private;
1395 struct mapped_device *md = info->tio->md;
1396
1397 free_bio_info(info);
1398 bio_free(bio, md->bs);
1399}
1400
1401static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig,
1402 void *data)
1403{
1404 struct dm_rq_target_io *tio = data;
1405 struct mapped_device *md = tio->md;
1406 struct dm_rq_clone_bio_info *info = alloc_bio_info(md);
1407
1408 if (!info)
1409 return -ENOMEM;
1410
1411 info->orig = bio_orig;
1412 info->tio = tio;
1413 bio->bi_end_io = end_clone_bio;
1414 bio->bi_private = info;
1415 bio->bi_destructor = dm_rq_bio_destructor;
1416
1417 return 0;
1418}
1419
1420static int setup_clone(struct request *clone, struct request *rq,
1421 struct dm_rq_target_io *tio)
1422{
1423 int r = blk_rq_prep_clone(clone, rq, tio->md->bs, GFP_ATOMIC,
1424 dm_rq_bio_constructor, tio);
1425
1426 if (r)
1427 return r;
1428
1429 clone->cmd = rq->cmd;
1430 clone->cmd_len = rq->cmd_len;
1431 clone->sense = rq->sense;
1432 clone->buffer = rq->buffer;
1433 clone->end_io = end_clone_request;
1434 clone->end_io_data = tio;
1435
1436 return 0;
1437}
1438
1439static int dm_rq_flush_suspending(struct mapped_device *md)
1440{
1441 return !md->suspend_rq.special;
1442}
1443
/*
 * Called with the queue lock held.
 */
1447static int dm_prep_fn(struct request_queue *q, struct request *rq)
1448{
1449 struct mapped_device *md = q->queuedata;
1450 struct dm_rq_target_io *tio;
1451 struct request *clone;
1452
1453 if (unlikely(rq == &md->suspend_rq)) {
1454 if (dm_rq_flush_suspending(md))
1455 return BLKPREP_OK;
1456 else
1457
1458 return BLKPREP_KILL;
1459 }
1460
1461 if (unlikely(rq->special)) {
1462 DMWARN("Already has something in rq->special.");
1463 return BLKPREP_KILL;
1464 }
1465
1466 tio = alloc_rq_tio(md);
1467 if (!tio)
1468
1469 return BLKPREP_DEFER;
1470
1471 tio->md = md;
1472 tio->ti = NULL;
1473 tio->orig = rq;
1474 tio->error = 0;
1475 memset(&tio->info, 0, sizeof(tio->info));
1476
1477 clone = &tio->clone;
1478 if (setup_clone(clone, rq, tio)) {
1479
1480 free_rq_tio(tio);
1481 return BLKPREP_DEFER;
1482 }
1483
1484 rq->special = clone;
1485 rq->cmd_flags |= REQ_DONTPREP;
1486
1487 return BLKPREP_OK;
1488}
1489
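/*
 * Pass a clone of the original request to the target's map_rq hook and
 * dispatch, requeue or kill it according to the return value.
 */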
1490static void map_request(struct dm_target *ti, struct request *rq,
1491 struct mapped_device *md)
1492{
1493 int r;
1494 struct request *clone = rq->special;
1495 struct dm_rq_target_io *tio = clone->end_io_data;
1496
1497
1498
1499
1500
1501
1502
1503
1504 dm_get(md);
1505
1506 tio->ti = ti;
1507 r = ti->type->map_rq(ti, clone, &tio->info);
1508 switch (r) {
1509 case DM_MAPIO_SUBMITTED:
1510
1511 break;
1512 case DM_MAPIO_REMAPPED:
1513
1514 dm_dispatch_request(clone);
1515 break;
1516 case DM_MAPIO_REQUEUE:
1517
1518 dm_requeue_unmapped_request(clone);
1519 break;
1520 default:
1521 if (r > 0) {
1522 DMWARN("unimplemented target map return value: %d", r);
1523 BUG();
1524 }
1525
1526
1527 dm_kill_unmapped_request(clone, r);
1528 break;
1529 }
1530}
1531
/*
 * q->request_fn for request-based dm.
 * Called with the queue lock held.
 */
1536static void dm_request_fn(struct request_queue *q)
1537{
1538 struct mapped_device *md = q->queuedata;
1539 struct dm_table *map = dm_get_table(md);
1540 struct dm_target *ti;
1541 struct request *rq;
1542
1543
1544
1545
1546
1547 while (!blk_queue_plugged(q) && !blk_queue_stopped(q)) {
1548 rq = blk_peek_request(q);
1549 if (!rq)
1550 goto plug_and_out;
1551
1552 if (unlikely(rq == &md->suspend_rq)) {
1553 if (queue_in_flight(q))
1554
1555 goto plug_and_out;
1556
1557
1558 __stop_queue(q);
1559 blk_start_request(rq);
1560 __blk_end_request_all(rq, 0);
1561 wake_up(&md->wait);
1562 goto out;
1563 }
1564
1565 ti = dm_table_find_target(map, blk_rq_pos(rq));
1566 if (ti->type->busy && ti->type->busy(ti))
1567 goto plug_and_out;
1568
1569 blk_start_request(rq);
1570 spin_unlock(q->queue_lock);
1571 map_request(ti, rq, md);
1572 spin_lock_irq(q->queue_lock);
1573 }
1574
1575 goto out;
1576
1577plug_and_out:
1578 if (!elv_queue_empty(q))
1579
1580 blk_plug_device(q);
1581
1582out:
1583 dm_table_put(map);
1584
1585 return;
1586}
1587
1588int dm_underlying_device_busy(struct request_queue *q)
1589{
1590 return blk_lld_busy(q);
1591}
1592EXPORT_SYMBOL_GPL(dm_underlying_device_busy);
1593
1594static int dm_lld_busy(struct request_queue *q)
1595{
1596 int r;
1597 struct mapped_device *md = q->queuedata;
1598 struct dm_table *map = dm_get_table(md);
1599
1600 if (!map || test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))
1601 r = 1;
1602 else
1603 r = dm_table_any_busy_target(map);
1604
1605 dm_table_put(map);
1606
1607 return r;
1608}
1609
1610static void dm_unplug_all(struct request_queue *q)
1611{
1612 struct mapped_device *md = q->queuedata;
1613 struct dm_table *map = dm_get_table(md);
1614
1615 if (map) {
1616 if (dm_request_based(md))
1617 generic_unplug_device(q);
1618
1619 dm_table_unplug_all(map);
1620 dm_table_put(map);
1621 }
1622}
1623
1624static int dm_any_congested(void *congested_data, int bdi_bits)
1625{
1626 int r = bdi_bits;
1627 struct mapped_device *md = congested_data;
1628 struct dm_table *map;
1629
1630 if (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) {
1631 map = dm_get_table(md);
1632 if (map) {
1633
1634
1635
1636
1637 if (dm_request_based(md))
1638 r = md->queue->backing_dev_info.state &
1639 bdi_bits;
1640 else
1641 r = dm_table_any_congested(map, bdi_bits);
1642
1643 dm_table_put(map);
1644 }
1645 }
1646
1647 return r;
1648}
1649
/*-----------------------------------------------------------------
 * An IDR is used to keep track of allocated minor numbers.
 *---------------------------------------------------------------*/
1653static DEFINE_IDR(_minor_idr);
1654
1655static void free_minor(int minor)
1656{
1657 spin_lock(&_minor_lock);
1658 idr_remove(&_minor_idr, minor);
1659 spin_unlock(&_minor_lock);
1660}
1661
/*
 * See if the device with a specific minor # is free.
 */
1665static int specific_minor(int minor)
1666{
1667 int r, m;
1668
1669 if (minor >= (1 << MINORBITS))
1670 return -EINVAL;
1671
1672 r = idr_pre_get(&_minor_idr, GFP_KERNEL);
1673 if (!r)
1674 return -ENOMEM;
1675
1676 spin_lock(&_minor_lock);
1677
1678 if (idr_find(&_minor_idr, minor)) {
1679 r = -EBUSY;
1680 goto out;
1681 }
1682
1683 r = idr_get_new_above(&_minor_idr, MINOR_ALLOCED, minor, &m);
1684 if (r)
1685 goto out;
1686
1687 if (m != minor) {
1688 idr_remove(&_minor_idr, m);
1689 r = -EBUSY;
1690 goto out;
1691 }
1692
1693out:
1694 spin_unlock(&_minor_lock);
1695 return r;
1696}
1697
1698static int next_free_minor(int *minor)
1699{
1700 int r, m;
1701
1702 r = idr_pre_get(&_minor_idr, GFP_KERNEL);
1703 if (!r)
1704 return -ENOMEM;
1705
1706 spin_lock(&_minor_lock);
1707
1708 r = idr_get_new(&_minor_idr, MINOR_ALLOCED, &m);
1709 if (r)
1710 goto out;
1711
1712 if (m >= (1 << MINORBITS)) {
1713 idr_remove(&_minor_idr, m);
1714 r = -ENOSPC;
1715 goto out;
1716 }
1717
1718 *minor = m;
1719
1720out:
1721 spin_unlock(&_minor_lock);
1722 return r;
1723}
1724
1725static const struct block_device_operations dm_blk_dops;
1726
1727static void dm_wq_work(struct work_struct *work);
1728
/*
 * Allocate and initialise a blank device with a given minor.
 */
1732static struct mapped_device *alloc_dev(int minor)
1733{
1734 int r;
1735 struct mapped_device *md = kzalloc(sizeof(*md), GFP_KERNEL);
1736 void *old_md;
1737
1738 if (!md) {
1739 DMWARN("unable to allocate device, out of memory.");
1740 return NULL;
1741 }
1742
1743 if (!try_module_get(THIS_MODULE))
1744 goto bad_module_get;
1745
1746
1747 if (minor == DM_ANY_MINOR)
1748 r = next_free_minor(&minor);
1749 else
1750 r = specific_minor(minor);
1751 if (r < 0)
1752 goto bad_minor;
1753
1754 init_rwsem(&md->io_lock);
1755 mutex_init(&md->suspend_lock);
1756 spin_lock_init(&md->deferred_lock);
1757 rwlock_init(&md->map_lock);
1758 atomic_set(&md->holders, 1);
1759 atomic_set(&md->open_count, 0);
1760 atomic_set(&md->event_nr, 0);
1761 atomic_set(&md->uevent_seq, 0);
1762 INIT_LIST_HEAD(&md->uevent_list);
1763 spin_lock_init(&md->uevent_lock);
1764
1765 md->queue = blk_init_queue(dm_request_fn, NULL);
1766 if (!md->queue)
1767 goto bad_queue;
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779 queue_flag_clear_unlocked(QUEUE_FLAG_STACKABLE, md->queue);
1780 md->saved_make_request_fn = md->queue->make_request_fn;
1781 md->queue->queuedata = md;
1782 md->queue->backing_dev_info.congested_fn = dm_any_congested;
1783 md->queue->backing_dev_info.congested_data = md;
1784 blk_queue_make_request(md->queue, dm_request);
1785 blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
1786 md->queue->unplug_fn = dm_unplug_all;
1787 blk_queue_merge_bvec(md->queue, dm_merge_bvec);
1788 blk_queue_softirq_done(md->queue, dm_softirq_done);
1789 blk_queue_prep_rq(md->queue, dm_prep_fn);
1790 blk_queue_lld_busy(md->queue, dm_lld_busy);
1791
1792 md->disk = alloc_disk(1);
1793 if (!md->disk)
1794 goto bad_disk;
1795
1796 atomic_set(&md->pending[0], 0);
1797 atomic_set(&md->pending[1], 0);
1798 init_waitqueue_head(&md->wait);
1799 INIT_WORK(&md->work, dm_wq_work);
1800 init_waitqueue_head(&md->eventq);
1801
1802 md->disk->major = _major;
1803 md->disk->first_minor = minor;
1804 md->disk->fops = &dm_blk_dops;
1805 md->disk->queue = md->queue;
1806 md->disk->private_data = md;
1807 sprintf(md->disk->disk_name, "dm-%d", minor);
1808 add_disk(md->disk);
1809 format_dev_t(md->name, MKDEV(_major, minor));
1810
1811 md->wq = create_singlethread_workqueue("kdmflush");
1812 if (!md->wq)
1813 goto bad_thread;
1814
1815 md->bdev = bdget_disk(md->disk, 0);
1816 if (!md->bdev)
1817 goto bad_bdev;
1818
1819
1820 spin_lock(&_minor_lock);
1821 old_md = idr_replace(&_minor_idr, md, minor);
1822 spin_unlock(&_minor_lock);
1823
1824 BUG_ON(old_md != MINOR_ALLOCED);
1825
1826 return md;
1827
1828bad_bdev:
1829 destroy_workqueue(md->wq);
1830bad_thread:
1831 del_gendisk(md->disk);
1832 put_disk(md->disk);
1833bad_disk:
1834 blk_cleanup_queue(md->queue);
1835bad_queue:
1836 free_minor(minor);
1837bad_minor:
1838 module_put(THIS_MODULE);
1839bad_module_get:
1840 kfree(md);
1841 return NULL;
1842}
1843
1844static void unlock_fs(struct mapped_device *md);
1845
1846static void free_dev(struct mapped_device *md)
1847{
1848 int minor = MINOR(disk_devt(md->disk));
1849
1850 unlock_fs(md);
1851 bdput(md->bdev);
1852 destroy_workqueue(md->wq);
1853 if (md->tio_pool)
1854 mempool_destroy(md->tio_pool);
1855 if (md->io_pool)
1856 mempool_destroy(md->io_pool);
1857 if (md->bs)
1858 bioset_free(md->bs);
1859 blk_integrity_unregister(md->disk);
1860 del_gendisk(md->disk);
1861 free_minor(minor);
1862
1863 spin_lock(&_minor_lock);
1864 md->disk->private_data = NULL;
1865 spin_unlock(&_minor_lock);
1866
1867 put_disk(md->disk);
1868 blk_cleanup_queue(md->queue);
1869 module_put(THIS_MODULE);
1870 kfree(md);
1871}
1872
1873static void __bind_mempools(struct mapped_device *md, struct dm_table *t)
1874{
1875 struct dm_md_mempools *p;
1876
1877 if (md->io_pool && md->tio_pool && md->bs)
1878
1879 goto out;
1880
1881 p = dm_table_get_md_mempools(t);
1882 BUG_ON(!p || md->io_pool || md->tio_pool || md->bs);
1883
1884 md->io_pool = p->io_pool;
1885 p->io_pool = NULL;
1886 md->tio_pool = p->tio_pool;
1887 p->tio_pool = NULL;
1888 md->bs = p->bs;
1889 p->bs = NULL;
1890
1891out:
1892
1893 dm_table_free_md_mempools(t);
1894}
1895
/*
 * Table event callback: deliver queued uevents and wake anyone waiting
 * in dm_wait_event().
 */
1899static void event_callback(void *context)
1900{
1901 unsigned long flags;
1902 LIST_HEAD(uevents);
1903 struct mapped_device *md = (struct mapped_device *) context;
1904
1905 spin_lock_irqsave(&md->uevent_lock, flags);
1906 list_splice_init(&md->uevent_list, &uevents);
1907 spin_unlock_irqrestore(&md->uevent_lock, flags);
1908
1909 dm_send_uevents(&uevents, &disk_to_dev(md->disk)->kobj);
1910
1911 atomic_inc(&md->event_nr);
1912 wake_up(&md->eventq);
1913}
1914
1915static void __set_size(struct mapped_device *md, sector_t size)
1916{
1917 set_capacity(md->disk, size);
1918
1919 mutex_lock(&md->bdev->bd_inode->i_mutex);
1920 i_size_write(md->bdev->bd_inode, (loff_t)size << SECTOR_SHIFT);
1921 mutex_unlock(&md->bdev->bd_inode->i_mutex);
1922}
1923
1924static int __bind(struct mapped_device *md, struct dm_table *t,
1925 struct queue_limits *limits)
1926{
1927 struct request_queue *q = md->queue;
1928 sector_t size;
1929 unsigned long flags;
1930
1931 size = dm_table_get_size(t);
1932
1933
1934
1935
1936 if (size != get_capacity(md->disk))
1937 memset(&md->geometry, 0, sizeof(md->geometry));
1938
1939 __set_size(md, size);
1940
1941 if (!size) {
1942 dm_table_destroy(t);
1943 return 0;
1944 }
1945
1946 dm_table_event_callback(t, event_callback, md);
1947
1948
1949
1950
1951
1952
1953
1954
1955 if (dm_table_request_based(t) && !blk_queue_stopped(q))
1956 stop_queue(q);
1957
1958 __bind_mempools(md, t);
1959
1960 write_lock_irqsave(&md->map_lock, flags);
1961 md->map = t;
1962 dm_table_set_restrictions(t, q, limits);
1963 write_unlock_irqrestore(&md->map_lock, flags);
1964
1965 return 0;
1966}
1967
1968static void __unbind(struct mapped_device *md)
1969{
1970 struct dm_table *map = md->map;
1971 unsigned long flags;
1972
1973 if (!map)
1974 return;
1975
1976 dm_table_event_callback(map, NULL, NULL);
1977 write_lock_irqsave(&md->map_lock, flags);
1978 md->map = NULL;
1979 write_unlock_irqrestore(&md->map_lock, flags);
1980 dm_table_destroy(map);
1981}
1982
/*
 * Constructor for a new device.
 */
1986int dm_create(int minor, struct mapped_device **result)
1987{
1988 struct mapped_device *md;
1989
1990 md = alloc_dev(minor);
1991 if (!md)
1992 return -ENXIO;
1993
1994 dm_sysfs_init(md);
1995
1996 *result = md;
1997 return 0;
1998}
1999
2000static struct mapped_device *dm_find_md(dev_t dev)
2001{
2002 struct mapped_device *md;
2003 unsigned minor = MINOR(dev);
2004
2005 if (MAJOR(dev) != _major || minor >= (1 << MINORBITS))
2006 return NULL;
2007
2008 spin_lock(&_minor_lock);
2009
2010 md = idr_find(&_minor_idr, minor);
2011 if (md && (md == MINOR_ALLOCED ||
2012 (MINOR(disk_devt(dm_disk(md))) != minor) ||
2013 test_bit(DMF_FREEING, &md->flags))) {
2014 md = NULL;
2015 goto out;
2016 }
2017
2018out:
2019 spin_unlock(&_minor_lock);
2020
2021 return md;
2022}
2023
2024struct mapped_device *dm_get_md(dev_t dev)
2025{
2026 struct mapped_device *md = dm_find_md(dev);
2027
2028 if (md)
2029 dm_get(md);
2030
2031 return md;
2032}
2033
2034void *dm_get_mdptr(struct mapped_device *md)
2035{
2036 return md->interface_ptr;
2037}
2038
2039void dm_set_mdptr(struct mapped_device *md, void *ptr)
2040{
2041 md->interface_ptr = ptr;
2042}
2043
2044void dm_get(struct mapped_device *md)
2045{
2046 atomic_inc(&md->holders);
2047}
2048
2049const char *dm_device_name(struct mapped_device *md)
2050{
2051 return md->name;
2052}
2053EXPORT_SYMBOL_GPL(dm_device_name);
2054
2055void dm_put(struct mapped_device *md)
2056{
2057 struct dm_table *map;
2058
2059 BUG_ON(test_bit(DMF_FREEING, &md->flags));
2060
2061 if (atomic_dec_and_lock(&md->holders, &_minor_lock)) {
2062 map = dm_get_table(md);
2063 idr_replace(&_minor_idr, MINOR_ALLOCED,
2064 MINOR(disk_devt(dm_disk(md))));
2065 set_bit(DMF_FREEING, &md->flags);
2066 spin_unlock(&_minor_lock);
2067 if (!dm_suspended(md)) {
2068 dm_table_presuspend_targets(map);
2069 dm_table_postsuspend_targets(map);
2070 }
2071 dm_sysfs_exit(md);
2072 dm_table_put(map);
2073 __unbind(md);
2074 free_dev(md);
2075 }
2076}
2077EXPORT_SYMBOL_GPL(dm_put);
2078
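/*
 * Wait until there is no I/O in flight; for request-based dm the queue
 * must also have been stopped.  TASK_INTERRUPTIBLE lets a suspend be
 * interrupted by a signal.
 */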
2079static int dm_wait_for_completion(struct mapped_device *md, int interruptible)
2080{
2081 int r = 0;
2082 DECLARE_WAITQUEUE(wait, current);
2083 struct request_queue *q = md->queue;
2084 unsigned long flags;
2085
2086 dm_unplug_all(md->queue);
2087
2088 add_wait_queue(&md->wait, &wait);
2089
2090 while (1) {
2091 set_current_state(interruptible);
2092
2093 smp_mb();
2094 if (dm_request_based(md)) {
2095 spin_lock_irqsave(q->queue_lock, flags);
2096 if (!queue_in_flight(q) && blk_queue_stopped(q)) {
2097 spin_unlock_irqrestore(q->queue_lock, flags);
2098 break;
2099 }
2100 spin_unlock_irqrestore(q->queue_lock, flags);
2101 } else if (!atomic_read(&md->pending[0]) &&
2102 !atomic_read(&md->pending[1]))
2103 break;
2104
2105 if (interruptible == TASK_INTERRUPTIBLE &&
2106 signal_pending(current)) {
2107 r = -EINTR;
2108 break;
2109 }
2110
2111 io_schedule();
2112 }
2113 set_current_state(TASK_RUNNING);
2114
2115 remove_wait_queue(&md->wait, &wait);
2116
2117 return r;
2118}
2119
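/*
 * Wait for outstanding I/O, then issue an empty barrier to all targets
 * and wait for it to complete.
 */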
2120static void dm_flush(struct mapped_device *md)
2121{
2122 dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
2123
2124 bio_init(&md->barrier_bio);
2125 md->barrier_bio.bi_bdev = md->bdev;
2126 md->barrier_bio.bi_rw = WRITE_BARRIER;
2127 __split_and_process_bio(md, &md->barrier_bio);
2128
2129 dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
2130}
2131
2132static void process_barrier(struct mapped_device *md, struct bio *bio)
2133{
2134 md->barrier_error = 0;
2135
2136 dm_flush(md);
2137
2138 if (!bio_empty_barrier(bio)) {
2139 __split_and_process_bio(md, bio);
2140 dm_flush(md);
2141 }
2142
2143 if (md->barrier_error != DM_ENDIO_REQUEUE)
2144 bio_endio(bio, md->barrier_error);
2145 else {
2146 spin_lock_irq(&md->deferred_lock);
2147 bio_list_add_head(&md->deferred, bio);
2148 spin_unlock_irq(&md->deferred_lock);
2149 }
2150}
2151
/*
 * Process the deferred bios
 */
2155static void dm_wq_work(struct work_struct *work)
2156{
2157 struct mapped_device *md = container_of(work, struct mapped_device,
2158 work);
2159 struct bio *c;
2160
2161 down_write(&md->io_lock);
2162
2163 while (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) {
2164 spin_lock_irq(&md->deferred_lock);
2165 c = bio_list_pop(&md->deferred);
2166 spin_unlock_irq(&md->deferred_lock);
2167
2168 if (!c) {
2169 clear_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags);
2170 break;
2171 }
2172
2173 up_write(&md->io_lock);
2174
2175 if (dm_request_based(md))
2176 generic_make_request(c);
2177 else {
2178 if (bio_rw_flagged(c, BIO_RW_BARRIER))
2179 process_barrier(md, c);
2180 else
2181 __split_and_process_bio(md, c);
2182 }
2183
2184 down_write(&md->io_lock);
2185 }
2186
2187 up_write(&md->io_lock);
2188}
2189
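/*
 * Allow deferred I/O to be processed again and kick the workqueue.
 */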
2190static void dm_queue_flush(struct mapped_device *md)
2191{
2192 clear_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
2193 smp_mb__after_clear_bit();
2194 queue_work(md->wq, &md->work);
2195}
2196
/*
 * Swap in a new table (destroying old one).
 */
2200int dm_swap_table(struct mapped_device *md, struct dm_table *table)
2201{
2202 struct queue_limits limits;
2203 int r = -EINVAL;
2204
2205 mutex_lock(&md->suspend_lock);
2206
2207
2208 if (!dm_suspended(md))
2209 goto out;
2210
2211 r = dm_calculate_queue_limits(table, &limits);
2212 if (r)
2213 goto out;
2214
2215
2216 if (md->map &&
2217 (dm_table_get_type(md->map) != dm_table_get_type(table))) {
2218 DMWARN("can't change the device type after a table is bound");
2219 goto out;
2220 }
2221
2222 __unbind(md);
2223 r = __bind(md, table, &limits);
2224
2225out:
2226 mutex_unlock(&md->suspend_lock);
2227 return r;
2228}
2229
2230static void dm_rq_invalidate_suspend_marker(struct mapped_device *md)
2231{
2232 md->suspend_rq.special = (void *)0x1;
2233}
2234
2235static void dm_rq_abort_suspend(struct mapped_device *md, int noflush)
2236{
2237 struct request_queue *q = md->queue;
2238 unsigned long flags;
2239
2240 spin_lock_irqsave(q->queue_lock, flags);
2241 if (!noflush)
2242 dm_rq_invalidate_suspend_marker(md);
2243 __start_queue(q);
2244 spin_unlock_irqrestore(q->queue_lock, flags);
2245}
2246
2247static void dm_rq_start_suspend(struct mapped_device *md, int noflush)
2248{
2249 struct request *rq = &md->suspend_rq;
2250 struct request_queue *q = md->queue;
2251
2252 if (noflush)
2253 stop_queue(q);
2254 else {
2255 blk_rq_init(q, rq);
2256 blk_insert_request(q, rq, 0, NULL);
2257 }
2258}
2259
2260static int dm_rq_suspend_available(struct mapped_device *md, int noflush)
2261{
2262 int r = 1;
2263 struct request *rq = &md->suspend_rq;
2264 struct request_queue *q = md->queue;
2265 unsigned long flags;
2266
2267 if (noflush)
2268 return r;
2269
2270
2271 spin_lock_irqsave(q->queue_lock, flags);
2272 if (unlikely(rq->ref_count)) {
2273
2274
2275
2276
2277
2278
2279
2280
2281 BUG_ON(!rq->special);
2282 DMWARN("Invalidating the previous flush suspend is still in"
2283 " progress. Please retry later.");
2284 r = 0;
2285 }
2286 spin_unlock_irqrestore(q->queue_lock, flags);
2287
2288 return r;
2289}
2290
/*
 * Functions to lock and unlock any filesystem running on the
 * device.
 */
2295static int lock_fs(struct mapped_device *md)
2296{
2297 int r;
2298
2299 WARN_ON(md->frozen_sb);
2300
2301 md->frozen_sb = freeze_bdev(md->bdev);
2302 if (IS_ERR(md->frozen_sb)) {
2303 r = PTR_ERR(md->frozen_sb);
2304 md->frozen_sb = NULL;
2305 return r;
2306 }
2307
2308 set_bit(DMF_FROZEN, &md->flags);
2309
2310 return 0;
2311}
2312
2313static void unlock_fs(struct mapped_device *md)
2314{
2315 if (!test_bit(DMF_FROZEN, &md->flags))
2316 return;
2317
2318 thaw_bdev(md->bdev, md->frozen_sb);
2319 md->frozen_sb = NULL;
2320 clear_bit(DMF_FROZEN, &md->flags);
2321}
2322
/*
 * Suspend mechanism in request-based dm.
 *
 * Once the suspend starts, further incoming requests are deferred in the
 * request_queue.  For a flush suspend, a marker request (md->suspend_rq)
 * is inserted and the requests already queued are dispatched first; for a
 * noflush suspend the queue is stopped immediately.  The suspend completes
 * when no request is in flight and the queue has been stopped.
 */

/*
 * We need to be able to change a mapping table under a mounted
 * filesystem.  For example we might want to move some data in
 * the background.  Before the table can be swapped with
 * dm_bind_table, dm_suspend must be called to flush any in
 * flight bios and ensure that any further io gets deferred.
 */
2377int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
2378{
2379 struct dm_table *map = NULL;
2380 int r = 0;
2381 int do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG ? 1 : 0;
2382 int noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG ? 1 : 0;
2383
2384 mutex_lock(&md->suspend_lock);
2385
2386 if (dm_suspended(md)) {
2387 r = -EINVAL;
2388 goto out_unlock;
2389 }
2390
2391 if (dm_request_based(md) && !dm_rq_suspend_available(md, noflush)) {
2392 r = -EBUSY;
2393 goto out_unlock;
2394 }
2395
2396 map = dm_get_table(md);
2397
2398
2399
2400
2401
2402 if (noflush)
2403 set_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
2404
2405
2406 dm_table_presuspend_targets(map);
2407
2408
2409
2410
2411
2412 if (!noflush && do_lockfs) {
2413 r = lock_fs(md);
2414 if (r)
2415 goto out;
2416 }
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434 down_write(&md->io_lock);
2435 set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
2436 set_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags);
2437 up_write(&md->io_lock);
2438
2439 flush_workqueue(md->wq);
2440
2441 if (dm_request_based(md))
2442 dm_rq_start_suspend(md, noflush);
2443
2444
2445
2446
2447
2448
2449 r = dm_wait_for_completion(md, TASK_INTERRUPTIBLE);
2450
2451 down_write(&md->io_lock);
2452 if (noflush)
2453 clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
2454 up_write(&md->io_lock);
2455
2456
2457 if (r < 0) {
2458 dm_queue_flush(md);
2459
2460 if (dm_request_based(md))
2461 dm_rq_abort_suspend(md, noflush);
2462
2463 unlock_fs(md);
2464 goto out;
2465 }
2466
2467
2468
2469
2470
2471
2472
2473 dm_table_postsuspend_targets(map);
2474
2475 set_bit(DMF_SUSPENDED, &md->flags);
2476
2477out:
2478 dm_table_put(map);
2479
2480out_unlock:
2481 mutex_unlock(&md->suspend_lock);
2482 return r;
2483}
2484
2485int dm_resume(struct mapped_device *md)
2486{
2487 int r = -EINVAL;
2488 struct dm_table *map = NULL;
2489
2490 mutex_lock(&md->suspend_lock);
2491 if (!dm_suspended(md))
2492 goto out;
2493
2494 map = dm_get_table(md);
2495 if (!map || !dm_table_get_size(map))
2496 goto out;
2497
2498 r = dm_table_resume_targets(map);
2499 if (r)
2500 goto out;
2501
2502 dm_queue_flush(md);
2503
2504
2505
2506
2507
2508
2509 if (dm_request_based(md))
2510 start_queue(md->queue);
2511
2512 unlock_fs(md);
2513
2514 clear_bit(DMF_SUSPENDED, &md->flags);
2515
2516 dm_table_unplug_all(map);
2517 r = 0;
2518out:
2519 dm_table_put(map);
2520 mutex_unlock(&md->suspend_lock);
2521
2522 return r;
2523}
2524
/*-----------------------------------------------------------------
 * Event notification.
 *---------------------------------------------------------------*/
2528void dm_kobject_uevent(struct mapped_device *md, enum kobject_action action,
2529 unsigned cookie)
2530{
2531 char udev_cookie[DM_COOKIE_LENGTH];
2532 char *envp[] = { udev_cookie, NULL };
2533
2534 if (!cookie)
2535 kobject_uevent(&disk_to_dev(md->disk)->kobj, action);
2536 else {
2537 snprintf(udev_cookie, DM_COOKIE_LENGTH, "%s=%u",
2538 DM_COOKIE_ENV_VAR_NAME, cookie);
2539 kobject_uevent_env(&disk_to_dev(md->disk)->kobj, action, envp);
2540 }
2541}
2542
2543uint32_t dm_next_uevent_seq(struct mapped_device *md)
2544{
2545 return atomic_add_return(1, &md->uevent_seq);
2546}
2547
2548uint32_t dm_get_event_nr(struct mapped_device *md)
2549{
2550 return atomic_read(&md->event_nr);
2551}
2552
2553int dm_wait_event(struct mapped_device *md, int event_nr)
2554{
2555 return wait_event_interruptible(md->eventq,
2556 (event_nr != atomic_read(&md->event_nr)));
2557}
2558
2559void dm_uevent_add(struct mapped_device *md, struct list_head *elist)
2560{
2561 unsigned long flags;
2562
2563 spin_lock_irqsave(&md->uevent_lock, flags);
2564 list_add(elist, &md->uevent_list);
2565 spin_unlock_irqrestore(&md->uevent_lock, flags);
2566}
2567
2568
2569
2570
2571
2572struct gendisk *dm_disk(struct mapped_device *md)
2573{
2574 return md->disk;
2575}
2576
2577struct kobject *dm_kobject(struct mapped_device *md)
2578{
2579 return &md->kobj;
2580}
2581
/*
 * struct mapped_device should not be exported outside of dm.c
 * so use this check to verify that kobj is part of md structure
 */
2586struct mapped_device *dm_get_from_kobject(struct kobject *kobj)
2587{
2588 struct mapped_device *md;
2589
2590 md = container_of(kobj, struct mapped_device, kobj);
2591 if (&md->kobj != kobj)
2592 return NULL;
2593
2594 if (test_bit(DMF_FREEING, &md->flags) ||
2595 test_bit(DMF_DELETING, &md->flags))
2596 return NULL;
2597
2598 dm_get(md);
2599 return md;
2600}
2601
2602int dm_suspended(struct mapped_device *md)
2603{
2604 return test_bit(DMF_SUSPENDED, &md->flags);
2605}
2606
2607int dm_noflush_suspending(struct dm_target *ti)
2608{
2609 struct mapped_device *md = dm_table_get_md(ti->table);
2610 int r = __noflush_suspending(md);
2611
2612 dm_put(md);
2613
2614 return r;
2615}
2616EXPORT_SYMBOL_GPL(dm_noflush_suspending);
2617
2618struct dm_md_mempools *dm_alloc_md_mempools(unsigned type)
2619{
2620 struct dm_md_mempools *pools = kmalloc(sizeof(*pools), GFP_KERNEL);
2621
2622 if (!pools)
2623 return NULL;
2624
2625 pools->io_pool = (type == DM_TYPE_BIO_BASED) ?
2626 mempool_create_slab_pool(MIN_IOS, _io_cache) :
2627 mempool_create_slab_pool(MIN_IOS, _rq_bio_info_cache);
2628 if (!pools->io_pool)
2629 goto free_pools_and_out;
2630
2631 pools->tio_pool = (type == DM_TYPE_BIO_BASED) ?
2632 mempool_create_slab_pool(MIN_IOS, _tio_cache) :
2633 mempool_create_slab_pool(MIN_IOS, _rq_tio_cache);
2634 if (!pools->tio_pool)
2635 goto free_io_pool_and_out;
2636
2637 pools->bs = (type == DM_TYPE_BIO_BASED) ?
2638 bioset_create(16, 0) : bioset_create(MIN_IOS, 0);
2639 if (!pools->bs)
2640 goto free_tio_pool_and_out;
2641
2642 return pools;
2643
2644free_tio_pool_and_out:
2645 mempool_destroy(pools->tio_pool);
2646
2647free_io_pool_and_out:
2648 mempool_destroy(pools->io_pool);
2649
2650free_pools_and_out:
2651 kfree(pools);
2652
2653 return NULL;
2654}
2655
2656void dm_free_md_mempools(struct dm_md_mempools *pools)
2657{
2658 if (!pools)
2659 return;
2660
2661 if (pools->io_pool)
2662 mempool_destroy(pools->io_pool);
2663
2664 if (pools->tio_pool)
2665 mempool_destroy(pools->tio_pool);
2666
2667 if (pools->bs)
2668 bioset_free(pools->bs);
2669
2670 kfree(pools);
2671}
2672
2673static const struct block_device_operations dm_blk_dops = {
2674 .open = dm_blk_open,
2675 .release = dm_blk_close,
2676 .ioctl = dm_blk_ioctl,
2677 .getgeo = dm_blk_getgeo,
2678 .owner = THIS_MODULE
2679};
2680
2681EXPORT_SYMBOL(dm_get_mapinfo);
2682
/*
 * module hooks
 */
2686module_init(dm_init);
2687module_exit(dm_exit);
2688
2689module_param(major, uint, 0);
2690MODULE_PARM_DESC(major, "The major number of the device mapper");
2691MODULE_DESCRIPTION(DM_NAME " driver");
2692MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
2693MODULE_LICENSE("GPL");
2694