#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mempool.h>
#include <linux/workqueue.h>
#include <scsi/sg.h>

#include <trace/events/block.h>

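/*
 * Each bio carries BIO_INLINE_VECS bio_vecs inline (allocated as back
 * padding behind the bio itself), so small I/Os never need a separate
 * biovec allocation.
 */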
#define BIO_INLINE_VECS		4

static mempool_t *bio_split_pool __read_mostly;

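/*
 * Slab pools for the most common biovec sizes. The index of the pool a
 * bio's biovec came from is stored in bi_flags (see BIO_POOL_OFFSET and
 * BIO_POOL_IDX()), so bvec_free_bs() can return it to the right slab.
 * If this table changes, bvec_alloc_bs() must be updated to match.
 */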
#define BV(x) { .nr_vecs = x, .name = "biovec-"__stringify(x) }
static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = {
	BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES),
};
#undef BV

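/*
 * fs_bio_set is the bio_set containing bio and iovec memory pools used by
 * IO code that does not need private memory pools.
 */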
struct bio_set *fs_bio_set;

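/*
 * Bookkeeping for bio slabs: one kmem_cache per distinct front_pad size,
 * shared (and refcounted) between all bio_sets that use that size.
 */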
struct bio_slab {
	struct kmem_cache *slab;
	unsigned int slab_ref;
	unsigned int slab_size;
	char name[8];
};
static DEFINE_MUTEX(bio_slab_lock);
static struct bio_slab *bio_slabs;
static unsigned int bio_slab_nr, bio_slab_max;

static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size)
{
	unsigned int sz = sizeof(struct bio) + extra_size;
	struct kmem_cache *slab = NULL;
	struct bio_slab *bslab;
	unsigned int i, entry = -1;

	mutex_lock(&bio_slab_lock);

	i = 0;
	while (i < bio_slab_nr) {
		bslab = &bio_slabs[i];

		if (!bslab->slab && entry == -1)
			entry = i;
		else if (bslab->slab_size == sz) {
			slab = bslab->slab;
			bslab->slab_ref++;
			break;
		}
		i++;
	}

	if (slab)
		goto out_unlock;

	if (bio_slab_nr == bio_slab_max && entry == -1) {
		bio_slab_max <<= 1;
		bio_slabs = krealloc(bio_slabs,
				     bio_slab_max * sizeof(struct bio_slab),
				     GFP_KERNEL);
		if (!bio_slabs)
			goto out_unlock;
	}
	if (entry == -1)
		entry = bio_slab_nr++;

	bslab = &bio_slabs[entry];

	snprintf(bslab->name, sizeof(bslab->name), "bio-%d", entry);
	slab = kmem_cache_create(bslab->name, sz, 0, SLAB_HWCACHE_ALIGN, NULL);
	if (!slab)
		goto out_unlock;

	printk(KERN_INFO "bio: create slab <%s> at %d\n", bslab->name, entry);
	bslab->slab = slab;
	bslab->slab_ref = 1;
	bslab->slab_size = sz;
out_unlock:
	mutex_unlock(&bio_slab_lock);
	return slab;
}

static void bio_put_slab(struct bio_set *bs)
{
	struct bio_slab *bslab = NULL;
	unsigned int i;

	mutex_lock(&bio_slab_lock);

	for (i = 0; i < bio_slab_nr; i++) {
		if (bs->bio_slab == bio_slabs[i].slab) {
			bslab = &bio_slabs[i];
			break;
		}
	}

	if (WARN(!bslab, KERN_ERR "bio: unable to find slab!\n"))
		goto out;

	WARN_ON(!bslab->slab_ref);

	if (--bslab->slab_ref)
		goto out;

	kmem_cache_destroy(bslab->slab);
	bslab->slab = NULL;

out:
	mutex_unlock(&bio_slab_lock);
}

unsigned int bvec_nr_vecs(unsigned short idx)
{
	return bvec_slabs[idx].nr_vecs;
}

void bvec_free_bs(struct bio_set *bs, struct bio_vec *bv, unsigned int idx)
{
	BIO_BUG_ON(idx >= BIOVEC_NR_POOLS);

	if (idx == BIOVEC_MAX_IDX)
		mempool_free(bv, bs->bvec_pool);
	else {
		struct biovec_slab *bvs = bvec_slabs + idx;

		kmem_cache_free(bvs->slab, bv);
	}
}

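/*
 * Allocate a biovec for @nr entries from @bs. The chosen bvec_slabs index
 * is returned through @idx so that bvec_free_bs() can free from the same
 * pool. Only the BIO_MAX_PAGES pool is mempool backed; smaller sizes come
 * straight from their slab, and if that fails for a waiting allocation we
 * fall back to the mempool with the maximum size.
 */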
struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx,
			      struct bio_set *bs)
{
	struct bio_vec *bvl;

	switch (nr) {
	case 1:
		*idx = 0;
		break;
	case 2 ... 4:
		*idx = 1;
		break;
	case 5 ... 16:
		*idx = 2;
		break;
	case 17 ... 64:
		*idx = 3;
		break;
	case 65 ... 128:
		*idx = 4;
		break;
	case 129 ... BIO_MAX_PAGES:
		*idx = 5;
		break;
	default:
		return NULL;
	}

	if (*idx == BIOVEC_MAX_IDX) {
fallback:
		bvl = mempool_alloc(bs->bvec_pool, gfp_mask);
	} else {
		struct biovec_slab *bvs = bvec_slabs + *idx;
		gfp_t __gfp_mask = gfp_mask & ~(__GFP_WAIT | __GFP_IO);

		__gfp_mask |= __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN;

		bvl = kmem_cache_alloc(bvs->slab, __gfp_mask);
		if (unlikely(!bvl && (gfp_mask & __GFP_WAIT))) {
			*idx = BIOVEC_MAX_IDX;
			goto fallback;
		}
	}

	return bvl;
}

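/*
 * Free a bio that was allocated from @bs, returning any separately
 * allocated biovec and integrity payload first. If the bio_set uses front
 * padding, the bio does not sit at the start of the mempool element, so
 * step back by front_pad before freeing.
 */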
void bio_free(struct bio *bio, struct bio_set *bs)
{
	void *p;

	if (bio_has_allocated_vec(bio))
		bvec_free_bs(bs, bio->bi_io_vec, BIO_POOL_IDX(bio));

	if (bio_integrity(bio))
		bio_integrity_free(bio, bs);

	p = bio;
	if (bs->front_pad)
		p -= bs->front_pad;

	mempool_free(p, bs->bio_pool);
}
EXPORT_SYMBOL(bio_free);

void bio_init(struct bio *bio)
{
	memset(bio, 0, sizeof(*bio));
	bio->bi_flags = 1 << BIO_UPTODATE;
	atomic_set(&bio->bi_cnt, 1);
}
EXPORT_SYMBOL(bio_init);

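/**
 * bio_alloc_bioset - allocate a bio for I/O
 * @gfp_mask:   the GFP_ mask given to the slab allocator
 * @nr_iovecs:	number of iovecs to pre-allocate
 * @bs:		the bio_set to allocate from
 *
 * Description:
 *   bio_alloc_bioset allocates a bio from the bio_set's mempool. If
 *   %__GFP_WAIT is set, the allocation blocks on the underlying pool until
 *   a bio and iovec pair can be had. Requests of up to BIO_INLINE_VECS
 *   vecs are served from the bio's inline vecs; larger ones get a biovec
 *   from bvec_alloc_bs().
 *
 *   Note that the caller must set ->bi_destructor on successful return
 *   of a bio, to do the appropriate freeing of the bio once the reference
 *   count drops to zero.
 **/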
struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
{
	unsigned long idx = BIO_POOL_NONE;
	struct bio_vec *bvl = NULL;
	struct bio *bio;
	void *p;

	p = mempool_alloc(bs->bio_pool, gfp_mask);
	if (unlikely(!p))
		return NULL;
	bio = p + bs->front_pad;

	bio_init(bio);

	if (unlikely(!nr_iovecs))
		goto out_set;

	if (nr_iovecs <= BIO_INLINE_VECS) {
		bvl = bio->bi_inline_vecs;
		nr_iovecs = BIO_INLINE_VECS;
	} else {
		bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs);
		if (unlikely(!bvl))
			goto err_free;

		nr_iovecs = bvec_nr_vecs(idx);
	}
out_set:
	bio->bi_flags |= idx << BIO_POOL_OFFSET;
	bio->bi_max_vecs = nr_iovecs;
	bio->bi_io_vec = bvl;
	return bio;

err_free:
	mempool_free(p, bs->bio_pool);
	return NULL;
}
EXPORT_SYMBOL(bio_alloc_bioset);

static void bio_fs_destructor(struct bio *bio)
{
	bio_free(bio, fs_bio_set);
}

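/**
 * bio_alloc - allocate a new bio, memory pool backed
 * @gfp_mask: allocation mask to use
 * @nr_iovecs: number of iovecs
 *
 * Allocates a bio from fs_bio_set and sets up the destructor so that a
 * plain bio_put() returns it to the pool. When %__GFP_WAIT is set the
 * allocation will eventually succeed thanks to the mempool guarantees,
 * though it may block.
 *
 * A typical caller builds and submits the bio roughly like this (a minimal
 * sketch; @bdev, @sector, @page and the my_end_io completion callback are
 * assumed to be set up by the caller, and error handling is omitted):
 *
 *	bio = bio_alloc(GFP_NOIO, 1);
 *	bio->bi_bdev = bdev;
 *	bio->bi_sector = sector;
 *	bio_add_page(bio, page, PAGE_SIZE, 0);
 *	bio->bi_end_io = my_end_io;
 *	submit_bio(READ, bio);
 *
 * RETURNS:
 * Pointer to new bio on success, NULL on failure.
 */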
struct bio *bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
{
	struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set);

	if (bio)
		bio->bi_destructor = bio_fs_destructor;

	return bio;
}
EXPORT_SYMBOL(bio_alloc);

static void bio_kmalloc_destructor(struct bio *bio)
{
	if (bio_integrity(bio))
		bio_integrity_free(bio, fs_bio_set);
	kfree(bio);
}

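/**
 * bio_kmalloc - allocate a bio for I/O using kmalloc()
 * @gfp_mask: the GFP_ mask given to the slab allocator
 * @nr_iovecs: number of iovecs to pre-allocate
 *
 * Description:
 *   Allocate a new bio with @nr_iovecs vecs in a single kmalloc()
 *   allocation, bypassing the mempools. The caller therefore has no
 *   forward-progress guarantee under memory pressure; NULL is returned
 *   on allocation failure or if @nr_iovecs exceeds UIO_MAXIOV.
 **/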
struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned int nr_iovecs)
{
	struct bio *bio;

	if (nr_iovecs > UIO_MAXIOV)
		return NULL;

	bio = kmalloc(sizeof(struct bio) + nr_iovecs * sizeof(struct bio_vec),
		      gfp_mask);
	if (unlikely(!bio))
		return NULL;

	bio_init(bio);
	bio->bi_flags |= BIO_POOL_NONE << BIO_POOL_OFFSET;
	bio->bi_max_vecs = nr_iovecs;
	bio->bi_io_vec = bio->bi_inline_vecs;
	bio->bi_destructor = bio_kmalloc_destructor;

	return bio;
}
EXPORT_SYMBOL(bio_kmalloc);

void zero_fill_bio(struct bio *bio)
{
	unsigned long flags;
	struct bio_vec *bv;
	int i;

	bio_for_each_segment(bv, bio, i) {
		char *data = bvec_kmap_irq(bv, &flags);
		memset(data, 0, bv->bv_len);
		flush_dcache_page(bv->bv_page);
		bvec_kunmap_irq(data, &flags);
	}
}
EXPORT_SYMBOL(zero_fill_bio);

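/**
 * bio_put - release a reference to a bio
 * @bio: bio to release reference to
 *
 * Description:
 *   Put a reference to a &struct bio, either one you have gotten with
 *   bio_alloc, bio_get or bio_clone. The last put of a bio frees it via
 *   its destructor.
 **/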
void bio_put(struct bio *bio)
{
	BIO_BUG_ON(!atomic_read(&bio->bi_cnt));

	if (atomic_dec_and_test(&bio->bi_cnt)) {
		bio->bi_next = NULL;
		bio->bi_destructor(bio);
	}
}
EXPORT_SYMBOL(bio_put);

inline int bio_phys_segments(struct request_queue *q, struct bio *bio)
{
	if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
		blk_recount_segments(q, bio);

	return bio->bi_phys_segments;
}
EXPORT_SYMBOL(bio_phys_segments);

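/**
 * 	__bio_clone	-	clone a bio
 * 	@bio: destination bio
 * 	@bio_src: bio to clone
 *
 *	Clone a &bio into an already allocated bio. The biovec entries are
 *	copied, but the pages they point to are shared with @bio_src, so
 *	@bio must have room for @bio_src->bi_max_vecs vec entries.
 */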
void __bio_clone(struct bio *bio, struct bio *bio_src)
{
	memcpy(bio->bi_io_vec, bio_src->bi_io_vec,
		bio_src->bi_max_vecs * sizeof(struct bio_vec));

	bio->bi_sector = bio_src->bi_sector;
	bio->bi_bdev = bio_src->bi_bdev;
	bio->bi_flags |= 1 << BIO_CLONED;
	bio->bi_rw = bio_src->bi_rw;
	bio->bi_vcnt = bio_src->bi_vcnt;
	bio->bi_size = bio_src->bi_size;
	bio->bi_idx = bio_src->bi_idx;
}
EXPORT_SYMBOL(__bio_clone);

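/**
 *	bio_clone	-	clone a bio
 *	@bio: bio to clone
 *	@gfp_mask: allocation priority
 *
 * 	Like __bio_clone(), but the clone is allocated from fs_bio_set and
 * 	the integrity payload is cloned as well if the source carries one.
 */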
struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask)
{
	struct bio *b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs, fs_bio_set);

	if (!b)
		return NULL;

	b->bi_destructor = bio_fs_destructor;
	__bio_clone(b, bio);

	if (bio_integrity(bio)) {
		int ret;

		ret = bio_integrity_clone(b, bio, gfp_mask, fs_bio_set);

		if (ret < 0) {
			bio_put(b);
			return NULL;
		}
	}

	return b;
}
EXPORT_SYMBOL(bio_clone);

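/**
 *	bio_get_nr_vecs		- return approx number of vecs
 *	@bdev:  I/O target
 *
 *	Return the approximate number of pages we can send to this target.
 *	The answer is capped by the queue's max_sectors and max_segments
 *	limits, so treat it as an upper bound rather than a promise that
 *	bio_add_page() will accept that many pages.
 */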
int bio_get_nr_vecs(struct block_device *bdev)
{
	struct request_queue *q = bdev_get_queue(bdev);
	int nr_pages;

	nr_pages = ((queue_max_sectors(q) << 9) + PAGE_SIZE - 1) >> PAGE_SHIFT;
	if (nr_pages > queue_max_segments(q))
		nr_pages = queue_max_segments(q);

	return nr_pages;
}
EXPORT_SYMBOL(bio_get_nr_vecs);

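/*
 * Worker for bio_add_page() and bio_add_pc_page(): try to add a page (or
 * part of one) to @bio without exceeding @max_sectors or the queue's
 * segment limits. Cloned bios are rejected since their vec list must not
 * be modified. If the page is physically contiguous with the previous vec
 * it is merged into it, otherwise a new vec is used; the queue's
 * merge_bvec_fn, if set, gets a veto on either path. Returns the number of
 * bytes added, or 0 if nothing could be added.
 */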
static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
			  *page, unsigned int len, unsigned int offset,
			  unsigned short max_sectors)
{
	int retried_segments = 0;
	struct bio_vec *bvec;

	if (unlikely(bio_flagged(bio, BIO_CLONED)))
		return 0;

	if (((bio->bi_size + len) >> 9) > max_sectors)
		return 0;

	if (bio->bi_vcnt > 0) {
		struct bio_vec *prev = &bio->bi_io_vec[bio->bi_vcnt - 1];

		if (page == prev->bv_page &&
		    offset == prev->bv_offset + prev->bv_len) {
			unsigned int prev_bv_len = prev->bv_len;
			prev->bv_len += len;

			if (q->merge_bvec_fn) {
				struct bvec_merge_data bvm = {
					.bi_bdev = bio->bi_bdev,
					.bi_sector = bio->bi_sector,
					.bi_size = bio->bi_size - prev_bv_len,
					.bi_rw = bio->bi_rw,
				};

				if (q->merge_bvec_fn(q, &bvm, prev) < prev->bv_len) {
					prev->bv_len -= len;
					return 0;
				}
			}

			goto done;
		}
	}

	if (bio->bi_vcnt >= bio->bi_max_vecs)
		return 0;

	while (bio->bi_phys_segments >= queue_max_segments(q)) {

		if (retried_segments)
			return 0;

		retried_segments = 1;
		blk_recount_segments(q, bio);
	}

	bvec = &bio->bi_io_vec[bio->bi_vcnt];
	bvec->bv_page = page;
	bvec->bv_len = len;
	bvec->bv_offset = offset;

	if (q->merge_bvec_fn) {
		struct bvec_merge_data bvm = {
			.bi_bdev = bio->bi_bdev,
			.bi_sector = bio->bi_sector,
			.bi_size = bio->bi_size,
			.bi_rw = bio->bi_rw,
		};

		if (q->merge_bvec_fn(q, &bvm, bvec) < bvec->bv_len) {
			bvec->bv_page = NULL;
			bvec->bv_len = 0;
			bvec->bv_offset = 0;
			return 0;
		}
	}

	if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec)))
		bio->bi_flags &= ~(1 << BIO_SEG_VALID);

	bio->bi_vcnt++;
	bio->bi_phys_segments++;
 done:
	bio->bi_size += len;
	return len;
}

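/**
 *	bio_add_pc_page	-	attempt to add page to bio
 *	@q: the target queue
 *	@bio: destination bio
 *	@page: page to add
 *	@len: vec entry length
 *	@offset: vec entry offset
 *
 *	Attempt to add a page to the bio_vec maplist. This can fail for a
 *	number of reasons, such as the bio being full or the target block
 *	device limitations. The target block device must allow bio's up to
 *	PAGE_SIZE, so it is always possible to add a single page to an
 *	empty bio. This should only be used by passthrough (pc) bios.
 */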
int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page *page,
		    unsigned int len, unsigned int offset)
{
	return __bio_add_page(q, bio, page, len, offset,
			      queue_max_hw_sectors(q));
}
EXPORT_SYMBOL(bio_add_pc_page);

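/**
 *	bio_add_page	-	attempt to add page to bio
 *	@bio: destination bio
 *	@page: page to add
 *	@len: vec entry length
 *	@offset: vec entry offset
 *
 *	Attempt to add a page to the bio_vec maplist. This can fail for a
 *	number of reasons, such as the bio being full or the target block
 *	device limitations. The target block device must allow bio's up to
 *	PAGE_SIZE, so it is always possible to add a single page to an
 *	empty bio.
 */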
int bio_add_page(struct bio *bio, struct page *page, unsigned int len,
		 unsigned int offset)
{
	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
	return __bio_add_page(q, bio, page, len, offset, queue_max_sectors(q));
}
EXPORT_SYMBOL(bio_add_page);

struct bio_map_data {
	struct bio_vec *iovecs;
	struct sg_iovec *sgvecs;
	int nr_sgvecs;
	int is_our_pages;
};

static void bio_set_map_data(struct bio_map_data *bmd, struct bio *bio,
			     struct sg_iovec *iov, int iov_count,
			     int is_our_pages)
{
	memcpy(bmd->iovecs, bio->bi_io_vec, sizeof(struct bio_vec) * bio->bi_vcnt);
	memcpy(bmd->sgvecs, iov, sizeof(struct sg_iovec) * iov_count);
	bmd->nr_sgvecs = iov_count;
	bmd->is_our_pages = is_our_pages;
	bio->bi_private = bmd;
}

static void bio_free_map_data(struct bio_map_data *bmd)
{
	kfree(bmd->iovecs);
	kfree(bmd->sgvecs);
	kfree(bmd);
}

static struct bio_map_data *bio_alloc_map_data(int nr_segs,
					       unsigned int iov_count,
					       gfp_t gfp_mask)
{
	struct bio_map_data *bmd;

	if (iov_count > UIO_MAXIOV)
		return NULL;

	bmd = kmalloc(sizeof(*bmd), gfp_mask);
	if (!bmd)
		return NULL;

	bmd->iovecs = kmalloc(sizeof(struct bio_vec) * nr_segs, gfp_mask);
	if (!bmd->iovecs) {
		kfree(bmd);
		return NULL;
	}

	bmd->sgvecs = kmalloc(sizeof(struct sg_iovec) * iov_count, gfp_mask);
	if (bmd->sgvecs)
		return bmd;

	kfree(bmd->iovecs);
	kfree(bmd);
	return NULL;
}

static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs,
			  struct sg_iovec *iov, int iov_count,
			  int to_user, int from_user, int do_free_page)
{
	int ret = 0, i;
	struct bio_vec *bvec;
	int iov_idx = 0;
	unsigned int iov_off = 0;

	__bio_for_each_segment(bvec, bio, i, 0) {
		char *bv_addr = page_address(bvec->bv_page);
		unsigned int bv_len = iovecs[i].bv_len;

		while (bv_len && iov_idx < iov_count) {
			unsigned int bytes;
			char __user *iov_addr;

			bytes = min_t(unsigned int,
				      iov[iov_idx].iov_len - iov_off, bv_len);
			iov_addr = iov[iov_idx].iov_base + iov_off;

			if (!ret) {
				if (to_user)
					ret = copy_to_user(iov_addr, bv_addr,
							   bytes);

				if (from_user)
					ret = copy_from_user(bv_addr, iov_addr,
							     bytes);

				if (ret)
					ret = -EFAULT;
			}

			bv_len -= bytes;
			bv_addr += bytes;
			iov_addr += bytes;
			iov_off += bytes;

			if (iov[iov_idx].iov_len == iov_off) {
				iov_idx++;
				iov_off = 0;
			}
		}

		if (do_free_page)
			__free_page(bvec->bv_page);
	}

	return ret;
}

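/**
 *	bio_uncopy_user	-	finish previously mapped bio
 *	@bio: bio being terminated
 *
 *	Free pages allocated from bio_copy_user() and write back data
 *	to user space in case of a read.
 */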
int bio_uncopy_user(struct bio *bio)
{
	struct bio_map_data *bmd = bio->bi_private;
	int ret = 0;

	if (!bio_flagged(bio, BIO_NULL_MAPPED))
		ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs,
				     bmd->nr_sgvecs, bio_data_dir(bio) == READ,
				     0, bmd->is_our_pages);
	bio_free_map_data(bmd);
	bio_put(bio);
	return ret;
}
EXPORT_SYMBOL(bio_uncopy_user);

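/**
 *	bio_copy_user_iov	-	copy user data to bio
 *	@q: destination block queue
 *	@map_data: pointer to the rq_map_data holding pages (if necessary)
 *	@iov:	the iovec.
 *	@iov_count: number of elements in the iovec
 *	@write_to_vm: bool indicating writing to pages or not
 *	@gfp_mask: memory allocation flags
 *
 *	Prepares and returns a bio for indirect user io, bouncing data
 *	to/from kernel pages as necessary. Must be paired with a call to
 *	bio_uncopy_user() on io completion.
 */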
struct bio *bio_copy_user_iov(struct request_queue *q,
			      struct rq_map_data *map_data,
			      struct sg_iovec *iov, int iov_count,
			      int write_to_vm, gfp_t gfp_mask)
{
	struct bio_map_data *bmd;
	struct bio_vec *bvec;
	struct page *page;
	struct bio *bio;
	int i, ret;
	int nr_pages = 0;
	unsigned int len = 0;
	unsigned int offset = map_data ? map_data->offset & ~PAGE_MASK : 0;

	for (i = 0; i < iov_count; i++) {
		unsigned long uaddr;
		unsigned long end;
		unsigned long start;

		uaddr = (unsigned long)iov[i].iov_base;
		end = (uaddr + iov[i].iov_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
		start = uaddr >> PAGE_SHIFT;

		if (end < start)
			return ERR_PTR(-EINVAL);

		nr_pages += end - start;
		len += iov[i].iov_len;
	}

	if (offset)
		nr_pages++;

	bmd = bio_alloc_map_data(nr_pages, iov_count, gfp_mask);
	if (!bmd)
		return ERR_PTR(-ENOMEM);

	ret = -ENOMEM;
	bio = bio_kmalloc(gfp_mask, nr_pages);
	if (!bio)
		goto out_bmd;

	if (!write_to_vm)
		bio->bi_rw |= REQ_WRITE;

	ret = 0;

	if (map_data) {
		nr_pages = 1 << map_data->page_order;
		i = map_data->offset / PAGE_SIZE;
	}
	while (len) {
		unsigned int bytes = PAGE_SIZE;

		bytes -= offset;

		if (bytes > len)
			bytes = len;

		if (map_data) {
			if (i == map_data->nr_entries * nr_pages) {
				ret = -ENOMEM;
				break;
			}

			page = map_data->pages[i / nr_pages];
			page += (i % nr_pages);

			i++;
		} else {
			page = alloc_page(q->bounce_gfp | gfp_mask);
			if (!page) {
				ret = -ENOMEM;
				break;
			}
		}

		if (bio_add_pc_page(q, bio, page, bytes, offset) < bytes)
			break;

		len -= bytes;
		offset = 0;
	}

	if (ret)
		goto cleanup;

	if ((!write_to_vm && (!map_data || !map_data->null_mapped)) ||
	    (map_data && map_data->from_user)) {
		ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0, 1, 0);
		if (ret)
			goto cleanup;
	}

	bio_set_map_data(bmd, bio, iov, iov_count, map_data ? 0 : 1);
	return bio;
cleanup:
	if (!map_data)
		bio_for_each_segment(bvec, bio, i)
			__free_page(bvec->bv_page);

	bio_put(bio);
out_bmd:
	bio_free_map_data(bmd);
	return ERR_PTR(ret);
}

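/**
 *	bio_copy_user	-	copy user data to bio
 *	@q: destination block queue
 *	@map_data: pointer to the rq_map_data holding pages (if necessary)
 *	@uaddr: start of user address
 *	@len: length in bytes
 *	@write_to_vm: bool indicating writing to pages or not
 *	@gfp_mask: memory allocation flags
 *
 *	Prepares and returns a bio for indirect user io, bouncing data
 *	to/from kernel pages as necessary. Must be paired with a call to
 *	bio_uncopy_user() on io completion.
 */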
struct bio *bio_copy_user(struct request_queue *q, struct rq_map_data *map_data,
			  unsigned long uaddr, unsigned int len,
			  int write_to_vm, gfp_t gfp_mask)
{
	struct sg_iovec iov;

	iov.iov_base = (void __user *)uaddr;
	iov.iov_len = len;

	return bio_copy_user_iov(q, map_data, &iov, 1, write_to_vm, gfp_mask);
}
EXPORT_SYMBOL(bio_copy_user);

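/*
 * Worker for bio_map_user_iov(): pin the user pages with
 * get_user_pages_fast() and add them to a freshly allocated bio. Each
 * iovec base must satisfy the queue's dma alignment. Returns the bio or
 * an ERR_PTR(); pages pinned before a failure are released again.
 */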
static struct bio *__bio_map_user_iov(struct request_queue *q,
				      struct block_device *bdev,
				      struct sg_iovec *iov, int iov_count,
				      int write_to_vm, gfp_t gfp_mask)
{
	int i, j;
	int nr_pages = 0;
	struct page **pages;
	struct bio *bio;
	int cur_page = 0;
	int ret, offset;

	for (i = 0; i < iov_count; i++) {
		unsigned long uaddr = (unsigned long)iov[i].iov_base;
		unsigned long len = iov[i].iov_len;
		unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
		unsigned long start = uaddr >> PAGE_SHIFT;

		if (end < start)
			return ERR_PTR(-EINVAL);

		nr_pages += end - start;

		if (uaddr & queue_dma_alignment(q))
			return ERR_PTR(-EINVAL);
	}

	if (!nr_pages)
		return ERR_PTR(-EINVAL);

	bio = bio_kmalloc(gfp_mask, nr_pages);
	if (!bio)
		return ERR_PTR(-ENOMEM);

	ret = -ENOMEM;
	pages = kcalloc(nr_pages, sizeof(struct page *), gfp_mask);
	if (!pages)
		goto out;

	for (i = 0; i < iov_count; i++) {
		unsigned long uaddr = (unsigned long)iov[i].iov_base;
		unsigned long len = iov[i].iov_len;
		unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
		unsigned long start = uaddr >> PAGE_SHIFT;
		const int local_nr_pages = end - start;
		const int page_limit = cur_page + local_nr_pages;

		ret = get_user_pages_fast(uaddr, local_nr_pages,
					  write_to_vm, &pages[cur_page]);
		if (ret < local_nr_pages) {
			ret = -EFAULT;
			goto out_unmap;
		}

		offset = uaddr & ~PAGE_MASK;
		for (j = cur_page; j < page_limit; j++) {
			unsigned int bytes = PAGE_SIZE - offset;

			if (len <= 0)
				break;

			if (bytes > len)
				bytes = len;

			if (bio_add_pc_page(q, bio, pages[j], bytes, offset) <
			    bytes)
				break;

			len -= bytes;
			offset = 0;
		}

		cur_page = j;

		while (j < page_limit)
			page_cache_release(pages[j++]);
	}

	kfree(pages);

	if (!write_to_vm)
		bio->bi_rw |= REQ_WRITE;

	bio->bi_bdev = bdev;
	bio->bi_flags |= (1 << BIO_USER_MAPPED);
	return bio;

 out_unmap:
	for (i = 0; i < nr_pages; i++) {
		if (!pages[i])
			break;
		page_cache_release(pages[i]);
	}
 out:
	kfree(pages);
	bio_put(bio);
	return ERR_PTR(ret);
}

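/**
 *	bio_map_user	-	map user address into bio
 *	@q: the struct request_queue for the bio
 *	@bdev: destination block device
 *	@uaddr: start of user address
 *	@len: length in bytes
 *	@write_to_vm: bool indicating writing to pages or not
 *	@gfp_mask: memory allocation flags
 *
 *	Map the user space address into a bio suitable for io to a block
 *	device. Returns an error pointer in case of error.
 */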
struct bio *bio_map_user(struct request_queue *q, struct block_device *bdev,
			 unsigned long uaddr, unsigned int len, int write_to_vm,
			 gfp_t gfp_mask)
{
	struct sg_iovec iov;

	iov.iov_base = (void __user *)uaddr;
	iov.iov_len = len;

	return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm, gfp_mask);
}
EXPORT_SYMBOL(bio_map_user);

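/**
 *	bio_map_user_iov - map user sg_iovec table into bio
 *	@q: the struct request_queue for the bio
 *	@bdev: destination block device
 *	@iov:	the iovec.
 *	@iov_count: number of elements in the iovec
 *	@write_to_vm: bool indicating writing to pages or not
 *	@gfp_mask: memory allocation flags
 *
 *	Map the user space address into a bio suitable for io to a block
 *	device. Returns an error pointer in case of error.
 *
 *	The returned bio carries an extra reference so it survives until the
 *	matching bio_unmap_user(); the bio_put() done on I/O completion does
 *	not free it.
 */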
struct bio *bio_map_user_iov(struct request_queue *q, struct block_device *bdev,
			     struct sg_iovec *iov, int iov_count,
			     int write_to_vm, gfp_t gfp_mask)
{
	struct bio *bio;

	bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm,
				 gfp_mask);
	if (IS_ERR(bio))
		return bio;

	bio_get(bio);

	return bio;
}

static void __bio_unmap_user(struct bio *bio)
{
	struct bio_vec *bvec;
	int i;

	__bio_for_each_segment(bvec, bio, i, 0) {
		if (bio_data_dir(bio) == READ)
			set_page_dirty_lock(bvec->bv_page);

		page_cache_release(bvec->bv_page);
	}

	bio_put(bio);
}

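/**
 *	bio_unmap_user	-	unmap a bio
 *	@bio:		the bio being unmapped
 *
 *	Unmap a bio previously mapped by bio_map_user(). Must be called with
 *	a process context.
 *
 *	bio_unmap_user() may sleep.
 */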
void bio_unmap_user(struct bio *bio)
{
	__bio_unmap_user(bio);
	bio_put(bio);
}
EXPORT_SYMBOL(bio_unmap_user);

static void bio_map_kern_endio(struct bio *bio, int err)
{
	bio_put(bio);
}

static struct bio *__bio_map_kern(struct request_queue *q, void *data,
				  unsigned int len, gfp_t gfp_mask)
{
	unsigned long kaddr = (unsigned long)data;
	unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
	unsigned long start = kaddr >> PAGE_SHIFT;
	const int nr_pages = end - start;
	int offset, i;
	struct bio *bio;

	bio = bio_kmalloc(gfp_mask, nr_pages);
	if (!bio)
		return ERR_PTR(-ENOMEM);

	offset = offset_in_page(kaddr);
	for (i = 0; i < nr_pages; i++) {
		unsigned int bytes = PAGE_SIZE - offset;

		if (len <= 0)
			break;

		if (bytes > len)
			bytes = len;

		if (bio_add_pc_page(q, bio, virt_to_page(data), bytes,
				    offset) < bytes)
			break;

		data += bytes;
		len -= bytes;
		offset = 0;
	}

	bio->bi_end_io = bio_map_kern_endio;
	return bio;
}

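/**
 *	bio_map_kern	-	map kernel address into bio
 *	@q: the struct request_queue for the bio
 *	@data: pointer to buffer to map
 *	@len: length in bytes
 *	@gfp_mask: allocation flags for bio allocation
 *
 *	Map the kernel address into a bio suitable for io to a block
 *	device. Partial mappings are not supported. Returns an error
 *	pointer in case of error.
 */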
struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
			 gfp_t gfp_mask)
{
	struct bio *bio;

	bio = __bio_map_kern(q, data, len, gfp_mask);
	if (IS_ERR(bio))
		return bio;

	if (bio->bi_size == len)
		return bio;

	bio_put(bio);
	return ERR_PTR(-EINVAL);
}
EXPORT_SYMBOL(bio_map_kern);

static void bio_copy_kern_endio(struct bio *bio, int err)
{
	struct bio_vec *bvec;
	const int read = bio_data_dir(bio) == READ;
	struct bio_map_data *bmd = bio->bi_private;
	int i;
	char *p = bmd->sgvecs[0].iov_base;

	__bio_for_each_segment(bvec, bio, i, 0) {
		char *addr = page_address(bvec->bv_page);
		int len = bmd->iovecs[i].bv_len;

		if (read)
			memcpy(p, addr, len);

		__free_page(bvec->bv_page);
		p += len;
	}

	bio_free_map_data(bmd);
	bio_put(bio);
}

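/**
 *	bio_copy_kern	-	copy kernel address into bio
 *	@q: the struct request_queue for the bio
 *	@data: pointer to buffer to copy
 *	@len: length in bytes
 *	@gfp_mask: allocation flags for bio and page allocation
 *	@reading: data direction is READ
 *
 *	Copy the kernel address into a bio suitable for io to a block
 *	device. Returns an error pointer in case of error.
 */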
struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len,
			  gfp_t gfp_mask, int reading)
{
	struct bio *bio;
	struct bio_vec *bvec;
	int i;

	bio = bio_copy_user(q, NULL, (unsigned long)data, len, 1, gfp_mask);
	if (IS_ERR(bio))
		return bio;

	if (!reading) {
		void *p = data;

		bio_for_each_segment(bvec, bio, i) {
			char *addr = page_address(bvec->bv_page);

			memcpy(addr, p, bvec->bv_len);
			p += bvec->bv_len;
		}
	}

	bio->bi_end_io = bio_copy_kern_endio;

	return bio;
}
EXPORT_SYMBOL(bio_copy_kern);

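/*
 * bio_set_pages_dirty() and bio_check_pages_dirty() are support functions
 * for performing direct-IO in BIOs.
 *
 * The problem is that we cannot run set_page_dirty() from interrupt context
 * because the required locks are not interrupt-safe. So what we can do is
 * mark the pages dirty _before_ performing IO and, at IO completion, check
 * whether they are still dirty. Pages that are still dirty (or compound)
 * are simply released; the rest are queued on bio_dirty_list and redirtied
 * from process context via a workqueue.
 */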
void bio_set_pages_dirty(struct bio *bio)
{
	struct bio_vec *bvec = bio->bi_io_vec;
	int i;

	for (i = 0; i < bio->bi_vcnt; i++) {
		struct page *page = bvec[i].bv_page;

		if (page && !PageCompound(page))
			set_page_dirty_lock(page);
	}
}

static void bio_release_pages(struct bio *bio)
{
	struct bio_vec *bvec = bio->bi_io_vec;
	int i;

	for (i = 0; i < bio->bi_vcnt; i++) {
		struct page *page = bvec[i].bv_page;

		if (page)
			put_page(page);
	}
}

static void bio_dirty_fn(struct work_struct *work);

static DECLARE_WORK(bio_dirty_work, bio_dirty_fn);
static DEFINE_SPINLOCK(bio_dirty_lock);
static struct bio *bio_dirty_list;

/*
 * This runs in process context
 */
static void bio_dirty_fn(struct work_struct *work)
{
	unsigned long flags;
	struct bio *bio;

	spin_lock_irqsave(&bio_dirty_lock, flags);
	bio = bio_dirty_list;
	bio_dirty_list = NULL;
	spin_unlock_irqrestore(&bio_dirty_lock, flags);

	while (bio) {
		struct bio *next = bio->bi_private;

		bio_set_pages_dirty(bio);
		bio_release_pages(bio);
		bio_put(bio);
		bio = next;
	}
}

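/*
 * bio_check_pages_dirty() is called from the I/O completion path. Pages
 * that are still dirty (or compound) are released immediately; if any page
 * was cleaned while the I/O was in flight, the bio is queued on
 * bio_dirty_list and redirtied later from process context by bio_dirty_fn(),
 * because set_page_dirty_lock() cannot be called from interrupt context.
 */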
void bio_check_pages_dirty(struct bio *bio)
{
	struct bio_vec *bvec = bio->bi_io_vec;
	int nr_clean_pages = 0;
	int i;

	for (i = 0; i < bio->bi_vcnt; i++) {
		struct page *page = bvec[i].bv_page;

		if (PageDirty(page) || PageCompound(page)) {
			page_cache_release(page);
			bvec[i].bv_page = NULL;
		} else {
			nr_clean_pages++;
		}
	}

	if (nr_clean_pages) {
		unsigned long flags;

		spin_lock_irqsave(&bio_dirty_lock, flags);
		bio->bi_private = bio_dirty_list;
		bio_dirty_list = bio;
		spin_unlock_irqrestore(&bio_dirty_lock, flags);
		schedule_work(&bio_dirty_work);
	} else {
		bio_put(bio);
	}
}

#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
void bio_flush_dcache_pages(struct bio *bi)
{
	int i;
	struct bio_vec *bvec;

	bio_for_each_segment(bvec, bi, i)
		flush_dcache_page(bvec->bv_page);
}
EXPORT_SYMBOL(bio_flush_dcache_pages);
#endif

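/**
 * bio_endio - end I/O on a bio
 * @bio:	bio
 * @error:	error, if any
 *
 * Description:
 *   bio_endio() will end I/O on the whole bio. It is the preferred way to
 *   end I/O on a bio: it clears BIO_UPTODATE on error, converts a cleared
 *   BIO_UPTODATE with no explicit error into -EIO, and then calls the
 *   bio's ->bi_end_io() with the resulting error code.
 **/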
void bio_endio(struct bio *bio, int error)
{
	if (error)
		clear_bit(BIO_UPTODATE, &bio->bi_flags);
	else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
		error = -EIO;

	if (bio->bi_end_io)
		bio->bi_end_io(bio, error);
}
EXPORT_SYMBOL(bio_endio);

void bio_pair_release(struct bio_pair *bp)
{
	if (atomic_dec_and_test(&bp->cnt)) {
		struct bio *master = bp->bio1.bi_private;

		bio_endio(master, bp->error);
		mempool_free(bp, bp->bio2.bi_private);
	}
}
EXPORT_SYMBOL(bio_pair_release);

static void bio_pair_end_1(struct bio *bi, int err)
{
	struct bio_pair *bp = container_of(bi, struct bio_pair, bio1);

	if (err)
		bp->error = err;

	bio_pair_release(bp);
}

static void bio_pair_end_2(struct bio *bi, int err)
{
	struct bio_pair *bp = container_of(bi, struct bio_pair, bio2);

	if (err)
		bp->error = err;

	bio_pair_release(bp);
}

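/*
 * Split a bio into two parts of @first_sectors and the remainder. This
 * only handles bios with a single bvec (bi_vcnt == 1, bi_idx == 0). The
 * resulting bio_pair holds a third reference that the caller drops with
 * bio_pair_release() once both halves have been submitted; the master bio
 * is completed when all three references are gone.
 */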
struct bio_pair *bio_split(struct bio *bi, int first_sectors)
{
	struct bio_pair *bp = mempool_alloc(bio_split_pool, GFP_NOIO);

	if (!bp)
		return bp;

	trace_block_split(bdev_get_queue(bi->bi_bdev), bi,
			  bi->bi_sector + first_sectors);

	BUG_ON(bi->bi_vcnt != 1);
	BUG_ON(bi->bi_idx != 0);
	atomic_set(&bp->cnt, 3);
	bp->error = 0;
	bp->bio1 = *bi;
	bp->bio2 = *bi;
	bp->bio2.bi_sector += first_sectors;
	bp->bio2.bi_size -= first_sectors << 9;
	bp->bio1.bi_size = first_sectors << 9;

	bp->bv1 = bi->bi_io_vec[0];
	bp->bv2 = bi->bi_io_vec[0];
	bp->bv2.bv_offset += first_sectors << 9;
	bp->bv2.bv_len -= first_sectors << 9;
	bp->bv1.bv_len = first_sectors << 9;

	bp->bio1.bi_io_vec = &bp->bv1;
	bp->bio2.bi_io_vec = &bp->bv2;

	bp->bio1.bi_max_vecs = 1;
	bp->bio2.bi_max_vecs = 1;

	bp->bio1.bi_end_io = bio_pair_end_1;
	bp->bio2.bi_end_io = bio_pair_end_2;

	bp->bio1.bi_private = bi;
	bp->bio2.bi_private = bio_split_pool;

	if (bio_integrity(bi))
		bio_integrity_split(bi, bp, first_sectors);

	return bp;
}
EXPORT_SYMBOL(bio_split);

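/**
 *      bio_sector_offset - Find hardware sector offset in bio
 *      @bio:           bio to inspect
 *      @index:         bio_vec index
 *      @offset:        offset in bytes
 *
 *      Return the number of hardware sectors between the beginning of the
 *      bio and an end point. Useful for breaking up bios.
 */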
sector_t bio_sector_offset(struct bio *bio, unsigned short index,
			   unsigned int offset)
{
	unsigned int sector_sz;
	struct bio_vec *bv;
	sector_t sectors;
	int i;

	sector_sz = queue_logical_block_size(bio->bi_bdev->bd_disk->queue);
	sectors = 0;

	if (index >= bio->bi_idx)
		index = bio->bi_vcnt - 1;

	__bio_for_each_segment(bv, bio, i, 0) {
		if (i == index) {
			if (offset > bv->bv_offset)
				sectors += (offset - bv->bv_offset) / sector_sz;
			break;
		}

		sectors += bv->bv_len / sector_sz;
	}

	return sectors;
}
EXPORT_SYMBOL(bio_sector_offset);

/*
 * Create memory pools for biovec's in a bio_set.
 * Use the global biovec slabs created for general use.
 */
static int biovec_create_pools(struct bio_set *bs, int pool_entries)
{
	struct biovec_slab *bp = bvec_slabs + BIOVEC_MAX_IDX;

	bs->bvec_pool = mempool_create_slab_pool(pool_entries, bp->slab);
	if (!bs->bvec_pool)
		return -ENOMEM;

	return 0;
}

static void biovec_free_pools(struct bio_set *bs)
{
	mempool_destroy(bs->bvec_pool);
}

void bioset_free(struct bio_set *bs)
{
	if (bs->bio_pool)
		mempool_destroy(bs->bio_pool);

	bioset_integrity_free(bs);
	biovec_free_pools(bs);
	bio_put_slab(bs);

	kfree(bs);
}
EXPORT_SYMBOL(bioset_free);

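/**
 * bioset_create  - Create a bio_set
 * @pool_size:	Number of bio and bio_vecs to cache in the mempool
 * @front_pad:	Number of bytes to allocate in front of the returned bio
 *
 * Description:
 *    Set up a bio_set to be used with bio_alloc_bioset(). Allows the caller
 *    to ask for a number of bytes to be allocated in front of the bio.
 *    Front pad allocation is useful for embedding the bio inside another
 *    structure, to avoid allocating extra data to go with the bio. Note
 *    that the bio must be embedded at the very end of the allocated memory.
 */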
struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
{
	unsigned int back_pad = BIO_INLINE_VECS * sizeof(struct bio_vec);
	struct bio_set *bs;

	bs = kzalloc(sizeof(*bs), GFP_KERNEL);
	if (!bs)
		return NULL;

	bs->front_pad = front_pad;

	bs->bio_slab = bio_find_or_create_slab(front_pad + back_pad);
	if (!bs->bio_slab) {
		kfree(bs);
		return NULL;
	}

	bs->bio_pool = mempool_create_slab_pool(pool_size, bs->bio_slab);
	if (!bs->bio_pool)
		goto bad;

	if (!biovec_create_pools(bs, pool_size))
		return bs;

bad:
	bioset_free(bs);
	return NULL;
}
EXPORT_SYMBOL(bioset_create);

static void __init biovec_init_slabs(void)
{
	int i;

	for (i = 0; i < BIOVEC_NR_POOLS; i++) {
		int size;
		struct biovec_slab *bvs = bvec_slabs + i;

		if (bvs->nr_vecs <= BIO_INLINE_VECS) {
			bvs->slab = NULL;
			continue;
		}

		size = bvs->nr_vecs * sizeof(struct bio_vec);
		bvs->slab = kmem_cache_create(bvs->name, size, 0,
					      SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
	}
}

static int __init init_bio(void)
{
	bio_slab_max = 2;
	bio_slab_nr = 0;
	bio_slabs = kzalloc(bio_slab_max * sizeof(struct bio_slab), GFP_KERNEL);
	if (!bio_slabs)
		panic("bio: can't allocate bios\n");

	bio_integrity_init();
	biovec_init_slabs();

	fs_bio_set = bioset_create(BIO_POOL_SIZE, 0);
	if (!fs_bio_set)
		panic("bio: can't allocate bios\n");

	if (bioset_integrity_create(fs_bio_set, BIO_POOL_SIZE))
		panic("bio: can't create integrity pool\n");

	bio_split_pool = mempool_create_kmalloc_pool(BIO_SPLIT_ENTRIES,
						     sizeof(struct bio_pair));
	if (!bio_split_pool)
		panic("bio: can't create split pool\n");

	return 0;
}
subsys_initcall(init_bio);