1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18#include <linux/mm.h>
19#include <linux/swap.h>
20#include <linux/bio.h>
21#include <linux/blkdev.h>
22#include <linux/slab.h>
23#include <linux/init.h>
24#include <linux/kernel.h>
25#include <linux/module.h>
26#include <linux/mempool.h>
27#include <linux/workqueue.h>
28#include <scsi/sg.h>
29
30#include <trace/events/block.h>
31
32
33
34
35
/*
 * Number of bio_vecs reserved directly behind each bio that comes from a
 * bio_set (see the back_pad computed in bioset_create()). Requests for up
 * to this many vecs are served from bio->bi_inline_vecs by
 * bio_alloc_bioset() without a separate bvec allocation.
 */
#define BIO_INLINE_VECS 4

/* Mempool of struct bio_pair, created in init_bio() and used by bio_split(). */
static mempool_t *bio_split_pool __read_mostly;
39
40
41
42
43
44
/*
 * Table of biovec size classes. BV(x) fills in the vec count and derives
 * the slab name "biovec-<x>"; the kmem caches themselves are created later
 * by biovec_init_slabs(). The index into this table is the "pool index"
 * returned by bvec_alloc_bs().
 */
#define BV(x) { .nr_vecs = x, .name = "biovec-"__stringify(x) }
struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = {
	BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES),
};
#undef BV
50
51
52
53
54
/*
 * Default bio_set, created in init_bio(). bio_alloc() and bio_clone()
 * allocate from it and bio_fs_destructor() frees back into it.
 */
struct bio_set *fs_bio_set;
56
57
58
59
/*
 * One entry in the registry of bio kmem caches. Caches are shared between
 * bio_sets that need the same object size and are reference counted.
 */
struct bio_slab {
	struct kmem_cache *slab;	/* the cache, or NULL if the entry is free */
	unsigned int slab_ref;		/* number of bio_sets using this cache */
	unsigned int slab_size;		/* object size the cache was created with */
	char name[8];			/* cache name, formatted as "bio-<index>" */
};
/* Protects bio_slabs, bio_slab_nr and bio_slab_max. */
static DEFINE_MUTEX(bio_slab_lock);
/* Registry array; allocated in init_bio(), grown in bio_find_or_create_slab(). */
static struct bio_slab *bio_slabs;
/* bio_slab_nr: entries handed out so far; bio_slab_max: allocated capacity. */
static unsigned int bio_slab_nr, bio_slab_max;
69
70static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size)
71{
72 unsigned int sz = sizeof(struct bio) + extra_size;
73 struct kmem_cache *slab = NULL;
74 struct bio_slab *bslab;
75 unsigned int i, entry = -1;
76
77 mutex_lock(&bio_slab_lock);
78
79 i = 0;
80 while (i < bio_slab_nr) {
81 bslab = &bio_slabs[i];
82
83 if (!bslab->slab && entry == -1)
84 entry = i;
85 else if (bslab->slab_size == sz) {
86 slab = bslab->slab;
87 bslab->slab_ref++;
88 break;
89 }
90 i++;
91 }
92
93 if (slab)
94 goto out_unlock;
95
96 if (bio_slab_nr == bio_slab_max && entry == -1) {
97 bio_slab_max <<= 1;
98 bio_slabs = krealloc(bio_slabs,
99 bio_slab_max * sizeof(struct bio_slab),
100 GFP_KERNEL);
101 if (!bio_slabs)
102 goto out_unlock;
103 }
104 if (entry == -1)
105 entry = bio_slab_nr++;
106
107 bslab = &bio_slabs[entry];
108
109 snprintf(bslab->name, sizeof(bslab->name), "bio-%d", entry);
110 slab = kmem_cache_create(bslab->name, sz, 0, SLAB_HWCACHE_ALIGN, NULL);
111 if (!slab)
112 goto out_unlock;
113
114 printk("bio: create slab <%s> at %d\n", bslab->name, entry);
115 bslab->slab = slab;
116 bslab->slab_ref = 1;
117 bslab->slab_size = sz;
118out_unlock:
119 mutex_unlock(&bio_slab_lock);
120 return slab;
121}
122
123static void bio_put_slab(struct bio_set *bs)
124{
125 struct bio_slab *bslab = NULL;
126 unsigned int i;
127
128 mutex_lock(&bio_slab_lock);
129
130 for (i = 0; i < bio_slab_nr; i++) {
131 if (bs->bio_slab == bio_slabs[i].slab) {
132 bslab = &bio_slabs[i];
133 break;
134 }
135 }
136
137 if (WARN(!bslab, KERN_ERR "bio: unable to find slab!\n"))
138 goto out;
139
140 WARN_ON(!bslab->slab_ref);
141
142 if (--bslab->slab_ref)
143 goto out;
144
145 kmem_cache_destroy(bslab->slab);
146 bslab->slab = NULL;
147
148out:
149 mutex_unlock(&bio_slab_lock);
150}
151
152unsigned int bvec_nr_vecs(unsigned short idx)
153{
154 return bvec_slabs[idx].nr_vecs;
155}
156
157void bvec_free_bs(struct bio_set *bs, struct bio_vec *bv, unsigned int idx)
158{
159 BIO_BUG_ON(idx >= BIOVEC_NR_POOLS);
160
161 if (idx == BIOVEC_MAX_IDX)
162 mempool_free(bv, bs->bvec_pool);
163 else {
164 struct biovec_slab *bvs = bvec_slabs + idx;
165
166 kmem_cache_free(bvs->slab, bv);
167 }
168}
169
/*
 * bvec_alloc_bs - allocate a bio_vec array with room for at least @nr vecs
 * @gfp_mask: allocation flags
 * @nr: number of vecs the caller needs
 * @idx: out parameter, set to the bvec_slabs index the array came from
 * @bs: bio_set whose bvec_pool backs the largest size class
 *
 * Returns the array, or NULL if @nr is outside 1..BIO_MAX_PAGES or the
 * underlying allocator returned NULL.
 */
struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx,
			      struct bio_set *bs)
{
	struct bio_vec *bvl;

	/*
	 * Round the request up to the smallest size class that fits; the
	 * boundaries mirror the nr_vecs values in bvec_slabs.
	 */
	switch (nr) {
	case 1:
		*idx = 0;
		break;
	case 2 ... 4:
		*idx = 1;
		break;
	case 5 ... 16:
		*idx = 2;
		break;
	case 17 ... 64:
		*idx = 3;
		break;
	case 65 ... 128:
		*idx = 4;
		break;
	case 129 ... BIO_MAX_PAGES:
		*idx = 5;
		break;
	default:
		return NULL;
	}

	/*
	 * The largest class always comes from the bio_set's mempool. The
	 * fallback label is also the target when a slab allocation below
	 * fails for a caller that is allowed to wait.
	 */
	if (*idx == BIOVEC_MAX_IDX) {
fallback:
		bvl = mempool_alloc(bs->bvec_pool, gfp_mask);
	} else {
		struct biovec_slab *bvs = bvec_slabs + *idx;
		gfp_t __gfp_mask = gfp_mask & ~(__GFP_WAIT | __GFP_IO);

		/*
		 * First attempt from the slab with waiting and I/O stripped
		 * from the mask and with no emergency reserves, no retries
		 * and no failure warning.
		 */
		__gfp_mask |= __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN;

		/*
		 * If that fails and the original mask allowed waiting, switch
		 * to the largest class and retry through the mempool. *idx is
		 * updated so the caller frees to the right place.
		 */
		bvl = kmem_cache_alloc(bvs->slab, __gfp_mask);
		if (unlikely(!bvl && (gfp_mask & __GFP_WAIT))) {
			*idx = BIOVEC_MAX_IDX;
			goto fallback;
		}
	}

	return bvl;
}
232
233void bio_free(struct bio *bio, struct bio_set *bs)
234{
235 void *p;
236
237 if (bio_has_allocated_vec(bio))
238 bvec_free_bs(bs, bio->bi_io_vec, BIO_POOL_IDX(bio));
239
240 if (bio_integrity(bio))
241 bio_integrity_free(bio, bs);
242
243
244
245
246 p = bio;
247 if (bs->front_pad)
248 p -= bs->front_pad;
249
250 mempool_free(p, bs->bio_pool);
251}
252EXPORT_SYMBOL(bio_free);
253
254void bio_init(struct bio *bio)
255{
256 memset(bio, 0, sizeof(*bio));
257 bio->bi_flags = 1 << BIO_UPTODATE;
258 bio->bi_comp_cpu = -1;
259 atomic_set(&bio->bi_cnt, 1);
260}
261EXPORT_SYMBOL(bio_init);
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
279{
280 unsigned long idx = BIO_POOL_NONE;
281 struct bio_vec *bvl = NULL;
282 struct bio *bio;
283 void *p;
284
285 p = mempool_alloc(bs->bio_pool, gfp_mask);
286 if (unlikely(!p))
287 return NULL;
288 bio = p + bs->front_pad;
289
290 bio_init(bio);
291
292 if (unlikely(!nr_iovecs))
293 goto out_set;
294
295 if (nr_iovecs <= BIO_INLINE_VECS) {
296 bvl = bio->bi_inline_vecs;
297 nr_iovecs = BIO_INLINE_VECS;
298 } else {
299 bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs);
300 if (unlikely(!bvl))
301 goto err_free;
302
303 nr_iovecs = bvec_nr_vecs(idx);
304 }
305out_set:
306 bio->bi_flags |= idx << BIO_POOL_OFFSET;
307 bio->bi_max_vecs = nr_iovecs;
308 bio->bi_io_vec = bvl;
309 return bio;
310
311err_free:
312 mempool_free(p, bs->bio_pool);
313 return NULL;
314}
315EXPORT_SYMBOL(bio_alloc_bioset);
316
/* Destructor for bios allocated from fs_bio_set: frees back into that set. */
static void bio_fs_destructor(struct bio *bio)
{
	bio_free(bio, fs_bio_set);
}
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs)
342{
343 struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set);
344
345 if (bio)
346 bio->bi_destructor = bio_fs_destructor;
347
348 return bio;
349}
350EXPORT_SYMBOL(bio_alloc);
351
/*
 * Destructor for bios created by bio_kmalloc(): releases any integrity
 * payload (via fs_bio_set) and kfree()s the bio, whose vecs live in the
 * same allocation.
 */
static void bio_kmalloc_destructor(struct bio *bio)
{
	if (bio_integrity(bio))
		bio_integrity_free(bio, fs_bio_set);
	kfree(bio);
}
358
359
360
361
362
363
364
365
366
367
368
369struct bio *bio_kmalloc(gfp_t gfp_mask, int nr_iovecs)
370{
371 struct bio *bio;
372
373 if (nr_iovecs > UIO_MAXIOV)
374 return NULL;
375
376 bio = kmalloc(sizeof(struct bio) + nr_iovecs * sizeof(struct bio_vec),
377 gfp_mask);
378 if (unlikely(!bio))
379 return NULL;
380
381 bio_init(bio);
382 bio->bi_flags |= BIO_POOL_NONE << BIO_POOL_OFFSET;
383 bio->bi_max_vecs = nr_iovecs;
384 bio->bi_io_vec = bio->bi_inline_vecs;
385 bio->bi_destructor = bio_kmalloc_destructor;
386
387 return bio;
388}
389EXPORT_SYMBOL(bio_kmalloc);
390
391void zero_fill_bio(struct bio *bio)
392{
393 unsigned long flags;
394 struct bio_vec *bv;
395 int i;
396
397 bio_for_each_segment(bv, bio, i) {
398 char *data = bvec_kmap_irq(bv, &flags);
399 memset(data, 0, bv->bv_len);
400 flush_dcache_page(bv->bv_page);
401 bvec_kunmap_irq(data, &flags);
402 }
403}
404EXPORT_SYMBOL(zero_fill_bio);
405
406
407
408
409
410
411
412
413
414void bio_put(struct bio *bio)
415{
416 BIO_BUG_ON(!atomic_read(&bio->bi_cnt));
417
418
419
420
421 if (atomic_dec_and_test(&bio->bi_cnt)) {
422 bio->bi_next = NULL;
423 bio->bi_destructor(bio);
424 }
425}
426EXPORT_SYMBOL(bio_put);
427
428inline int bio_phys_segments(struct request_queue *q, struct bio *bio)
429{
430 if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
431 blk_recount_segments(q, bio);
432
433 return bio->bi_phys_segments;
434}
435EXPORT_SYMBOL(bio_phys_segments);
436
437
438
439
440
441
442
443
444
445
446void __bio_clone(struct bio *bio, struct bio *bio_src)
447{
448 memcpy(bio->bi_io_vec, bio_src->bi_io_vec,
449 bio_src->bi_max_vecs * sizeof(struct bio_vec));
450
451
452
453
454
455 bio->bi_sector = bio_src->bi_sector;
456 bio->bi_bdev = bio_src->bi_bdev;
457 bio->bi_flags |= 1 << BIO_CLONED;
458 bio->bi_rw = bio_src->bi_rw;
459 bio->bi_vcnt = bio_src->bi_vcnt;
460 bio->bi_size = bio_src->bi_size;
461 bio->bi_idx = bio_src->bi_idx;
462}
463EXPORT_SYMBOL(__bio_clone);
464
465
466
467
468
469
470
471
472struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask)
473{
474 struct bio *b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs, fs_bio_set);
475
476 if (!b)
477 return NULL;
478
479 b->bi_destructor = bio_fs_destructor;
480 __bio_clone(b, bio);
481
482 if (bio_integrity(bio)) {
483 int ret;
484
485 ret = bio_integrity_clone(b, bio, gfp_mask, fs_bio_set);
486
487 if (ret < 0) {
488 bio_put(b);
489 return NULL;
490 }
491 }
492
493 return b;
494}
495EXPORT_SYMBOL(bio_clone);
496
497
498
499
500
501
502
503
504
505
506int bio_get_nr_vecs(struct block_device *bdev)
507{
508 struct request_queue *q = bdev_get_queue(bdev);
509 int nr_pages;
510
511 nr_pages = ((queue_max_sectors(q) << 9) + PAGE_SIZE - 1) >> PAGE_SHIFT;
512 if (nr_pages > queue_max_segments(q))
513 nr_pages = queue_max_segments(q);
514
515 return nr_pages;
516}
517EXPORT_SYMBOL(bio_get_nr_vecs);
518
/*
 * __bio_add_page - try to append @len bytes of @page at @offset to @bio
 * @q: queue whose limits and merge_bvec_fn are honoured
 * @bio: bio to extend
 * @page: page holding the data
 * @len: number of bytes
 * @offset: byte offset into @page
 * @max_sectors: size limit (in 512-byte sectors) for the whole bio
 *
 * Returns @len if the data was added, 0 if it was refused. On refusal the
 * bio is left as it was.
 */
static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
			  *page, unsigned int len, unsigned int offset,
			  unsigned short max_sectors)
{
	int retried_segments = 0;
	struct bio_vec *bvec;

	/* cloned bios are never extended */
	if (unlikely(bio_flagged(bio, BIO_CLONED)))
		return 0;

	if (((bio->bi_size + len) >> 9) > max_sectors)
		return 0;

	/*
	 * If the new data directly follows the last vec on the same page,
	 * grow that vec instead of consuming a new slot.
	 */
	if (bio->bi_vcnt > 0) {
		struct bio_vec *prev = &bio->bi_io_vec[bio->bi_vcnt - 1];

		if (page == prev->bv_page &&
		    offset == prev->bv_offset + prev->bv_len) {
			unsigned int prev_bv_len = prev->bv_len;
			prev->bv_len += len;

			if (q->merge_bvec_fn) {
				struct bvec_merge_data bvm = {
					/*
					 * bi_size excludes the vec being
					 * extended; the callback is handed
					 * the already-grown vec.
					 */
					.bi_bdev = bio->bi_bdev,
					.bi_sector = bio->bi_sector,
					.bi_size = bio->bi_size - prev_bv_len,
					.bi_rw = bio->bi_rw,
				};

				/* callback accepts less than the grown vec: undo */
				if (q->merge_bvec_fn(q, &bvm, prev) < prev->bv_len) {
					prev->bv_len -= len;
					return 0;
				}
			}

			goto done;
		}
	}

	/* no free slot left in the vec array */
	if (bio->bi_vcnt >= bio->bi_max_vecs)
		return 0;

	/*
	 * At the segment limit: recount once in case the cached count is
	 * out of date, and give up if it is still at the limit afterwards.
	 */
	while (bio->bi_phys_segments >= queue_max_segments(q)) {

		if (retried_segments)
			return 0;

		retried_segments = 1;
		blk_recount_segments(q, bio);
	}

	/*
	 * Fill in the next slot. bi_vcnt is not bumped yet, so the vec is
	 * not part of the bio until the checks below pass.
	 */
	bvec = &bio->bi_io_vec[bio->bi_vcnt];
	bvec->bv_page = page;
	bvec->bv_len = len;
	bvec->bv_offset = offset;

	/*
	 * Let the queue's merge callback veto the new vec. If it accepts
	 * fewer bytes than the vec holds, clear the slot again and fail.
	 */
	if (q->merge_bvec_fn) {
		struct bvec_merge_data bvm = {
			.bi_bdev = bio->bi_bdev,
			.bi_sector = bio->bi_sector,
			.bi_size = bio->bi_size,
			.bi_rw = bio->bi_rw,
		};

		if (q->merge_bvec_fn(q, &bvm, bvec) < bvec->bv_len) {
			bvec->bv_page = NULL;
			bvec->bv_len = 0;
			bvec->bv_offset = 0;
			return 0;
		}
	}

	/*
	 * If the new vec is physically mergeable with its predecessor, the
	 * cached segment count is no longer trustworthy: force a recount.
	 */
	if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec)))
		bio->bi_flags &= ~(1 << BIO_SEG_VALID);

	bio->bi_vcnt++;
	bio->bi_phys_segments++;
 done:
	bio->bi_size += len;
	return len;
}
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
/*
 * bio_add_pc_page - try to add a page to a bio, bounded by the queue's
 * max_hw_sectors limit
 * @q: target queue
 * @bio: bio to extend
 * @page: page to add
 * @len: number of bytes
 * @offset: byte offset into @page
 *
 * Returns @len on success, 0 if the page could not be added.
 */
int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page *page,
		    unsigned int len, unsigned int offset)
{
	return __bio_add_page(q, bio, page, len, offset,
			      queue_max_hw_sectors(q));
}
EXPORT_SYMBOL(bio_add_pc_page);
653
654
655
656
657
658
659
660
661
662
663
664
665
666
/*
 * bio_add_page - try to add a page to a bio, bounded by the max_sectors
 * limit of the queue behind bio->bi_bdev
 * @bio: bio to extend; bi_bdev must already be set, it is dereferenced here
 * @page: page to add
 * @len: number of bytes
 * @offset: byte offset into @page
 *
 * Returns @len on success, 0 if the page could not be added.
 */
int bio_add_page(struct bio *bio, struct page *page, unsigned int len,
		 unsigned int offset)
{
	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
	return __bio_add_page(q, bio, page, len, offset, queue_max_sectors(q));
}
EXPORT_SYMBOL(bio_add_page);
674
/*
 * Bookkeeping attached (through bi_private) to a bio built by
 * bio_copy_user_iov(), so the data can be copied back and the pages freed
 * when the I/O is done.
 */
struct bio_map_data {
	struct bio_vec *iovecs;		/* snapshot of the bio's vecs */
	struct sg_iovec *sgvecs;	/* copy of the caller's iovec array */
	int nr_sgvecs;			/* number of entries in sgvecs */
	int is_our_pages;		/* non-zero: pages were allocated here and must be freed */
};
681
682static void bio_set_map_data(struct bio_map_data *bmd, struct bio *bio,
683 struct sg_iovec *iov, int iov_count,
684 int is_our_pages)
685{
686 memcpy(bmd->iovecs, bio->bi_io_vec, sizeof(struct bio_vec) * bio->bi_vcnt);
687 memcpy(bmd->sgvecs, iov, sizeof(struct sg_iovec) * iov_count);
688 bmd->nr_sgvecs = iov_count;
689 bmd->is_our_pages = is_our_pages;
690 bio->bi_private = bmd;
691}
692
693static void bio_free_map_data(struct bio_map_data *bmd)
694{
695 kfree(bmd->iovecs);
696 kfree(bmd->sgvecs);
697 kfree(bmd);
698}
699
700static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count,
701 gfp_t gfp_mask)
702{
703 struct bio_map_data *bmd;
704
705 if (iov_count > UIO_MAXIOV)
706 return NULL;
707
708 bmd = kmalloc(sizeof(*bmd), gfp_mask);
709 if (!bmd)
710 return NULL;
711
712 bmd->iovecs = kmalloc(sizeof(struct bio_vec) * nr_segs, gfp_mask);
713 if (!bmd->iovecs) {
714 kfree(bmd);
715 return NULL;
716 }
717
718 bmd->sgvecs = kmalloc(sizeof(struct sg_iovec) * iov_count, gfp_mask);
719 if (bmd->sgvecs)
720 return bmd;
721
722 kfree(bmd->iovecs);
723 kfree(bmd);
724 return NULL;
725}
726
/*
 * __bio_copy_iov - copy between a bio's pages and a user iovec array
 * @bio: bio whose pages are the kernel-side buffer
 * @iovecs: bio_vec array supplying the length of each segment
 * @iov: user iovec array
 * @iov_count: entries in @iov
 * @to_user: non-zero to copy page data out to user space
 * @from_user: non-zero to copy user data into the pages
 * @do_free_page: non-zero to free each page after it has been processed
 *
 * Returns 0 on success or -EFAULT if any user copy reported failure.
 */
static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs,
			  struct sg_iovec *iov, int iov_count,
			  int to_user, int from_user, int do_free_page)
{
	int ret = 0, i;
	struct bio_vec *bvec;
	int iov_idx = 0;		/* current user iovec */
	unsigned int iov_off = 0;	/* bytes already consumed from it */

	/* walk every segment from index 0, regardless of bi_idx */
	__bio_for_each_segment(bvec, bio, i, 0) {
		char *bv_addr = page_address(bvec->bv_page);
		/* length is taken from @iovecs, not from the bio's own vec */
		unsigned int bv_len = iovecs[i].bv_len;

		while (bv_len && iov_idx < iov_count) {
			unsigned int bytes;
			char __user *iov_addr;

			/* largest chunk that fits both the segment and the iovec */
			bytes = min_t(unsigned int,
				      iov[iov_idx].iov_len - iov_off, bv_len);
			iov_addr = iov[iov_idx].iov_base + iov_off;

			/*
			 * After the first failure no more data is copied, but
			 * the walk continues so that the cursors advance and
			 * pages still get freed below.
			 */
			if (!ret) {
				if (to_user)
					ret = copy_to_user(iov_addr, bv_addr,
							   bytes);

				if (from_user)
					ret = copy_from_user(bv_addr, iov_addr,
							     bytes);

				if (ret)
					ret = -EFAULT;
			}

			bv_len -= bytes;
			bv_addr += bytes;
			iov_addr += bytes;
			iov_off += bytes;

			/* current iovec used up: move on to the next one */
			if (iov[iov_idx].iov_len == iov_off) {
				iov_idx++;
				iov_off = 0;
			}
		}

		if (do_free_page)
			__free_page(bvec->bv_page);
	}

	return ret;
}
778
779
780
781
782
783
784
785
786int bio_uncopy_user(struct bio *bio)
787{
788 struct bio_map_data *bmd = bio->bi_private;
789 int ret = 0;
790
791 if (!bio_flagged(bio, BIO_NULL_MAPPED))
792 ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs,
793 bmd->nr_sgvecs, bio_data_dir(bio) == READ,
794 0, bmd->is_our_pages);
795 bio_free_map_data(bmd);
796 bio_put(bio);
797 return ret;
798}
799EXPORT_SYMBOL(bio_uncopy_user);
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814struct bio *bio_copy_user_iov(struct request_queue *q,
815 struct rq_map_data *map_data,
816 struct sg_iovec *iov, int iov_count,
817 int write_to_vm, gfp_t gfp_mask)
818{
819 struct bio_map_data *bmd;
820 struct bio_vec *bvec;
821 struct page *page;
822 struct bio *bio;
823 int i, ret;
824 int nr_pages = 0;
825 unsigned int len = 0;
826 unsigned int offset = map_data ? map_data->offset & ~PAGE_MASK : 0;
827
828 for (i = 0; i < iov_count; i++) {
829 unsigned long uaddr;
830 unsigned long end;
831 unsigned long start;
832
833 uaddr = (unsigned long)iov[i].iov_base;
834 end = (uaddr + iov[i].iov_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
835 start = uaddr >> PAGE_SHIFT;
836
837
838
839
840 if (end < start)
841 return ERR_PTR(-EINVAL);
842
843 nr_pages += end - start;
844 len += iov[i].iov_len;
845 }
846
847 if (offset)
848 nr_pages++;
849
850 bmd = bio_alloc_map_data(nr_pages, iov_count, gfp_mask);
851 if (!bmd)
852 return ERR_PTR(-ENOMEM);
853
854 ret = -ENOMEM;
855 bio = bio_kmalloc(gfp_mask, nr_pages);
856 if (!bio)
857 goto out_bmd;
858
859 if (!write_to_vm)
860 bio->bi_rw |= REQ_WRITE;
861
862 ret = 0;
863
864 if (map_data) {
865 nr_pages = 1 << map_data->page_order;
866 i = map_data->offset / PAGE_SIZE;
867 }
868 while (len) {
869 unsigned int bytes = PAGE_SIZE;
870
871 bytes -= offset;
872
873 if (bytes > len)
874 bytes = len;
875
876 if (map_data) {
877 if (i == map_data->nr_entries * nr_pages) {
878 ret = -ENOMEM;
879 break;
880 }
881
882 page = map_data->pages[i / nr_pages];
883 page += (i % nr_pages);
884
885 i++;
886 } else {
887 page = alloc_page(q->bounce_gfp | gfp_mask);
888 if (!page) {
889 ret = -ENOMEM;
890 break;
891 }
892 }
893
894 if (bio_add_pc_page(q, bio, page, bytes, offset) < bytes)
895 break;
896
897 len -= bytes;
898 offset = 0;
899 }
900
901 if (ret)
902 goto cleanup;
903
904
905
906
907 if ((!write_to_vm && (!map_data || !map_data->null_mapped)) ||
908 (map_data && map_data->from_user)) {
909 ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0, 1, 0);
910 if (ret)
911 goto cleanup;
912 }
913
914 bio_set_map_data(bmd, bio, iov, iov_count, map_data ? 0 : 1);
915 return bio;
916cleanup:
917 if (!map_data)
918 bio_for_each_segment(bvec, bio, i)
919 __free_page(bvec->bv_page);
920
921 bio_put(bio);
922out_bmd:
923 bio_free_map_data(bmd);
924 return ERR_PTR(ret);
925}
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940struct bio *bio_copy_user(struct request_queue *q, struct rq_map_data *map_data,
941 unsigned long uaddr, unsigned int len,
942 int write_to_vm, gfp_t gfp_mask)
943{
944 struct sg_iovec iov;
945
946 iov.iov_base = (void __user *)uaddr;
947 iov.iov_len = len;
948
949 return bio_copy_user_iov(q, map_data, &iov, 1, write_to_vm, gfp_mask);
950}
951EXPORT_SYMBOL(bio_copy_user);
952
/*
 * __bio_map_user_iov - build a bio directly on top of pinned user pages
 * @q: destination queue
 * @bdev: block device stored in the bio
 * @iov: user iovec array
 * @iov_count: entries in @iov
 * @write_to_vm: non-zero if the device data ends up in user memory
 * @gfp_mask: allocation flags
 *
 * Returns the bio (flagged BIO_USER_MAPPED) or an ERR_PTR(): -EINVAL for
 * a wrapping, misaligned or empty request, -ENOMEM on allocation failure,
 * -EFAULT if not all user pages could be pinned.
 */
static struct bio *__bio_map_user_iov(struct request_queue *q,
				      struct block_device *bdev,
				      struct sg_iovec *iov, int iov_count,
				      int write_to_vm, gfp_t gfp_mask)
{
	int i, j;
	int nr_pages = 0;
	struct page **pages;
	struct bio *bio;
	int cur_page = 0;
	int ret, offset;

	/* first pass: validate the iovecs and count the pages they span */
	for (i = 0; i < iov_count; i++) {
		unsigned long uaddr = (unsigned long)iov[i].iov_base;
		unsigned long len = iov[i].iov_len;
		unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
		unsigned long start = uaddr >> PAGE_SHIFT;

		/* address + length wrapped around */
		if (end < start)
			return ERR_PTR(-EINVAL);

		nr_pages += end - start;

		/* the buffer start must satisfy the queue's DMA alignment mask */
		if (uaddr & queue_dma_alignment(q))
			return ERR_PTR(-EINVAL);
	}

	if (!nr_pages)
		return ERR_PTR(-EINVAL);

	bio = bio_kmalloc(gfp_mask, nr_pages);
	if (!bio)
		return ERR_PTR(-ENOMEM);

	ret = -ENOMEM;
	/* zeroed so the error path can stop at the first unfilled slot */
	pages = kcalloc(nr_pages, sizeof(struct page *), gfp_mask);
	if (!pages)
		goto out;

	/* second pass: pin each iovec's pages and add them to the bio */
	for (i = 0; i < iov_count; i++) {
		unsigned long uaddr = (unsigned long)iov[i].iov_base;
		unsigned long len = iov[i].iov_len;
		unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
		unsigned long start = uaddr >> PAGE_SHIFT;
		const int local_nr_pages = end - start;
		const int page_limit = cur_page + local_nr_pages;

		ret = get_user_pages_fast(uaddr, local_nr_pages,
				write_to_vm, &pages[cur_page]);
		if (ret < local_nr_pages) {
			ret = -EFAULT;
			goto out_unmap;
		}

		offset = uaddr & ~PAGE_MASK;
		for (j = cur_page; j < page_limit; j++) {
			unsigned int bytes = PAGE_SIZE - offset;

			if (len <= 0)
				break;

			if (bytes > len)
				bytes = len;

			/* stop at the first page the queue will not take */
			if (bio_add_pc_page(q, bio, pages[j], bytes, offset) <
					    bytes)
				break;

			len -= bytes;
			offset = 0;
		}

		cur_page = j;

		/* drop the pins on pages that did not make it into the bio */
		while (j < page_limit)
			page_cache_release(pages[j++]);
	}

	kfree(pages);

	/* data flows from user memory to the device: mark the bio a write */
	if (!write_to_vm)
		bio->bi_rw |= REQ_WRITE;

	bio->bi_bdev = bdev;
	bio->bi_flags |= (1 << BIO_USER_MAPPED);
	return bio;

 out_unmap:
	/* release every page pinned so far; the array is NULL past those */
	for (i = 0; i < nr_pages; i++) {
		if(!pages[i])
			break;
		page_cache_release(pages[i]);
	}
 out:
	kfree(pages);
	bio_put(bio);
	return ERR_PTR(ret);
}
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077struct bio *bio_map_user(struct request_queue *q, struct block_device *bdev,
1078 unsigned long uaddr, unsigned int len, int write_to_vm,
1079 gfp_t gfp_mask)
1080{
1081 struct sg_iovec iov;
1082
1083 iov.iov_base = (void __user *)uaddr;
1084 iov.iov_len = len;
1085
1086 return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm, gfp_mask);
1087}
1088EXPORT_SYMBOL(bio_map_user);
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102struct bio *bio_map_user_iov(struct request_queue *q, struct block_device *bdev,
1103 struct sg_iovec *iov, int iov_count,
1104 int write_to_vm, gfp_t gfp_mask)
1105{
1106 struct bio *bio;
1107
1108 bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm,
1109 gfp_mask);
1110 if (IS_ERR(bio))
1111 return bio;
1112
1113
1114
1115
1116
1117
1118
1119 bio_get(bio);
1120
1121 return bio;
1122}
1123
1124static void __bio_unmap_user(struct bio *bio)
1125{
1126 struct bio_vec *bvec;
1127 int i;
1128
1129
1130
1131
1132 __bio_for_each_segment(bvec, bio, i, 0) {
1133 if (bio_data_dir(bio) == READ)
1134 set_page_dirty_lock(bvec->bv_page);
1135
1136 page_cache_release(bvec->bv_page);
1137 }
1138
1139 bio_put(bio);
1140}
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
/*
 * bio_unmap_user - undo bio_map_user()/bio_map_user_iov()
 * @bio: the mapped bio
 *
 * Releases the pinned pages via __bio_unmap_user() (which does one
 * bio_put()) and then does a second bio_put(), matching the bio_get()
 * done in bio_map_user_iov().
 */
void bio_unmap_user(struct bio *bio)
{
	__bio_unmap_user(bio);
	bio_put(bio);
}
EXPORT_SYMBOL(bio_unmap_user);
1157
/* Completion handler for bios from bio_map_kern(): just drops the bio. */
static void bio_map_kern_endio(struct bio *bio, int err)
{
	bio_put(bio);
}
1162
1163static struct bio *__bio_map_kern(struct request_queue *q, void *data,
1164 unsigned int len, gfp_t gfp_mask)
1165{
1166 unsigned long kaddr = (unsigned long)data;
1167 unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
1168 unsigned long start = kaddr >> PAGE_SHIFT;
1169 const int nr_pages = end - start;
1170 int offset, i;
1171 struct bio *bio;
1172
1173 bio = bio_kmalloc(gfp_mask, nr_pages);
1174 if (!bio)
1175 return ERR_PTR(-ENOMEM);
1176
1177 offset = offset_in_page(kaddr);
1178 for (i = 0; i < nr_pages; i++) {
1179 unsigned int bytes = PAGE_SIZE - offset;
1180
1181 if (len <= 0)
1182 break;
1183
1184 if (bytes > len)
1185 bytes = len;
1186
1187 if (bio_add_pc_page(q, bio, virt_to_page(data), bytes,
1188 offset) < bytes)
1189 break;
1190
1191 data += bytes;
1192 len -= bytes;
1193 offset = 0;
1194 }
1195
1196 bio->bi_end_io = bio_map_kern_endio;
1197 return bio;
1198}
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
1211 gfp_t gfp_mask)
1212{
1213 struct bio *bio;
1214
1215 bio = __bio_map_kern(q, data, len, gfp_mask);
1216 if (IS_ERR(bio))
1217 return bio;
1218
1219 if (bio->bi_size == len)
1220 return bio;
1221
1222
1223
1224
1225 bio_put(bio);
1226 return ERR_PTR(-EINVAL);
1227}
1228EXPORT_SYMBOL(bio_map_kern);
1229
1230static void bio_copy_kern_endio(struct bio *bio, int err)
1231{
1232 struct bio_vec *bvec;
1233 const int read = bio_data_dir(bio) == READ;
1234 struct bio_map_data *bmd = bio->bi_private;
1235 int i;
1236 char *p = bmd->sgvecs[0].iov_base;
1237
1238 __bio_for_each_segment(bvec, bio, i, 0) {
1239 char *addr = page_address(bvec->bv_page);
1240 int len = bmd->iovecs[i].bv_len;
1241
1242 if (read)
1243 memcpy(p, addr, len);
1244
1245 __free_page(bvec->bv_page);
1246 p += len;
1247 }
1248
1249 bio_free_map_data(bmd);
1250 bio_put(bio);
1251}
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len,
1265 gfp_t gfp_mask, int reading)
1266{
1267 struct bio *bio;
1268 struct bio_vec *bvec;
1269 int i;
1270
1271 bio = bio_copy_user(q, NULL, (unsigned long)data, len, 1, gfp_mask);
1272 if (IS_ERR(bio))
1273 return bio;
1274
1275 if (!reading) {
1276 void *p = data;
1277
1278 bio_for_each_segment(bvec, bio, i) {
1279 char *addr = page_address(bvec->bv_page);
1280
1281 memcpy(addr, p, bvec->bv_len);
1282 p += bvec->bv_len;
1283 }
1284 }
1285
1286 bio->bi_end_io = bio_copy_kern_endio;
1287
1288 return bio;
1289}
1290EXPORT_SYMBOL(bio_copy_kern);
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321void bio_set_pages_dirty(struct bio *bio)
1322{
1323 struct bio_vec *bvec = bio->bi_io_vec;
1324 int i;
1325
1326 for (i = 0; i < bio->bi_vcnt; i++) {
1327 struct page *page = bvec[i].bv_page;
1328
1329 if (page && !PageCompound(page))
1330 set_page_dirty_lock(page);
1331 }
1332}
1333
1334static void bio_release_pages(struct bio *bio)
1335{
1336 struct bio_vec *bvec = bio->bi_io_vec;
1337 int i;
1338
1339 for (i = 0; i < bio->bi_vcnt; i++) {
1340 struct page *page = bvec[i].bv_page;
1341
1342 if (page)
1343 put_page(page);
1344 }
1345}
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
/*
 * Deferred page dirtying. bio_check_pages_dirty() pushes bios onto
 * bio_dirty_list (linked through bi_private) and schedules bio_dirty_work;
 * bio_dirty_fn() then drains the list from the workqueue.
 */
static void bio_dirty_fn(struct work_struct *work);

static DECLARE_WORK(bio_dirty_work, bio_dirty_fn);
/* Protects bio_dirty_list. */
static DEFINE_SPINLOCK(bio_dirty_lock);
static struct bio *bio_dirty_list;
1363
1364
1365
1366
1367static void bio_dirty_fn(struct work_struct *work)
1368{
1369 unsigned long flags;
1370 struct bio *bio;
1371
1372 spin_lock_irqsave(&bio_dirty_lock, flags);
1373 bio = bio_dirty_list;
1374 bio_dirty_list = NULL;
1375 spin_unlock_irqrestore(&bio_dirty_lock, flags);
1376
1377 while (bio) {
1378 struct bio *next = bio->bi_private;
1379
1380 bio_set_pages_dirty(bio);
1381 bio_release_pages(bio);
1382 bio_put(bio);
1383 bio = next;
1384 }
1385}
1386
1387void bio_check_pages_dirty(struct bio *bio)
1388{
1389 struct bio_vec *bvec = bio->bi_io_vec;
1390 int nr_clean_pages = 0;
1391 int i;
1392
1393 for (i = 0; i < bio->bi_vcnt; i++) {
1394 struct page *page = bvec[i].bv_page;
1395
1396 if (PageDirty(page) || PageCompound(page)) {
1397 page_cache_release(page);
1398 bvec[i].bv_page = NULL;
1399 } else {
1400 nr_clean_pages++;
1401 }
1402 }
1403
1404 if (nr_clean_pages) {
1405 unsigned long flags;
1406
1407 spin_lock_irqsave(&bio_dirty_lock, flags);
1408 bio->bi_private = bio_dirty_list;
1409 bio_dirty_list = bio;
1410 spin_unlock_irqrestore(&bio_dirty_lock, flags);
1411 schedule_work(&bio_dirty_work);
1412 } else {
1413 bio_put(bio);
1414 }
1415}
1416
#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
/*
 * bio_flush_dcache_pages - call flush_dcache_page() on every segment's page
 * @bi: bio to walk
 *
 * Only built when the architecture implements flush_dcache_page().
 */
void bio_flush_dcache_pages(struct bio *bi)
{
	struct bio_vec *seg;
	int n;

	bio_for_each_segment(seg, bi, n) {
		flush_dcache_page(seg->bv_page);
	}
}
EXPORT_SYMBOL(bio_flush_dcache_pages);
#endif
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443void bio_endio(struct bio *bio, int error)
1444{
1445 if (error)
1446 clear_bit(BIO_UPTODATE, &bio->bi_flags);
1447 else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
1448 error = -EIO;
1449
1450 if (bio->bi_end_io)
1451 bio->bi_end_io(bio, error);
1452}
1453EXPORT_SYMBOL(bio_endio);
1454
1455void bio_pair_release(struct bio_pair *bp)
1456{
1457 if (atomic_dec_and_test(&bp->cnt)) {
1458 struct bio *master = bp->bio1.bi_private;
1459
1460 bio_endio(master, bp->error);
1461 mempool_free(bp, bp->bio2.bi_private);
1462 }
1463}
1464EXPORT_SYMBOL(bio_pair_release);
1465
/*
 * Completion handler for the first half of a split bio: records a
 * non-zero error in the pair and drops one pair reference.
 */
static void bio_pair_end_1(struct bio *bi, int err)
{
	struct bio_pair *bp = container_of(bi, struct bio_pair, bio1);

	if (err)
		bp->error = err;

	bio_pair_release(bp);
}
1475
/*
 * Completion handler for the second half of a split bio: records a
 * non-zero error in the pair and drops one pair reference.
 */
static void bio_pair_end_2(struct bio *bi, int err)
{
	struct bio_pair *bp = container_of(bi, struct bio_pair, bio2);

	if (err)
		bp->error = err;

	bio_pair_release(bp);
}
1485
1486
1487
1488
/*
 * bio_split - split a single-vec bio into two at a sector boundary
 * @bi: bio to split; must have exactly one vec and bi_idx == 0
 * @first_sectors: number of 512-byte sectors that go into the first half
 *
 * Returns a bio_pair whose bio1/bio2 are copies of @bi covering the first
 * and second part, or NULL if the pair could not be allocated. The pair
 * starts with a reference count of 3.
 */
struct bio_pair *bio_split(struct bio *bi, int first_sectors)
{
	struct bio_pair *bp = mempool_alloc(bio_split_pool, GFP_NOIO);

	if (!bp)
		return bp;

	trace_block_split(bdev_get_queue(bi->bi_bdev), bi,
				bi->bi_sector + first_sectors);

	BUG_ON(bi->bi_vcnt != 1);
	BUG_ON(bi->bi_idx != 0);
	atomic_set(&bp->cnt, 3);
	bp->error = 0;
	/* both halves start as plain copies of the original */
	bp->bio1 = *bi;
	bp->bio2 = *bi;
	/* second half starts first_sectors further in and is that much shorter */
	bp->bio2.bi_sector += first_sectors;
	bp->bio2.bi_size -= first_sectors << 9;
	bp->bio1.bi_size = first_sectors << 9;

	/* split the single vec the same way, in bytes */
	bp->bv1 = bi->bi_io_vec[0];
	bp->bv2 = bi->bi_io_vec[0];
	bp->bv2.bv_offset += first_sectors << 9;
	bp->bv2.bv_len -= first_sectors << 9;
	bp->bv1.bv_len = first_sectors << 9;

	/* each half uses the vec embedded in the pair */
	bp->bio1.bi_io_vec = &bp->bv1;
	bp->bio2.bi_io_vec = &bp->bv2;

	bp->bio1.bi_max_vecs = 1;
	bp->bio2.bi_max_vecs = 1;

	bp->bio1.bi_end_io = bio_pair_end_1;
	bp->bio2.bi_end_io = bio_pair_end_2;

	/*
	 * bi_private is repurposed: bio1 remembers the original bio, bio2
	 * the mempool to free the pair to (both read in bio_pair_release()).
	 */
	bp->bio1.bi_private = bi;
	bp->bio2.bi_private = bio_split_pool;

	if (bio_integrity(bi))
		bio_integrity_split(bi, bp, first_sectors);

	return bp;
}
EXPORT_SYMBOL(bio_split);
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
/*
 * bio_sector_offset - number of blocks from the start of a bio to a
 * position inside one of its vecs
 * @bio: the bio (bi_bdev must be set; its queue supplies the block size)
 * @index: vec index of the position
 * @offset: byte offset of the position within that vec's page
 *
 * Walks the vecs from index 0, adding bv_len / block size for each vec
 * before @index, plus (offset - bv_offset) / block size for the vec at
 * @index. The unit is the queue's logical block size.
 */
sector_t bio_sector_offset(struct bio *bio, unsigned short index,
			   unsigned int offset)
{
	unsigned int sector_sz;
	struct bio_vec *bv;
	sector_t sectors;
	int i;

	sector_sz = queue_logical_block_size(bio->bi_bdev->bd_disk->queue);
	sectors = 0;

	/*
	 * NOTE(review): any index at or beyond bi_idx is replaced by the
	 * last vec's index. Comparing against bi_idx rather than bi_vcnt
	 * looks unusual; confirm the intent against callers before changing.
	 */
	if (index >= bio->bi_idx)
		index = bio->bi_vcnt - 1;

	__bio_for_each_segment(bv, bio, i, 0) {
		if (i == index) {
			/* partial vec: count only up to @offset */
			if (offset > bv->bv_offset)
				sectors += (offset - bv->bv_offset) / sector_sz;
			break;
		}

		sectors += bv->bv_len / sector_sz;
	}

	return sectors;
}
EXPORT_SYMBOL(bio_sector_offset);
1571
1572
1573
1574
1575
1576static int biovec_create_pools(struct bio_set *bs, int pool_entries)
1577{
1578 struct biovec_slab *bp = bvec_slabs + BIOVEC_MAX_IDX;
1579
1580 bs->bvec_pool = mempool_create_slab_pool(pool_entries, bp->slab);
1581 if (!bs->bvec_pool)
1582 return -ENOMEM;
1583
1584 return 0;
1585}
1586
1587static void biovec_free_pools(struct bio_set *bs)
1588{
1589 mempool_destroy(bs->bvec_pool);
1590}
1591
/*
 * bioset_free - tear down a bio_set and free it
 * @bs: bio_set to destroy
 *
 * Destroys the bio mempool (if present), the integrity and bvec pools,
 * drops the reference on the shared bio kmem cache, and frees @bs itself.
 * Also used as the error path of bioset_create(), so @bs may be only
 * partially set up.
 */
void bioset_free(struct bio_set *bs)
{
	if (bs->bio_pool)
		mempool_destroy(bs->bio_pool);

	bioset_integrity_free(bs);
	biovec_free_pools(bs);
	bio_put_slab(bs);

	kfree(bs);
}
EXPORT_SYMBOL(bioset_free);
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
1619{
1620 unsigned int back_pad = BIO_INLINE_VECS * sizeof(struct bio_vec);
1621 struct bio_set *bs;
1622
1623 bs = kzalloc(sizeof(*bs), GFP_KERNEL);
1624 if (!bs)
1625 return NULL;
1626
1627 bs->front_pad = front_pad;
1628
1629 bs->bio_slab = bio_find_or_create_slab(front_pad + back_pad);
1630 if (!bs->bio_slab) {
1631 kfree(bs);
1632 return NULL;
1633 }
1634
1635 bs->bio_pool = mempool_create_slab_pool(pool_size, bs->bio_slab);
1636 if (!bs->bio_pool)
1637 goto bad;
1638
1639 if (bioset_integrity_create(bs, pool_size))
1640 goto bad;
1641
1642 if (!biovec_create_pools(bs, pool_size))
1643 return bs;
1644
1645bad:
1646 bioset_free(bs);
1647 return NULL;
1648}
1649EXPORT_SYMBOL(bioset_create);
1650
/*
 * biovec_init_slabs - create the kmem cache for each biovec size class
 *
 * Runs once at init. Cache creation uses SLAB_PANIC, so there is no
 * failure handling here.
 */
static void __init biovec_init_slabs(void)
{
	int i;

	for (i = 0; i < BIOVEC_NR_POOLS; i++) {
		int size;
		struct biovec_slab *bvs = bvec_slabs + i;

		/*
		 * Without block integrity support, classes that fit into a
		 * bio's inline vecs get no cache at all.
		 */
#ifndef CONFIG_BLK_DEV_INTEGRITY
		if (bvs->nr_vecs <= BIO_INLINE_VECS) {
			bvs->slab = NULL;
			continue;
		}
#endif

		size = bvs->nr_vecs * sizeof(struct bio_vec);
		bvs->slab = kmem_cache_create(bvs->name, size, 0,
				SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
	}
}
1671
1672static int __init init_bio(void)
1673{
1674 bio_slab_max = 2;
1675 bio_slab_nr = 0;
1676 bio_slabs = kzalloc(bio_slab_max * sizeof(struct bio_slab), GFP_KERNEL);
1677 if (!bio_slabs)
1678 panic("bio: can't allocate bios\n");
1679
1680 bio_integrity_init();
1681 biovec_init_slabs();
1682
1683 fs_bio_set = bioset_create(BIO_POOL_SIZE, 0);
1684 if (!fs_bio_set)
1685 panic("bio: can't allocate bios\n");
1686
1687 bio_split_pool = mempool_create_kmalloc_pool(BIO_SPLIT_ENTRIES,
1688 sizeof(struct bio_pair));
1689 if (!bio_split_pool)
1690 panic("bio: can't create split pool\n");
1691
1692 return 0;
1693}
1694subsys_initcall(init_bio);
1695