1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18#include <linux/mm.h>
19#include <linux/swap.h>
20#include <linux/bio.h>
21#include <linux/blkdev.h>
22#include <linux/iocontext.h>
23#include <linux/slab.h>
24#include <linux/init.h>
25#include <linux/kernel.h>
26#include <linux/export.h>
27#include <linux/mempool.h>
28#include <linux/workqueue.h>
29#include <linux/cgroup.h>
30#include <scsi/sg.h>
31
32#include <trace/events/block.h>
33
34
35
36
37
38#define BIO_INLINE_VECS 4
39
40static mempool_t *bio_split_pool __read_mostly;
41
42
43
44
45
46
47#define BV(x) { .nr_vecs = x, .name = "biovec-"__stringify(x) }
48static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = {
49 BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES),
50};
51#undef BV
52
53
54
55
56
57struct bio_set *fs_bio_set;
58
59
60
61
62struct bio_slab {
63 struct kmem_cache *slab;
64 unsigned int slab_ref;
65 unsigned int slab_size;
66 char name[8];
67};
68static DEFINE_MUTEX(bio_slab_lock);
69static struct bio_slab *bio_slabs;
70static unsigned int bio_slab_nr, bio_slab_max;
71
72static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size)
73{
74 unsigned int sz = sizeof(struct bio) + extra_size;
75 struct kmem_cache *slab = NULL;
76 struct bio_slab *bslab, *new_bio_slabs;
77 unsigned int i, entry = -1;
78
79 mutex_lock(&bio_slab_lock);
80
81 i = 0;
82 while (i < bio_slab_nr) {
83 bslab = &bio_slabs[i];
84
85 if (!bslab->slab && entry == -1)
86 entry = i;
87 else if (bslab->slab_size == sz) {
88 slab = bslab->slab;
89 bslab->slab_ref++;
90 break;
91 }
92 i++;
93 }
94
95 if (slab)
96 goto out_unlock;
97
98 if (bio_slab_nr == bio_slab_max && entry == -1) {
99 bio_slab_max <<= 1;
100 new_bio_slabs = krealloc(bio_slabs,
101 bio_slab_max * sizeof(struct bio_slab),
102 GFP_KERNEL);
103 if (!new_bio_slabs)
104 goto out_unlock;
105 bio_slabs = new_bio_slabs;
106 }
107 if (entry == -1)
108 entry = bio_slab_nr++;
109
110 bslab = &bio_slabs[entry];
111
112 snprintf(bslab->name, sizeof(bslab->name), "bio-%d", entry);
113 slab = kmem_cache_create(bslab->name, sz, 0, SLAB_HWCACHE_ALIGN, NULL);
114 if (!slab)
115 goto out_unlock;
116
117 printk(KERN_INFO "bio: create slab <%s> at %d\n", bslab->name, entry);
118 bslab->slab = slab;
119 bslab->slab_ref = 1;
120 bslab->slab_size = sz;
121out_unlock:
122 mutex_unlock(&bio_slab_lock);
123 return slab;
124}
125
126static void bio_put_slab(struct bio_set *bs)
127{
128 struct bio_slab *bslab = NULL;
129 unsigned int i;
130
131 mutex_lock(&bio_slab_lock);
132
133 for (i = 0; i < bio_slab_nr; i++) {
134 if (bs->bio_slab == bio_slabs[i].slab) {
135 bslab = &bio_slabs[i];
136 break;
137 }
138 }
139
140 if (WARN(!bslab, KERN_ERR "bio: unable to find slab!\n"))
141 goto out;
142
143 WARN_ON(!bslab->slab_ref);
144
145 if (--bslab->slab_ref)
146 goto out;
147
148 kmem_cache_destroy(bslab->slab);
149 bslab->slab = NULL;
150
151out:
152 mutex_unlock(&bio_slab_lock);
153}
154
155unsigned int bvec_nr_vecs(unsigned short idx)
156{
157 return bvec_slabs[idx].nr_vecs;
158}
159
160void bvec_free_bs(struct bio_set *bs, struct bio_vec *bv, unsigned int idx)
161{
162 BIO_BUG_ON(idx >= BIOVEC_NR_POOLS);
163
164 if (idx == BIOVEC_MAX_IDX)
165 mempool_free(bv, bs->bvec_pool);
166 else {
167 struct biovec_slab *bvs = bvec_slabs + idx;
168
169 kmem_cache_free(bvs->slab, bv);
170 }
171}
172
/*
 * Allocate a bio_vec array able to hold @nr entries from bio_set @bs.
 * On success *@idx is set to the bvec pool index actually used (the
 * array may be larger than requested).  Returns NULL if @nr exceeds
 * BIO_MAX_PAGES or allocation fails without __GFP_WAIT.
 */
struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx,
			      struct bio_set *bs)
{
	struct bio_vec *bvl;

	/*
	 * Round the request up to the next bvec size class; the class
	 * boundaries must match the bvec_slabs[] table above.
	 */
	switch (nr) {
	case 1:
		*idx = 0;
		break;
	case 2 ... 4:
		*idx = 1;
		break;
	case 5 ... 16:
		*idx = 2;
		break;
	case 17 ... 64:
		*idx = 3;
		break;
	case 65 ... 128:
		*idx = 4;
		break;
	case 129 ... BIO_MAX_PAGES:
		*idx = 5;
		break;
	default:
		return NULL;
	}

	/*
	 * The largest class is backed by a mempool, so allocations that
	 * may sleep can always make forward progress there.
	 */
	if (*idx == BIOVEC_MAX_IDX) {
fallback:
		bvl = mempool_alloc(bs->bvec_pool, gfp_mask);
	} else {
		struct biovec_slab *bvs = bvec_slabs + *idx;
		gfp_t __gfp_mask = gfp_mask & ~(__GFP_WAIT | __GFP_IO);

		/*
		 * Try the slab first, but opportunistically: no retries,
		 * no warnings, no dipping into emergency reserves.  If it
		 * fails and the caller can wait, fall back to the mempool.
		 */
		__gfp_mask |= __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN;

		bvl = kmem_cache_alloc(bvs->slab, __gfp_mask);
		if (unlikely(!bvl && (gfp_mask & __GFP_WAIT))) {
			/* note: *idx is updated so the free path matches */
			*idx = BIOVEC_MAX_IDX;
			goto fallback;
		}
	}

	return bvl;
}
235
/*
 * Free a bio allocated from bio_set @bs: release its bvec array (unless
 * it used the inline vecs), any integrity payload, then the bio itself.
 */
void bio_free(struct bio *bio, struct bio_set *bs)
{
	void *p;

	if (bio_has_allocated_vec(bio))
		bvec_free_bs(bs, bio->bi_io_vec, BIO_POOL_IDX(bio));

	if (bio_integrity(bio))
		bio_integrity_free(bio, bs);

	/*
	 * The mempool object starts front_pad bytes before the bio
	 * (see bio_alloc_bioset); step back to the real allocation.
	 * Note: arithmetic on void * is a GCC extension (byte-sized).
	 */
	p = bio;
	if (bs->front_pad)
		p -= bs->front_pad;

	mempool_free(p, bs->bio_pool);
}
255EXPORT_SYMBOL(bio_free);
256
257void bio_init(struct bio *bio)
258{
259 memset(bio, 0, sizeof(*bio));
260 bio->bi_flags = 1 << BIO_UPTODATE;
261 atomic_set(&bio->bi_cnt, 1);
262}
263EXPORT_SYMBOL(bio_init);
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
/**
 * bio_alloc_bioset - allocate a bio from a specific bio_set
 * @gfp_mask:   allocation mask to use
 * @nr_iovecs:  number of iovecs to pre-allocate
 * @bs:		the bio_set to allocate from
 *
 * The returned bio must be freed with bio_free() (usually via the
 * destructor); the caller is expected to set bi_destructor accordingly.
 * Returns NULL if allocation fails.
 */
struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
{
	unsigned long idx = BIO_POOL_NONE;
	struct bio_vec *bvl = NULL;
	struct bio *bio;
	void *p;

	p = mempool_alloc(bs->bio_pool, gfp_mask);
	if (unlikely(!p))
		return NULL;
	/* the bio lives front_pad bytes into the mempool object */
	bio = p + bs->front_pad;

	bio_init(bio);

	if (unlikely(!nr_iovecs))
		goto out_set;

	if (nr_iovecs <= BIO_INLINE_VECS) {
		/* small requests are served by the bio's inline vec array */
		bvl = bio->bi_inline_vecs;
		nr_iovecs = BIO_INLINE_VECS;
	} else {
		bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs);
		if (unlikely(!bvl))
			goto err_free;

		nr_iovecs = bvec_nr_vecs(idx);
	}
out_set:
	/* record the bvec pool index in the flags for bio_free() */
	bio->bi_flags |= idx << BIO_POOL_OFFSET;
	bio->bi_max_vecs = nr_iovecs;
	bio->bi_io_vec = bvl;
	return bio;

err_free:
	mempool_free(p, bs->bio_pool);
	return NULL;
}
317EXPORT_SYMBOL(bio_alloc_bioset);
318
/* Destructor for bios allocated out of the global fs_bio_set. */
static void bio_fs_destructor(struct bio *bio)
{
	bio_free(bio, fs_bio_set);
}
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343struct bio *bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
344{
345 struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set);
346
347 if (bio)
348 bio->bi_destructor = bio_fs_destructor;
349
350 return bio;
351}
352EXPORT_SYMBOL(bio_alloc);
353
/*
 * Destructor for bios from bio_kmalloc(): the bio and its vecs are one
 * kmalloc'ed object, but any integrity payload came from fs_bio_set.
 */
static void bio_kmalloc_destructor(struct bio *bio)
{
	if (bio_integrity(bio))
		bio_integrity_free(bio, fs_bio_set);
	kfree(bio);
}
360
361
362
363
364
365
366
367
368
369
370
371struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned int nr_iovecs)
372{
373 struct bio *bio;
374
375 if (nr_iovecs > UIO_MAXIOV)
376 return NULL;
377
378 bio = kmalloc(sizeof(struct bio) + nr_iovecs * sizeof(struct bio_vec),
379 gfp_mask);
380 if (unlikely(!bio))
381 return NULL;
382
383 bio_init(bio);
384 bio->bi_flags |= BIO_POOL_NONE << BIO_POOL_OFFSET;
385 bio->bi_max_vecs = nr_iovecs;
386 bio->bi_io_vec = bio->bi_inline_vecs;
387 bio->bi_destructor = bio_kmalloc_destructor;
388
389 return bio;
390}
391EXPORT_SYMBOL(bio_kmalloc);
392
393void zero_fill_bio(struct bio *bio)
394{
395 unsigned long flags;
396 struct bio_vec *bv;
397 int i;
398
399 bio_for_each_segment(bv, bio, i) {
400 char *data = bvec_kmap_irq(bv, &flags);
401 memset(data, 0, bv->bv_len);
402 flush_dcache_page(bv->bv_page);
403 bvec_kunmap_irq(data, &flags);
404 }
405}
406EXPORT_SYMBOL(zero_fill_bio);
407
408
409
410
411
412
413
414
415
/**
 * bio_put - release a reference to a bio
 * @bio: bio to release reference to
 *
 * Description:
 *   Put a reference to a &struct bio, either one you have gotten with
 *   bio_alloc, bio_get or bio_clone. The last put of a bio will free it.
 **/
void bio_put(struct bio *bio)
{
	BIO_BUG_ON(!atomic_read(&bio->bi_cnt));

	/*
	 * last put frees it
	 */
	if (atomic_dec_and_test(&bio->bi_cnt)) {
		bio_disassociate_task(bio);
		bio->bi_next = NULL;
		bio->bi_destructor(bio);
	}
}
429EXPORT_SYMBOL(bio_put);
430
431inline int bio_phys_segments(struct request_queue *q, struct bio *bio)
432{
433 if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
434 blk_recount_segments(q, bio);
435
436 return bio->bi_phys_segments;
437}
438EXPORT_SYMBOL(bio_phys_segments);
439
440
441
442
443
444
445
446
447
448
449void __bio_clone(struct bio *bio, struct bio *bio_src)
450{
451 memcpy(bio->bi_io_vec, bio_src->bi_io_vec,
452 bio_src->bi_max_vecs * sizeof(struct bio_vec));
453
454
455
456
457
458 bio->bi_sector = bio_src->bi_sector;
459 bio->bi_bdev = bio_src->bi_bdev;
460 bio->bi_flags |= 1 << BIO_CLONED;
461 bio->bi_rw = bio_src->bi_rw;
462 bio->bi_vcnt = bio_src->bi_vcnt;
463 bio->bi_size = bio_src->bi_size;
464 bio->bi_idx = bio_src->bi_idx;
465}
466EXPORT_SYMBOL(__bio_clone);
467
468
469
470
471
472
473
474
475struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask)
476{
477 struct bio *b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs, fs_bio_set);
478
479 if (!b)
480 return NULL;
481
482 b->bi_destructor = bio_fs_destructor;
483 __bio_clone(b, bio);
484
485 if (bio_integrity(bio)) {
486 int ret;
487
488 ret = bio_integrity_clone(b, bio, gfp_mask, fs_bio_set);
489
490 if (ret < 0) {
491 bio_put(b);
492 return NULL;
493 }
494 }
495
496 return b;
497}
498EXPORT_SYMBOL(bio_clone);
499
500
501
502
503
504
505
506
507
508
509int bio_get_nr_vecs(struct block_device *bdev)
510{
511 struct request_queue *q = bdev_get_queue(bdev);
512 int nr_pages;
513
514 nr_pages = min_t(unsigned,
515 queue_max_segments(q),
516 queue_max_sectors(q) / (PAGE_SIZE >> 9) + 1);
517
518 return min_t(unsigned, nr_pages, BIO_MAX_PAGES);
519
520}
521EXPORT_SYMBOL(bio_get_nr_vecs);
522
/*
 * Try to add @len bytes of @page at @offset to @bio, honouring the
 * queue's segment count and @max_sectors limits.  Returns the number
 * of bytes added, or 0 on any refusal (the caller then submits the
 * bio as-is and retries with a new one).
 */
static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
			  *page, unsigned int len, unsigned int offset,
			  unsigned short max_sectors)
{
	int retried_segments = 0;
	struct bio_vec *bvec;

	/*
	 * cloned bios share their vec table with the original and must
	 * never modify it
	 */
	if (unlikely(bio_flagged(bio, BIO_CLONED)))
		return 0;

	if (((bio->bi_size + len) >> 9) > max_sectors)
		return 0;

	/*
	 * For filesystems with a blocksize smaller than the pagesize we
	 * will often be called with the same page and a consecutive
	 * offset - extend the previous vec instead of using a new one.
	 */
	if (bio->bi_vcnt > 0) {
		struct bio_vec *prev = &bio->bi_io_vec[bio->bi_vcnt - 1];

		if (page == prev->bv_page &&
		    offset == prev->bv_offset + prev->bv_len) {
			unsigned int prev_bv_len = prev->bv_len;
			prev->bv_len += len;

			if (q->merge_bvec_fn) {
				struct bvec_merge_data bvm = {
					/*
					 * bi_size must describe the bio
					 * *without* the vec being merged,
					 * hence the prev_bv_len rollback.
					 */
					.bi_bdev = bio->bi_bdev,
					.bi_sector = bio->bi_sector,
					.bi_size = bio->bi_size - prev_bv_len,
					.bi_rw = bio->bi_rw,
				};

				/* merge_bvec_fn() returns the byte count it
				 * accepts; anything less means rejection */
				if (q->merge_bvec_fn(q, &bvm, prev) < prev->bv_len) {
					prev->bv_len -= len;
					return 0;
				}
			}

			goto done;
		}
	}

	if (bio->bi_vcnt >= bio->bi_max_vecs)
		return 0;

	/*
	 * we might lose a segment or two here, but rather that than
	 * make this too complex
	 */
	while (bio->bi_phys_segments >= queue_max_segments(q)) {

		if (retried_segments)
			return 0;

		retried_segments = 1;
		blk_recount_segments(q, bio);
	}

	/*
	 * setup the new entry; we might clear it again later if we
	 * cannot add the page
	 */
	bvec = &bio->bi_io_vec[bio->bi_vcnt];
	bvec->bv_page = page;
	bvec->bv_len = len;
	bvec->bv_offset = offset;

	/*
	 * if the queue has other restrictions (eg varying max sector
	 * size depending on offset), it can specify a merge_bvec_fn in
	 * the queue to get further control
	 */
	if (q->merge_bvec_fn) {
		struct bvec_merge_data bvm = {
			.bi_bdev = bio->bi_bdev,
			.bi_sector = bio->bi_sector,
			.bi_size = bio->bi_size,
			.bi_rw = bio->bi_rw,
		};

		/*
		 * merge_bvec_fn() returns number of bytes it can accept
		 * at this offset
		 */
		if (q->merge_bvec_fn(q, &bvm, bvec) < bvec->bv_len) {
			/* undo the speculative vec setup */
			bvec->bv_page = NULL;
			bvec->bv_len = 0;
			bvec->bv_offset = 0;
			return 0;
		}
	}

	/* If we may be able to merge these biovecs, force a recount */
	if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec)))
		bio->bi_flags &= ~(1 << BIO_SEG_VALID);

	bio->bi_vcnt++;
	bio->bi_phys_segments++;
 done:
	bio->bi_size += len;
	return len;
}
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
/**
 * bio_add_pc_page - attempt to add a page to a passthrough bio
 * @q:      the target queue
 * @bio:    destination bio
 * @page:   page to add
 * @len:    vec entry length
 * @offset: vec entry offset
 *
 * Passthrough (pc) requests may use the full hardware sector limit
 * rather than the normal per-request limit.  Returns bytes added, or
 * 0 if the page could not be added.
 */
int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page *page,
		    unsigned int len, unsigned int offset)
{
	unsigned int max = queue_max_hw_sectors(q);

	return __bio_add_page(q, bio, page, len, offset, max);
}
657EXPORT_SYMBOL(bio_add_pc_page);
658
659
660
661
662
663
664
665
666
667
668
669
670
671int bio_add_page(struct bio *bio, struct page *page, unsigned int len,
672 unsigned int offset)
673{
674 struct request_queue *q = bdev_get_queue(bio->bi_bdev);
675 return __bio_add_page(q, bio, page, len, offset, queue_max_sectors(q));
676}
677EXPORT_SYMBOL(bio_add_page);
678
/*
 * Bookkeeping attached (via bi_private) to bios built by the user-copy
 * paths, so completion can copy data back and free any pages we own.
 */
struct bio_map_data {
	struct bio_vec *iovecs;		/* snapshot of bio's vec table */
	struct sg_iovec *sgvecs;	/* copy of the caller's iovecs */
	int nr_sgvecs;			/* entries in sgvecs */
	int is_our_pages;		/* pages were allocated by us */
};
685
686static void bio_set_map_data(struct bio_map_data *bmd, struct bio *bio,
687 struct sg_iovec *iov, int iov_count,
688 int is_our_pages)
689{
690 memcpy(bmd->iovecs, bio->bi_io_vec, sizeof(struct bio_vec) * bio->bi_vcnt);
691 memcpy(bmd->sgvecs, iov, sizeof(struct sg_iovec) * iov_count);
692 bmd->nr_sgvecs = iov_count;
693 bmd->is_our_pages = is_our_pages;
694 bio->bi_private = bmd;
695}
696
697static void bio_free_map_data(struct bio_map_data *bmd)
698{
699 kfree(bmd->iovecs);
700 kfree(bmd->sgvecs);
701 kfree(bmd);
702}
703
704static struct bio_map_data *bio_alloc_map_data(int nr_segs,
705 unsigned int iov_count,
706 gfp_t gfp_mask)
707{
708 struct bio_map_data *bmd;
709
710 if (iov_count > UIO_MAXIOV)
711 return NULL;
712
713 bmd = kmalloc(sizeof(*bmd), gfp_mask);
714 if (!bmd)
715 return NULL;
716
717 bmd->iovecs = kmalloc(sizeof(struct bio_vec) * nr_segs, gfp_mask);
718 if (!bmd->iovecs) {
719 kfree(bmd);
720 return NULL;
721 }
722
723 bmd->sgvecs = kmalloc(sizeof(struct sg_iovec) * iov_count, gfp_mask);
724 if (bmd->sgvecs)
725 return bmd;
726
727 kfree(bmd->iovecs);
728 kfree(bmd);
729 return NULL;
730}
731
/*
 * Copy data between the bio's pages and a user iovec, in either
 * direction.  @iovecs is the snapshot of the bio's vec table taken at
 * setup time (the live table may have been modified by the driver).
 * Optionally frees each bio page afterwards.  Returns 0 or -EFAULT;
 * note that on fault the walk continues so pages still get freed.
 */
static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs,
			  struct sg_iovec *iov, int iov_count,
			  int to_user, int from_user, int do_free_page)
{
	int ret = 0, i;
	struct bio_vec *bvec;
	int iov_idx = 0;		/* cursor into the user iovec array */
	unsigned int iov_off = 0;	/* byte offset within iov[iov_idx] */

	__bio_for_each_segment(bvec, bio, i, 0) {
		char *bv_addr = page_address(bvec->bv_page);
		unsigned int bv_len = iovecs[i].bv_len;

		/* drain this bio segment across as many iovecs as needed */
		while (bv_len && iov_idx < iov_count) {
			unsigned int bytes;
			char __user *iov_addr;

			bytes = min_t(unsigned int,
				      iov[iov_idx].iov_len - iov_off, bv_len);
			iov_addr = iov[iov_idx].iov_base + iov_off;

			/* after the first fault, only advance cursors */
			if (!ret) {
				if (to_user)
					ret = copy_to_user(iov_addr, bv_addr,
							   bytes);

				if (from_user)
					ret = copy_from_user(bv_addr, iov_addr,
							     bytes);

				if (ret)
					ret = -EFAULT;
			}

			bv_len -= bytes;
			bv_addr += bytes;
			iov_addr += bytes;
			iov_off += bytes;

			if (iov[iov_idx].iov_len == iov_off) {
				/* current iovec exhausted, move to next */
				iov_idx++;
				iov_off = 0;
			}
		}

		if (do_free_page)
			__free_page(bvec->bv_page);
	}

	return ret;
}
783
784
785
786
787
788
789
790
/**
 * bio_uncopy_user - finish previously mapped bio
 * @bio: bio being terminated
 *
 * Free pages allocated from bio_copy_user_iov() and, for reads, write
 * the data back to user space.  Always frees the map data and drops
 * the bio reference.
 */
int bio_uncopy_user(struct bio *bio)
{
	struct bio_map_data *bmd = bio->bi_private;
	int ret = 0;

	/* null-mapped bios carried no data, so there is nothing to copy */
	if (!bio_flagged(bio, BIO_NULL_MAPPED))
		ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs,
				     bmd->nr_sgvecs, bio_data_dir(bio) == READ,
				     0, bmd->is_our_pages);
	bio_free_map_data(bmd);
	bio_put(bio);
	return ret;
}
804EXPORT_SYMBOL(bio_uncopy_user);
805
806
807
808
809
810
811
812
813
814
815
816
817
818
/**
 * bio_copy_user_iov - copy user data to bio
 * @q:           destination block queue
 * @map_data:    pointer to the rq_map_data holding pages (if necessary)
 * @iov:         the iovec
 * @iov_count:   number of elements in the iovec
 * @write_to_vm: bool indicating writing to pages or not
 * @gfp_mask:    memory allocation flags
 *
 * Prepares and returns a bio for indirect user io: pages are bounce
 * buffers (either caller-supplied via @map_data or freshly allocated),
 * and data is copied in/out rather than the user pages being pinned.
 * Call bio_uncopy_user() on io completion.
 */
struct bio *bio_copy_user_iov(struct request_queue *q,
			      struct rq_map_data *map_data,
			      struct sg_iovec *iov, int iov_count,
			      int write_to_vm, gfp_t gfp_mask)
{
	struct bio_map_data *bmd;
	struct bio_vec *bvec;
	struct page *page;
	struct bio *bio;
	int i, ret;
	int nr_pages = 0;
	unsigned int len = 0;
	unsigned int offset = map_data ? map_data->offset & ~PAGE_MASK : 0;

	/* work out the total length and how many pages it spans */
	for (i = 0; i < iov_count; i++) {
		unsigned long uaddr;
		unsigned long end;
		unsigned long start;

		uaddr = (unsigned long)iov[i].iov_base;
		end = (uaddr + iov[i].iov_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
		start = uaddr >> PAGE_SHIFT;

		/*
		 * Overflow, abort
		 */
		if (end < start)
			return ERR_PTR(-EINVAL);

		nr_pages += end - start;
		len += iov[i].iov_len;
	}

	if (offset)
		nr_pages++;

	bmd = bio_alloc_map_data(nr_pages, iov_count, gfp_mask);
	if (!bmd)
		return ERR_PTR(-ENOMEM);

	ret = -ENOMEM;
	bio = bio_kmalloc(gfp_mask, nr_pages);
	if (!bio)
		goto out_bmd;

	if (!write_to_vm)
		bio->bi_rw |= REQ_WRITE;

	ret = 0;

	if (map_data) {
		/* caller-supplied pages come in blocks of 2^page_order */
		nr_pages = 1 << map_data->page_order;
		i = map_data->offset / PAGE_SIZE;
	}
	while (len) {
		unsigned int bytes = PAGE_SIZE;

		bytes -= offset;	/* first page may start mid-page */

		if (bytes > len)
			bytes = len;

		if (map_data) {
			if (i == map_data->nr_entries * nr_pages) {
				/* ran out of caller-supplied pages */
				ret = -ENOMEM;
				break;
			}

			page = map_data->pages[i / nr_pages];
			page += (i % nr_pages);

			i++;
		} else {
			page = alloc_page(q->bounce_gfp | gfp_mask);
			if (!page) {
				ret = -ENOMEM;
				break;
			}
		}

		if (bio_add_pc_page(q, bio, page, bytes, offset) < bytes)
			break;

		len -= bytes;
		offset = 0;
	}

	if (ret)
		goto cleanup;

	/*
	 * success: for writes (and from_user mappings), fill the bounce
	 * pages from user space now
	 */
	if ((!write_to_vm && (!map_data || !map_data->null_mapped)) ||
	    (map_data && map_data->from_user)) {
		ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0, 1, 0);
		if (ret)
			goto cleanup;
	}

	bio_set_map_data(bmd, bio, iov, iov_count, map_data ? 0 : 1);
	return bio;
cleanup:
	/* only free pages we allocated ourselves */
	if (!map_data)
		bio_for_each_segment(bvec, bio, i)
			__free_page(bvec->bv_page);

	bio_put(bio);
out_bmd:
	bio_free_map_data(bmd);
	return ERR_PTR(ret);
}
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945struct bio *bio_copy_user(struct request_queue *q, struct rq_map_data *map_data,
946 unsigned long uaddr, unsigned int len,
947 int write_to_vm, gfp_t gfp_mask)
948{
949 struct sg_iovec iov;
950
951 iov.iov_base = (void __user *)uaddr;
952 iov.iov_len = len;
953
954 return bio_copy_user_iov(q, map_data, &iov, 1, write_to_vm, gfp_mask);
955}
956EXPORT_SYMBOL(bio_copy_user);
957
/*
 * Build a bio that maps the user pages described by @iov directly
 * (zero copy): the pages are pinned with get_user_pages_fast() and
 * added to the bio.  Returns the bio or an ERR_PTR; on any failure
 * every page pinned so far is released.
 */
static struct bio *__bio_map_user_iov(struct request_queue *q,
				      struct block_device *bdev,
				      struct sg_iovec *iov, int iov_count,
				      int write_to_vm, gfp_t gfp_mask)
{
	int i, j;
	int nr_pages = 0;
	struct page **pages;
	struct bio *bio;
	int cur_page = 0;
	int ret, offset;

	/* first pass: validate the iovecs and count pages */
	for (i = 0; i < iov_count; i++) {
		unsigned long uaddr = (unsigned long)iov[i].iov_base;
		unsigned long len = iov[i].iov_len;
		unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
		unsigned long start = uaddr >> PAGE_SHIFT;

		/*
		 * Overflow, abort
		 */
		if (end < start)
			return ERR_PTR(-EINVAL);

		nr_pages += end - start;

		/*
		 * buffer must be aligned to at least hardsector size for now
		 */
		if (uaddr & queue_dma_alignment(q))
			return ERR_PTR(-EINVAL);
	}

	if (!nr_pages)
		return ERR_PTR(-EINVAL);

	bio = bio_kmalloc(gfp_mask, nr_pages);
	if (!bio)
		return ERR_PTR(-ENOMEM);

	ret = -ENOMEM;
	pages = kcalloc(nr_pages, sizeof(struct page *), gfp_mask);
	if (!pages)
		goto out;

	/* second pass: pin the pages and add them to the bio */
	for (i = 0; i < iov_count; i++) {
		unsigned long uaddr = (unsigned long)iov[i].iov_base;
		unsigned long len = iov[i].iov_len;
		unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
		unsigned long start = uaddr >> PAGE_SHIFT;
		const int local_nr_pages = end - start;
		const int page_limit = cur_page + local_nr_pages;

		ret = get_user_pages_fast(uaddr, local_nr_pages,
					  write_to_vm, &pages[cur_page]);
		if (ret < local_nr_pages) {
			ret = -EFAULT;
			goto out_unmap;
		}

		offset = uaddr & ~PAGE_MASK;
		for (j = cur_page; j < page_limit; j++) {
			unsigned int bytes = PAGE_SIZE - offset;

			if (len <= 0)
				break;

			if (bytes > len)
				bytes = len;

			/*
			 * sorry...
			 */
			if (bio_add_pc_page(q, bio, pages[j], bytes, offset) <
					    bytes)
				break;

			len -= bytes;
			offset = 0;
		}

		cur_page = j;
		/*
		 * release the pages we didn't map into the bio, if any
		 */
		while (j < page_limit)
			page_cache_release(pages[j++]);
	}

	kfree(pages);

	/*
	 * set data direction, and check if mapped pages need bouncing
	 */
	if (!write_to_vm)
		bio->bi_rw |= REQ_WRITE;

	bio->bi_bdev = bdev;
	bio->bi_flags |= (1 << BIO_USER_MAPPED);
	return bio;

 out_unmap:
	/* pages[] is NULL-terminated by kcalloc past the pinned prefix */
	for (i = 0; i < nr_pages; i++) {
		if(!pages[i])
			break;
		page_cache_release(pages[i]);
	}
 out:
	kfree(pages);
	bio_put(bio);
	return ERR_PTR(ret);
}
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082struct bio *bio_map_user(struct request_queue *q, struct block_device *bdev,
1083 unsigned long uaddr, unsigned int len, int write_to_vm,
1084 gfp_t gfp_mask)
1085{
1086 struct sg_iovec iov;
1087
1088 iov.iov_base = (void __user *)uaddr;
1089 iov.iov_len = len;
1090
1091 return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm, gfp_mask);
1092}
1093EXPORT_SYMBOL(bio_map_user);
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
/**
 * bio_map_user_iov - map user sg_iovec table into bio
 * @q:           the struct request_queue for the bio
 * @bdev:        destination block device
 * @iov:         the iovec
 * @iov_count:   number of elements in the iovec
 * @write_to_vm: bool indicating writing to pages or not
 * @gfp_mask:    memory allocation flags
 *
 * Map the user space address into a bio suitable for io to a block
 * device. Returns an error pointer in case of error.
 */
struct bio *bio_map_user_iov(struct request_queue *q, struct block_device *bdev,
			     struct sg_iovec *iov, int iov_count,
			     int write_to_vm, gfp_t gfp_mask)
{
	struct bio *bio;

	bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm,
				 gfp_mask);
	if (IS_ERR(bio))
		return bio;

	/*
	 * subtle -- if __bio_map_user() ended up bouncing a bio,
	 * it would normally disappear when its bi_end_io is run.
	 * however, we need it for the unmap, so grab an extra
	 * reference to it
	 */
	bio_get(bio);

	return bio;
}
1128
1129static void __bio_unmap_user(struct bio *bio)
1130{
1131 struct bio_vec *bvec;
1132 int i;
1133
1134
1135
1136
1137 __bio_for_each_segment(bvec, bio, i, 0) {
1138 if (bio_data_dir(bio) == READ)
1139 set_page_dirty_lock(bvec->bv_page);
1140
1141 page_cache_release(bvec->bv_page);
1142 }
1143
1144 bio_put(bio);
1145}
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
/**
 * bio_unmap_user - unmap a bio
 * @bio: the bio being unmapped
 *
 * Unmap a bio previously mapped by bio_map_user(). Must be called from
 * process context.
 *
 * bio_unmap_user() may sleep.
 *
 * Note the two bio_put()s: one (inside __bio_unmap_user) drops the
 * bio's own reference, the other drops the extra reference taken in
 * bio_map_user_iov().
 */
void bio_unmap_user(struct bio *bio)
{
	__bio_unmap_user(bio);
	bio_put(bio);
}
1161EXPORT_SYMBOL(bio_unmap_user);
1162
/* Completion for bio_map_kern() bios: just drop the reference. */
static void bio_map_kern_endio(struct bio *bio, int err)
{
	bio_put(bio);
}
1167
/*
 * Build a bio mapping the kernel buffer @data/@len directly (the data
 * must be addressable via virt_to_page, i.e. not vmalloc'ed).  May map
 * less than @len if queue limits intervene - the caller checks bi_size.
 */
static struct bio *__bio_map_kern(struct request_queue *q, void *data,
				  unsigned int len, gfp_t gfp_mask)
{
	unsigned long kaddr = (unsigned long)data;
	unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
	unsigned long start = kaddr >> PAGE_SHIFT;
	const int nr_pages = end - start;
	int offset, i;
	struct bio *bio;

	bio = bio_kmalloc(gfp_mask, nr_pages);
	if (!bio)
		return ERR_PTR(-ENOMEM);

	offset = offset_in_page(kaddr);
	for (i = 0; i < nr_pages; i++) {
		unsigned int bytes = PAGE_SIZE - offset;

		if (len <= 0)
			break;

		if (bytes > len)
			bytes = len;

		/* stop early if the queue refuses more pages */
		if (bio_add_pc_page(q, bio, virt_to_page(data), bytes,
				    offset) < bytes)
			break;

		data += bytes;
		len -= bytes;
		offset = 0;	/* only the first page starts mid-page */
	}

	bio->bi_end_io = bio_map_kern_endio;
	return bio;
}
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
1216 gfp_t gfp_mask)
1217{
1218 struct bio *bio;
1219
1220 bio = __bio_map_kern(q, data, len, gfp_mask);
1221 if (IS_ERR(bio))
1222 return bio;
1223
1224 if (bio->bi_size == len)
1225 return bio;
1226
1227
1228
1229
1230 bio_put(bio);
1231 return ERR_PTR(-EINVAL);
1232}
1233EXPORT_SYMBOL(bio_map_kern);
1234
/*
 * Completion for bio_copy_kern() bios: for reads, copy the bounce
 * pages back into the caller's buffer, then free pages, map data and
 * the bio itself.
 */
static void bio_copy_kern_endio(struct bio *bio, int err)
{
	struct bio_vec *bvec;
	const int read = bio_data_dir(bio) == READ;
	struct bio_map_data *bmd = bio->bi_private;
	int i;
	/* sgvecs[0] holds the original kernel buffer address */
	char *p = bmd->sgvecs[0].iov_base;

	__bio_for_each_segment(bvec, bio, i, 0) {
		char *addr = page_address(bvec->bv_page);
		/* use the snapshot length - the live bvec may be altered */
		int len = bmd->iovecs[i].bv_len;

		if (read)
			memcpy(p, addr, len);

		__free_page(bvec->bv_page);
		p += len;
	}

	bio_free_map_data(bmd);
	bio_put(bio);
}
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
/**
 * bio_copy_kern - copy kernel address into bio
 * @q:        the struct request_queue for the bio
 * @data:     pointer to buffer to copy
 * @len:      length in bytes
 * @gfp_mask: allocation flags for bio and page allocation
 * @reading:  data direction is READ
 *
 * copy the kernel address into a bio suitable for io to a block
 * device. Returns an error pointer in case of error.
 */
struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len,
			  gfp_t gfp_mask, int reading)
{
	struct bio *bio;
	struct bio_vec *bvec;
	int i;

	/*
	 * write_to_vm=1 makes bio_copy_user allocate bounce pages but
	 * skip the copy-from-"user" step (which would fault on a kernel
	 * address); for writes we fill the pages by hand below.
	 */
	bio = bio_copy_user(q, NULL, (unsigned long)data, len, 1, gfp_mask);
	if (IS_ERR(bio))
		return bio;

	if (!reading) {
		void *p = data;

		bio_for_each_segment(bvec, bio, i) {
			char *addr = page_address(bvec->bv_page);

			memcpy(addr, p, bvec->bv_len);
			p += bvec->bv_len;
		}
	}

	bio->bi_end_io = bio_copy_kern_endio;

	return bio;
}
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326void bio_set_pages_dirty(struct bio *bio)
1327{
1328 struct bio_vec *bvec = bio->bi_io_vec;
1329 int i;
1330
1331 for (i = 0; i < bio->bi_vcnt; i++) {
1332 struct page *page = bvec[i].bv_page;
1333
1334 if (page && !PageCompound(page))
1335 set_page_dirty_lock(page);
1336 }
1337}
1338
1339static void bio_release_pages(struct bio *bio)
1340{
1341 struct bio_vec *bvec = bio->bi_io_vec;
1342 int i;
1343
1344 for (i = 0; i < bio->bi_vcnt; i++) {
1345 struct page *page = bvec[i].bv_page;
1346
1347 if (page)
1348 put_page(page);
1349 }
1350}
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
/*
 * Deferred page-dirtying machinery: bios whose pages could not be
 * dirtied at interrupt time are chained on bio_dirty_list (linked via
 * bi_private, protected by bio_dirty_lock) and processed from
 * process context by bio_dirty_fn().
 */
static void bio_dirty_fn(struct work_struct *work);

static DECLARE_WORK(bio_dirty_work, bio_dirty_fn);
static DEFINE_SPINLOCK(bio_dirty_lock);
static struct bio *bio_dirty_list;
1368
1369
1370
1371
1372static void bio_dirty_fn(struct work_struct *work)
1373{
1374 unsigned long flags;
1375 struct bio *bio;
1376
1377 spin_lock_irqsave(&bio_dirty_lock, flags);
1378 bio = bio_dirty_list;
1379 bio_dirty_list = NULL;
1380 spin_unlock_irqrestore(&bio_dirty_lock, flags);
1381
1382 while (bio) {
1383 struct bio *next = bio->bi_private;
1384
1385 bio_set_pages_dirty(bio);
1386 bio_release_pages(bio);
1387 bio_put(bio);
1388 bio = next;
1389 }
1390}
1391
/*
 * Called at interrupt time on read completion.  Pages that are already
 * dirty (or compound) can be released immediately; if any page is
 * still clean the whole bio is punted to the bio_dirty_work workqueue,
 * where set_page_dirty_lock() may safely sleep.
 */
void bio_check_pages_dirty(struct bio *bio)
{
	struct bio_vec *bvec = bio->bi_io_vec;
	int nr_clean_pages = 0;
	int i;

	for (i = 0; i < bio->bi_vcnt; i++) {
		struct page *page = bvec[i].bv_page;

		if (PageDirty(page) || PageCompound(page)) {
			page_cache_release(page);
			/* NULL it so the deferred path skips this page */
			bvec[i].bv_page = NULL;
		} else {
			nr_clean_pages++;
		}
	}

	if (nr_clean_pages) {
		unsigned long flags;

		/* chain onto the deferred list via bi_private */
		spin_lock_irqsave(&bio_dirty_lock, flags);
		bio->bi_private = bio_dirty_list;
		bio_dirty_list = bio;
		spin_unlock_irqrestore(&bio_dirty_lock, flags);
		schedule_work(&bio_dirty_work);
	} else {
		bio_put(bio);
	}
}
1421
1422#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
1423void bio_flush_dcache_pages(struct bio *bi)
1424{
1425 int i;
1426 struct bio_vec *bvec;
1427
1428 bio_for_each_segment(bvec, bi, i)
1429 flush_dcache_page(bvec->bv_page);
1430}
1431EXPORT_SYMBOL(bio_flush_dcache_pages);
1432#endif
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
/**
 * bio_endio - end I/O on a bio
 * @bio:   bio
 * @error: error, if any
 *
 * Description:
 *   bio_endio() will end I/O on the whole bio. bio_endio() is the
 *   preferred way to end I/O on a bio, it takes care of clearing
 *   BIO_UPTODATE on error. @error is 0 on success, and ideally one of
 *   the established -Exxxx (-EIO, for instance) error values in case
 *   something went wrong.
 **/
void bio_endio(struct bio *bio, int error)
{
	if (error)
		clear_bit(BIO_UPTODATE, &bio->bi_flags);
	else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
		/* flag already cleared by a lower layer: synthesize -EIO */
		error = -EIO;

	if (bio->bi_end_io)
		bio->bi_end_io(bio, error);
}
1458EXPORT_SYMBOL(bio_endio);
1459
/*
 * Drop one of the three references on a bio_pair (one per half plus
 * one for the submitter).  The last put completes the original bio
 * and returns the pair to the split mempool.
 */
void bio_pair_release(struct bio_pair *bp)
{
	if (atomic_dec_and_test(&bp->cnt)) {
		/* bio1's bi_private was set to the original bio in bio_split */
		struct bio *master = bp->bio1.bi_private;

		bio_endio(master, bp->error);
		/* bio2's bi_private holds the mempool the pair came from */
		mempool_free(bp, bp->bio2.bi_private);
	}
}
1469EXPORT_SYMBOL(bio_pair_release);
1470
1471static void bio_pair_end_1(struct bio *bi, int err)
1472{
1473 struct bio_pair *bp = container_of(bi, struct bio_pair, bio1);
1474
1475 if (err)
1476 bp->error = err;
1477
1478 bio_pair_release(bp);
1479}
1480
1481static void bio_pair_end_2(struct bio *bi, int err)
1482{
1483 struct bio_pair *bp = container_of(bi, struct bio_pair, bio2);
1484
1485 if (err)
1486 bp->error = err;
1487
1488 bio_pair_release(bp);
1489}
1490
1491
1492
1493
1494struct bio_pair *bio_split(struct bio *bi, int first_sectors)
1495{
1496 struct bio_pair *bp = mempool_alloc(bio_split_pool, GFP_NOIO);
1497
1498 if (!bp)
1499 return bp;
1500
1501 trace_block_split(bdev_get_queue(bi->bi_bdev), bi,
1502 bi->bi_sector + first_sectors);
1503
1504 BUG_ON(bi->bi_vcnt != 1);
1505 BUG_ON(bi->bi_idx != 0);
1506 atomic_set(&bp->cnt, 3);
1507 bp->error = 0;
1508 bp->bio1 = *bi;
1509 bp->bio2 = *bi;
1510 bp->bio2.bi_sector += first_sectors;
1511 bp->bio2.bi_size -= first_sectors << 9;
1512 bp->bio1.bi_size = first_sectors << 9;
1513
1514 bp->bv1 = bi->bi_io_vec[0];
1515 bp->bv2 = bi->bi_io_vec[0];
1516 bp->bv2.bv_offset += first_sectors << 9;
1517 bp->bv2.bv_len -= first_sectors << 9;
1518 bp->bv1.bv_len = first_sectors << 9;
1519
1520 bp->bio1.bi_io_vec = &bp->bv1;
1521 bp->bio2.bi_io_vec = &bp->bv2;
1522
1523 bp->bio1.bi_max_vecs = 1;
1524 bp->bio2.bi_max_vecs = 1;
1525
1526 bp->bio1.bi_end_io = bio_pair_end_1;
1527 bp->bio2.bi_end_io = bio_pair_end_2;
1528
1529 bp->bio1.bi_private = bi;
1530 bp->bio2.bi_private = bio_split_pool;
1531
1532 if (bio_integrity(bi))
1533 bio_integrity_split(bi, bp, first_sectors);
1534
1535 return bp;
1536}
1537EXPORT_SYMBOL(bio_split);
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
/**
 * bio_sector_offset - Find hardware sector offset in bio
 * @bio:    bio to inspect
 * @index:  bvec index
 * @offset: byte offset within the indexed bvec
 *
 * Return the number of hardware sectors between the beginning of the
 * bio and an end point indicated by a bvec index and an offset within
 * that vector.
 */
sector_t bio_sector_offset(struct bio *bio, unsigned short index,
			   unsigned int offset)
{
	unsigned int sector_sz;
	struct bio_vec *bv;
	sector_t sectors;
	int i;

	sector_sz = queue_logical_block_size(bio->bi_bdev->bd_disk->queue);
	sectors = 0;

	/* clamp an out-of-range index to the last vec */
	if (index >= bio->bi_idx)
		index = bio->bi_vcnt - 1;

	__bio_for_each_segment(bv, bio, i, 0) {
		if (i == index) {
			if (offset > bv->bv_offset)
				sectors += (offset - bv->bv_offset) / sector_sz;
			break;
		}

		sectors += bv->bv_len / sector_sz;
	}

	return sectors;
}
1575EXPORT_SYMBOL(bio_sector_offset);
1576
1577
1578
1579
1580
1581static int biovec_create_pools(struct bio_set *bs, int pool_entries)
1582{
1583 struct biovec_slab *bp = bvec_slabs + BIOVEC_MAX_IDX;
1584
1585 bs->bvec_pool = mempool_create_slab_pool(pool_entries, bp->slab);
1586 if (!bs->bvec_pool)
1587 return -ENOMEM;
1588
1589 return 0;
1590}
1591
1592static void biovec_free_pools(struct bio_set *bs)
1593{
1594 mempool_destroy(bs->bvec_pool);
1595}
1596
/*
 * Free a bio_set created by bioset_create(): its bio mempool,
 * integrity pool, bvec pool, slab reference and finally the set
 * itself.  Also used on partially-constructed sets (bio_pool may be
 * NULL).
 */
void bioset_free(struct bio_set *bs)
{
	if (bs->bio_pool)
		mempool_destroy(bs->bio_pool);

	bioset_integrity_free(bs);
	biovec_free_pools(bs);
	bio_put_slab(bs);

	kfree(bs);
}
1608EXPORT_SYMBOL(bioset_free);
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
/**
 * bioset_create  - Create a bio_set
 * @pool_size:  Number of bio and bio_vecs to cache in the mempool
 * @front_pad:  Number of bytes to allocate in front of the returned bio
 *
 * Description:
 *    Set up a bio_set to be used with @bio_alloc_bioset. Allows the caller
 *    to ask for a number of bytes to be allocated in front of the bio.
 *    Front pad allocation is useful for embedding the bio inside
 *    another structure, to avoid allocating extra data to go with the bio.
 *    Note that the bio must be embedded at the END of that structure always,
 *    or things will break badly.
 */
struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
{
	/* room behind the bio for the inline bvec array */
	unsigned int back_pad = BIO_INLINE_VECS * sizeof(struct bio_vec);
	struct bio_set *bs;

	bs = kzalloc(sizeof(*bs), GFP_KERNEL);
	if (!bs)
		return NULL;

	bs->front_pad = front_pad;

	bs->bio_slab = bio_find_or_create_slab(front_pad + back_pad);
	if (!bs->bio_slab) {
		kfree(bs);
		return NULL;
	}

	bs->bio_pool = mempool_create_slab_pool(pool_size, bs->bio_slab);
	if (!bs->bio_pool)
		goto bad;

	/* biovec_create_pools() returns 0 on success */
	if (!biovec_create_pools(bs, pool_size))
		return bs;

bad:
	bioset_free(bs);
	return NULL;
}
1651EXPORT_SYMBOL(bioset_create);
1652
1653#ifdef CONFIG_BLK_CGROUP
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
/**
 * bio_associate_current - associate a bio with %current
 * @bio: target bio
 *
 * Associate @bio with %current if it hasn't been associated yet.  Block
 * layer will treat @bio as if it were issued by %current no matter which
 * task actually issues it.  Returns 0 on success, -EBUSY if already
 * associated, -ENOENT if %current has no io_context.
 */
int bio_associate_current(struct bio *bio)
{
	struct io_context *ioc;
	struct cgroup_subsys_state *css;

	if (bio->bi_ioc)
		return -EBUSY;

	ioc = current->io_context;
	if (!ioc)
		return -ENOENT;

	/* acquire active ref on @ioc and associate */
	get_io_context_active(ioc);
	bio->bi_ioc = ioc;

	/* associate blkcg if exists; css_tryget may fail if it is dying */
	rcu_read_lock();
	css = task_subsys_state(current, blkio_subsys_id);
	if (css && css_tryget(css))
		bio->bi_css = css;
	rcu_read_unlock();

	return 0;
}
1692
1693
1694
1695
1696
1697void bio_disassociate_task(struct bio *bio)
1698{
1699 if (bio->bi_ioc) {
1700 put_io_context(bio->bi_ioc);
1701 bio->bi_ioc = NULL;
1702 }
1703 if (bio->bi_css) {
1704 css_put(bio->bi_css);
1705 bio->bi_css = NULL;
1706 }
1707}
1708
1709#endif
1710
1711static void __init biovec_init_slabs(void)
1712{
1713 int i;
1714
1715 for (i = 0; i < BIOVEC_NR_POOLS; i++) {
1716 int size;
1717 struct biovec_slab *bvs = bvec_slabs + i;
1718
1719 if (bvs->nr_vecs <= BIO_INLINE_VECS) {
1720 bvs->slab = NULL;
1721 continue;
1722 }
1723
1724 size = bvs->nr_vecs * sizeof(struct bio_vec);
1725 bvs->slab = kmem_cache_create(bvs->name, size, 0,
1726 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
1727 }
1728}
1729
/*
 * Boot-time initialization of the bio layer: the bio_slabs table, the
 * per-size bvec caches, the global fs_bio_set (with integrity pool)
 * and the bio_split mempool.  Failure here is fatal, hence the panics.
 */
static int __init init_bio(void)
{
	bio_slab_max = 2;
	bio_slab_nr = 0;
	bio_slabs = kzalloc(bio_slab_max * sizeof(struct bio_slab), GFP_KERNEL);
	if (!bio_slabs)
		panic("bio: can't allocate bios\n");

	bio_integrity_init();
	biovec_init_slabs();

	fs_bio_set = bioset_create(BIO_POOL_SIZE, 0);
	if (!fs_bio_set)
		panic("bio: can't allocate bios\n");

	if (bioset_integrity_create(fs_bio_set, BIO_POOL_SIZE))
		panic("bio: can't create integrity pool\n");

	bio_split_pool = mempool_create_kmalloc_pool(BIO_SPLIT_ENTRIES,
						     sizeof(struct bio_pair));
	if (!bio_split_pool)
		panic("bio: can't create split pool\n");

	return 0;
}
1755subsys_initcall(init_bio);
1756