#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mempool.h>
#include <linux/workqueue.h>
#include <scsi/sg.h>		/* for struct sg_iovec */

#include <trace/events/block.h>

/*
 * Number of bio_vecs embedded in struct bio itself; requests this small
 * need no separate biovec allocation.
 */
#define BIO_INLINE_VECS		4

static mempool_t *bio_split_pool __read_mostly;

/*
 * If you change this list, also change bvec_alloc_bs() below: the pool
 * index selection there must match the slab sizes defined here.
 */
#define BV(x) { .nr_vecs = x, .name = "biovec-"__stringify(x) }
struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = {
	BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES),
};
#undef BV

/*
 * fs_bio_set is the bio_set containing bio and iovec memory pools used by
 * IO code that does not need private memory pools.
 */
struct bio_set *fs_bio_set;

/*
 * Per-size bio slab cache bookkeeping, shared between bio_sets that need
 * the same amount of front padding.
 */
struct bio_slab {
	struct kmem_cache *slab;
	unsigned int slab_ref;
	unsigned int slab_size;
	char name[8];
};
static DEFINE_MUTEX(bio_slab_lock);
static struct bio_slab *bio_slabs;
static unsigned int bio_slab_nr, bio_slab_max;

static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size)
{
	unsigned int sz = sizeof(struct bio) + extra_size;
	struct kmem_cache *slab = NULL;
	struct bio_slab *bslab;
	unsigned int i, entry = -1;

	mutex_lock(&bio_slab_lock);

	i = 0;
	while (i < bio_slab_nr) {
		struct bio_slab *bslab = &bio_slabs[i];

		if (!bslab->slab && entry == -1)
			entry = i;
		else if (bslab->slab_size == sz) {
			slab = bslab->slab;
			bslab->slab_ref++;
			break;
		}
		i++;
	}

	if (slab)
		goto out_unlock;

	if (bio_slab_nr == bio_slab_max && entry == -1) {
		bio_slab_max <<= 1;
		bio_slabs = krealloc(bio_slabs,
				     bio_slab_max * sizeof(struct bio_slab),
				     GFP_KERNEL);
		if (!bio_slabs)
			goto out_unlock;
	}
	if (entry == -1)
		entry = bio_slab_nr++;

	bslab = &bio_slabs[entry];

	snprintf(bslab->name, sizeof(bslab->name), "bio-%d", entry);
	slab = kmem_cache_create(bslab->name, sz, 0, SLAB_HWCACHE_ALIGN, NULL);
	if (!slab)
		goto out_unlock;

	printk(KERN_INFO "bio: create slab <%s> at %d\n", bslab->name, entry);
	bslab->slab = slab;
	bslab->slab_ref = 1;
	bslab->slab_size = sz;
out_unlock:
	mutex_unlock(&bio_slab_lock);
	return slab;
}

static void bio_put_slab(struct bio_set *bs)
{
	struct bio_slab *bslab = NULL;
	unsigned int i;

	mutex_lock(&bio_slab_lock);

	for (i = 0; i < bio_slab_nr; i++) {
		if (bs->bio_slab == bio_slabs[i].slab) {
			bslab = &bio_slabs[i];
			break;
		}
	}

	if (WARN(!bslab, KERN_ERR "bio: unable to find slab!\n"))
		goto out;

	WARN_ON(!bslab->slab_ref);

	if (--bslab->slab_ref)
		goto out;

	kmem_cache_destroy(bslab->slab);
	bslab->slab = NULL;

out:
	mutex_unlock(&bio_slab_lock);
}

unsigned int bvec_nr_vecs(unsigned short idx)
{
	return bvec_slabs[idx].nr_vecs;
}

void bvec_free_bs(struct bio_set *bs, struct bio_vec *bv, unsigned int idx)
{
	BIO_BUG_ON(idx >= BIOVEC_NR_POOLS);

	if (idx == BIOVEC_MAX_IDX)
		mempool_free(bv, bs->bvec_pool);
	else {
		struct biovec_slab *bvs = bvec_slabs + idx;

		kmem_cache_free(bvs->slab, bv);
	}
}

struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx,
			      struct bio_set *bs)
{
	struct bio_vec *bvl;

	/*
	 * Pick the biovec slab that fits this request; see the bvec_slabs[]
	 * table above.
	 */
	switch (nr) {
	case 1:
		*idx = 0;
		break;
	case 2 ... 4:
		*idx = 1;
		break;
	case 5 ... 16:
		*idx = 2;
		break;
	case 17 ... 64:
		*idx = 3;
		break;
	case 65 ... 128:
		*idx = 4;
		break;
	case 129 ... BIO_MAX_PAGES:
		*idx = 5;
		break;
	default:
		return NULL;
	}

	/*
	 * idx now points to the pool we want to allocate from. Only the
	 * largest pool (BIOVEC_MAX_IDX) is mempool backed.
	 */
	if (*idx == BIOVEC_MAX_IDX) {
fallback:
		bvl = mempool_alloc(bs->bvec_pool, gfp_mask);
	} else {
		struct biovec_slab *bvs = bvec_slabs + *idx;
		gfp_t __gfp_mask = gfp_mask & ~(__GFP_WAIT | __GFP_IO);

		/*
		 * Make this allocation restricted and don't dump info on
		 * allocation failures, since we'll fall back to the mempool
		 * in case of failure.
		 */
		__gfp_mask |= __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN;

		/*
		 * Try a slab allocation. If this fails and __GFP_WAIT
		 * is set, retry with the 1-entry mempool.
		 */
		bvl = kmem_cache_alloc(bvs->slab, __gfp_mask);
		if (unlikely(!bvl && (gfp_mask & __GFP_WAIT))) {
			*idx = BIOVEC_MAX_IDX;
			goto fallback;
		}
	}

	return bvl;
}

void bio_free(struct bio *bio, struct bio_set *bs)
{
	void *p;

	if (bio_has_allocated_vec(bio))
		bvec_free_bs(bs, bio->bi_io_vec, BIO_POOL_IDX(bio));

	if (bio_integrity(bio))
		bio_integrity_free(bio, bs);

	/*
	 * If we have front padding, adjust the bio pointer before freeing.
	 */
	p = bio;
	if (bs->front_pad)
		p -= bs->front_pad;

	mempool_free(p, bs->bio_pool);
}
EXPORT_SYMBOL(bio_free);

void bio_init(struct bio *bio)
{
	memset(bio, 0, sizeof(*bio));
	bio->bi_flags = 1 << BIO_UPTODATE;
	bio->bi_comp_cpu = -1;
	atomic_set(&bio->bi_cnt, 1);
}
EXPORT_SYMBOL(bio_init);

/**
 * bio_alloc_bioset - allocate a bio for I/O
 * @gfp_mask:   the GFP_ mask given to the slab allocator
 * @nr_iovecs:	number of iovecs to pre-allocate
 * @bs:		the bio_set to allocate from
 *
 * Description:
 *   bio_alloc_bioset will try its own mempool to satisfy the allocation.
 *   If %__GFP_WAIT is set then we will block on the internal pool waiting
 *   for a &struct bio to become free.
 *
 *   Note that the caller must set ->bi_destructor on successful return
 *   of a bio, to do the appropriate freeing of the bio once the reference
 *   count drops to zero.
 **/
struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
{
	unsigned long idx = BIO_POOL_NONE;
	struct bio_vec *bvl = NULL;
	struct bio *bio;
	void *p;

	p = mempool_alloc(bs->bio_pool, gfp_mask);
	if (unlikely(!p))
		return NULL;
	bio = p + bs->front_pad;

	bio_init(bio);

	if (unlikely(!nr_iovecs))
		goto out_set;

	if (nr_iovecs <= BIO_INLINE_VECS) {
		bvl = bio->bi_inline_vecs;
		nr_iovecs = BIO_INLINE_VECS;
	} else {
		bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs);
		if (unlikely(!bvl))
			goto err_free;

		nr_iovecs = bvec_nr_vecs(idx);
	}
out_set:
	bio->bi_flags |= idx << BIO_POOL_OFFSET;
	bio->bi_max_vecs = nr_iovecs;
	bio->bi_io_vec = bvl;
	return bio;

err_free:
	mempool_free(p, bs->bio_pool);
	return NULL;
}
EXPORT_SYMBOL(bio_alloc_bioset);

static void bio_fs_destructor(struct bio *bio)
{
	bio_free(bio, fs_bio_set);
}

/**
 *	bio_alloc - allocate a new bio, memory pool backed
 *	@gfp_mask: allocation mask to use
 *	@nr_iovecs: number of iovecs
 *
 *	bio_alloc will allocate a bio and associated bio_vec array that can
 *	hold at least @nr_iovecs entries. Allocations will be done from the
 *	fs_bio_set. Also see bio_alloc_bioset() and bio_kmalloc().
 *
 *	If %__GFP_WAIT is set, then bio_alloc will always be able to allocate
 *	a bio. This is due to the mempool guarantees. To make this work,
 *	callers must never allocate more than 1 bio at a time from this pool.
 *	Callers that need to allocate more than 1 bio must always submit the
 *	previously allocated bio for IO before attempting to allocate a new
 *	one. Failure to do so can cause livelocks under memory pressure.
 *
 *	RETURNS:
 *	Pointer to new bio on success, NULL on failure.
 */
struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs)
{
	struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set);

	if (bio)
		bio->bi_destructor = bio_fs_destructor;

	return bio;
}
EXPORT_SYMBOL(bio_alloc);
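
/*
 * A minimal usage sketch (not code from this file): a filesystem-style
 * caller would typically pair bio_alloc() with bio_add_page() and
 * submit_bio(). The bdev, sector, page and my_end_io names below are
 * assumptions supplied by the caller, not symbols defined here:
 *
 *	struct bio *bio = bio_alloc(GFP_NOIO, 1);
 *
 *	if (!bio)
 *		return -ENOMEM;
 *	bio->bi_bdev = bdev;
 *	bio->bi_sector = sector;
 *	bio->bi_end_io = my_end_io;
 *	if (!bio_add_page(bio, page, PAGE_SIZE, 0)) {
 *		bio_put(bio);
 *		return -EIO;
 *	}
 *	submit_bio(WRITE, bio);
 */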

static void bio_kmalloc_destructor(struct bio *bio)
{
	if (bio_integrity(bio))
		bio_integrity_free(bio, fs_bio_set);
	kfree(bio);
}

/**
 * bio_kmalloc - allocate a bio for I/O using kmalloc()
 * @gfp_mask:   the GFP_ mask given to the slab allocator
 * @nr_iovecs:	number of iovecs to pre-allocate
 *
 * Description:
 *   Allocate a bio and its bio_vec array in a single kmalloc() call,
 *   bypassing the mempools. Unlike bio_alloc(), the allocation can fail
 *   even with %__GFP_WAIT set, so callers must handle a NULL return.
 **/
struct bio *bio_kmalloc(gfp_t gfp_mask, int nr_iovecs)
{
	struct bio *bio;

	bio = kmalloc(sizeof(struct bio) + nr_iovecs * sizeof(struct bio_vec),
		      gfp_mask);
	if (unlikely(!bio))
		return NULL;

	bio_init(bio);
	bio->bi_flags |= BIO_POOL_NONE << BIO_POOL_OFFSET;
	bio->bi_max_vecs = nr_iovecs;
	bio->bi_io_vec = bio->bi_inline_vecs;
	bio->bi_destructor = bio_kmalloc_destructor;

	return bio;
}
EXPORT_SYMBOL(bio_kmalloc);

void zero_fill_bio(struct bio *bio)
{
	unsigned long flags;
	struct bio_vec *bv;
	int i;

	bio_for_each_segment(bv, bio, i) {
		char *data = bvec_kmap_irq(bv, &flags);
		memset(data, 0, bv->bv_len);
		flush_dcache_page(bv->bv_page);
		bvec_kunmap_irq(data, &flags);
	}
}
EXPORT_SYMBOL(zero_fill_bio);

/**
 * bio_put - release a reference to a bio
 * @bio:   bio to release reference to
 *
 * Description:
 *   Put a reference to a &struct bio, either one you have gotten with
 *   bio_alloc, bio_get or bio_clone. The last put of a bio will free it.
 **/
void bio_put(struct bio *bio)
{
	BIO_BUG_ON(!atomic_read(&bio->bi_cnt));

	/*
	 * last put frees it
	 */
	if (atomic_dec_and_test(&bio->bi_cnt)) {
		bio->bi_next = NULL;
		bio->bi_destructor(bio);
	}
}
EXPORT_SYMBOL(bio_put);

inline int bio_phys_segments(struct request_queue *q, struct bio *bio)
{
	if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
		blk_recount_segments(q, bio);

	return bio->bi_phys_segments;
}
EXPORT_SYMBOL(bio_phys_segments);

/**
 * 	__bio_clone	-	clone a bio
 * 	@bio: destination bio
 * 	@bio_src: bio to clone
 *
 *	Clone a &bio. Caller will own the returned bio, but not
 *	the actual data it points to. Reference count of returned
 * 	bio will be one.
 */
void __bio_clone(struct bio *bio, struct bio *bio_src)
{
	memcpy(bio->bi_io_vec, bio_src->bi_io_vec,
		bio_src->bi_max_vecs * sizeof(struct bio_vec));

	/*
	 * The clone shares its data pages with @bio_src; copy the fields
	 * that describe the request.
	 */
	bio->bi_sector = bio_src->bi_sector;
	bio->bi_bdev = bio_src->bi_bdev;
	bio->bi_flags |= 1 << BIO_CLONED;
	bio->bi_rw = bio_src->bi_rw;
	bio->bi_vcnt = bio_src->bi_vcnt;
	bio->bi_size = bio_src->bi_size;
	bio->bi_idx = bio_src->bi_idx;
}
EXPORT_SYMBOL(__bio_clone);

/**
 *	bio_clone	-	clone a bio
 *	@bio: bio to clone
 *	@gfp_mask: allocation priority
 *
 * 	Like __bio_clone, only also allocates the returned bio
 */
struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask)
{
	struct bio *b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs, fs_bio_set);

	if (!b)
		return NULL;

	b->bi_destructor = bio_fs_destructor;
	__bio_clone(b, bio);

	if (bio_integrity(bio)) {
		int ret;

		ret = bio_integrity_clone(b, bio, gfp_mask, fs_bio_set);

		if (ret < 0) {
			bio_put(b);
			return NULL;
		}
	}

	return b;
}
EXPORT_SYMBOL(bio_clone);
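
/*
 * A minimal usage sketch (assumption, not code from this file): a stacking
 * driver that wants to redirect an incoming bio to another device would
 * clone it, retarget the copy and submit it, completing the original from
 * the clone's end_io. lower_bdev, sector_offset, my_clone_end_io and orig
 * are hypothetical caller-side names:
 *
 *	struct bio *clone = bio_clone(orig, GFP_NOIO);
 *
 *	if (!clone)
 *		return -ENOMEM;
 *	clone->bi_bdev = lower_bdev;
 *	clone->bi_sector += sector_offset;
 *	clone->bi_end_io = my_clone_end_io;	// completes 'orig'
 *	clone->bi_private = orig;
 *	generic_make_request(clone);
 */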

/**
 *	bio_get_nr_vecs		- return approx number of vecs
 *	@bdev:  I/O target
 *
 *	Return the approximate number of pages we can send to this target.
 *	There's no guarantee that you will be able to fit this number of pages
 *	into a bio, it does not account for dynamic restrictions that vary
 *	depending on offset and request size.
 */
int bio_get_nr_vecs(struct block_device *bdev)
{
	struct request_queue *q = bdev_get_queue(bdev);
	int nr_pages;

	nr_pages = ((queue_max_sectors(q) << 9) + PAGE_SIZE - 1) >> PAGE_SHIFT;
	if (nr_pages > queue_max_phys_segments(q))
		nr_pages = queue_max_phys_segments(q);
	if (nr_pages > queue_max_hw_segments(q))
		nr_pages = queue_max_hw_segments(q);

	return nr_pages;
}
EXPORT_SYMBOL(bio_get_nr_vecs);

static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
			  *page, unsigned int len, unsigned int offset,
			  unsigned short max_sectors)
{
	int retried_segments = 0;
	struct bio_vec *bvec;

	/*
	 * cloned bio must not modify vec list
	 */
	if (unlikely(bio_flagged(bio, BIO_CLONED)))
		return 0;

	if (((bio->bi_size + len) >> 9) > max_sectors)
		return 0;

	/*
	 * For filesystems with a blocksize smaller than the pagesize
	 * we will often be called with the same page as last time and
	 * a consecutive offset. Try to merge those into the previous vec.
	 */
	if (bio->bi_vcnt > 0) {
		struct bio_vec *prev = &bio->bi_io_vec[bio->bi_vcnt - 1];

		if (page == prev->bv_page &&
		    offset == prev->bv_offset + prev->bv_len) {
			prev->bv_len += len;

			if (q->merge_bvec_fn) {
				struct bvec_merge_data bvm = {
					.bi_bdev = bio->bi_bdev,
					.bi_sector = bio->bi_sector,
					.bi_size = bio->bi_size,
					.bi_rw = bio->bi_rw,
				};

				if (q->merge_bvec_fn(q, &bvm, prev) < len) {
					prev->bv_len -= len;
					return 0;
				}
			}

			goto done;
		}
	}

	if (bio->bi_vcnt >= bio->bi_max_vecs)
		return 0;

	/*
	 * we might lose a segment or two here, but rather that than
	 * make this too complex.
	 */
	while (bio->bi_phys_segments >= queue_max_phys_segments(q)
	       || bio->bi_phys_segments >= queue_max_hw_segments(q)) {

		if (retried_segments)
			return 0;

		retried_segments = 1;
		blk_recount_segments(q, bio);
	}

	/*
	 * setup the new entry, we might clear it again later if we
	 * cannot add the page
	 */
	bvec = &bio->bi_io_vec[bio->bi_vcnt];
	bvec->bv_page = page;
	bvec->bv_len = len;
	bvec->bv_offset = offset;

	/*
	 * if queue has other restrictions (eg varying max sector size
	 * depending on offset), it can consider this now
	 */
	if (q->merge_bvec_fn) {
		struct bvec_merge_data bvm = {
			.bi_bdev = bio->bi_bdev,
			.bi_sector = bio->bi_sector,
			.bi_size = bio->bi_size,
			.bi_rw = bio->bi_rw,
		};

		/*
		 * merge_bvec_fn() returns number of bytes it can accept
		 * at this offset
		 */
		if (q->merge_bvec_fn(q, &bvm, bvec) < len) {
			bvec->bv_page = NULL;
			bvec->bv_len = 0;
			bvec->bv_offset = 0;
			return 0;
		}
	}

	/* If we may be able to merge these biovecs, force a recount */
	if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec)))
		bio->bi_flags &= ~(1 << BIO_SEG_VALID);

	bio->bi_vcnt++;
	bio->bi_phys_segments++;
 done:
	bio->bi_size += len;
	return len;
}

/**
 *	bio_add_pc_page	-	attempt to add page to bio
 *	@q: the target queue
 *	@bio: destination bio
 *	@page: page to add
 *	@len: vec entry length
 *	@offset: vec entry offset
 *
 *	Attempt to add a page to the bio_vec maplist. This can fail for a
 *	number of reasons, such as the bio being full or target block
 *	device limitations. The target block device must allow bio's
 *	smaller than PAGE_SIZE, so it is always possible to add a single
 *	page to an empty bio. This should only be used by passthrough bios.
 */
int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page *page,
		    unsigned int len, unsigned int offset)
{
	return __bio_add_page(q, bio, page, len, offset,
			      queue_max_hw_sectors(q));
}
EXPORT_SYMBOL(bio_add_pc_page);

/**
 *	bio_add_page	-	attempt to add page to bio
 *	@bio: destination bio
 *	@page: page to add
 *	@len: vec entry length
 *	@offset: vec entry offset
 *
 *	Attempt to add a page to the bio_vec maplist. This can fail for a
 *	number of reasons, such as the bio being full or target block
 *	device limitations. The target block device must allow bio's
 *	smaller than PAGE_SIZE, so it is always possible to add a single
 *	page to an empty bio.
 */
int bio_add_page(struct bio *bio, struct page *page, unsigned int len,
		 unsigned int offset)
{
	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
	return __bio_add_page(q, bio, page, len, offset, queue_max_sectors(q));
}
EXPORT_SYMBOL(bio_add_page);
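
/*
 * A minimal sketch (assumption, not code from this file) of building a
 * multi-page bio: size the bio with bio_get_nr_vecs() and stop adding
 * pages as soon as bio_add_page() returns less than requested. 'bdev',
 * 'pages', 'nr' and 'sector' are hypothetical caller-provided values:
 *
 *	int i, nvecs = min(nr, bio_get_nr_vecs(bdev));
 *	struct bio *bio = bio_alloc(GFP_NOIO, nvecs);
 *
 *	if (!bio)
 *		return -ENOMEM;
 *	bio->bi_bdev = bdev;
 *	bio->bi_sector = sector;
 *	for (i = 0; i < nr; i++)
 *		if (bio_add_page(bio, pages[i], PAGE_SIZE, 0) < PAGE_SIZE)
 *			break;		// queue limits hit, submit what fits
 *	submit_bio(READ, bio);
 */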

struct bio_map_data {
	struct bio_vec *iovecs;
	struct sg_iovec *sgvecs;
	int nr_sgvecs;
	int is_our_pages;
};

static void bio_set_map_data(struct bio_map_data *bmd, struct bio *bio,
			     struct sg_iovec *iov, int iov_count,
			     int is_our_pages)
{
	memcpy(bmd->iovecs, bio->bi_io_vec, sizeof(struct bio_vec) * bio->bi_vcnt);
	memcpy(bmd->sgvecs, iov, sizeof(struct sg_iovec) * iov_count);
	bmd->nr_sgvecs = iov_count;
	bmd->is_our_pages = is_our_pages;
	bio->bi_private = bmd;
}

static void bio_free_map_data(struct bio_map_data *bmd)
{
	kfree(bmd->iovecs);
	kfree(bmd->sgvecs);
	kfree(bmd);
}

static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count,
					       gfp_t gfp_mask)
{
	struct bio_map_data *bmd = kmalloc(sizeof(*bmd), gfp_mask);

	if (!bmd)
		return NULL;

	bmd->iovecs = kmalloc(sizeof(struct bio_vec) * nr_segs, gfp_mask);
	if (!bmd->iovecs) {
		kfree(bmd);
		return NULL;
	}

	bmd->sgvecs = kmalloc(sizeof(struct sg_iovec) * iov_count, gfp_mask);
	if (bmd->sgvecs)
		return bmd;

	kfree(bmd->iovecs);
	kfree(bmd);
	return NULL;
}

static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs,
			  struct sg_iovec *iov, int iov_count,
			  int to_user, int from_user, int do_free_page)
{
	int ret = 0, i;
	struct bio_vec *bvec;
	int iov_idx = 0;
	unsigned int iov_off = 0;

	__bio_for_each_segment(bvec, bio, i, 0) {
		char *bv_addr = page_address(bvec->bv_page);
		unsigned int bv_len = iovecs[i].bv_len;

		while (bv_len && iov_idx < iov_count) {
			unsigned int bytes;
			char __user *iov_addr;

			bytes = min_t(unsigned int,
				      iov[iov_idx].iov_len - iov_off, bv_len);
			iov_addr = iov[iov_idx].iov_base + iov_off;

			if (!ret) {
				if (to_user)
					ret = copy_to_user(iov_addr, bv_addr,
							   bytes);

				if (from_user)
					ret = copy_from_user(bv_addr, iov_addr,
							     bytes);

				if (ret)
					ret = -EFAULT;
			}

			bv_len -= bytes;
			bv_addr += bytes;
			iov_addr += bytes;
			iov_off += bytes;

			if (iov[iov_idx].iov_len == iov_off) {
				iov_idx++;
				iov_off = 0;
			}
		}

		if (do_free_page)
			__free_page(bvec->bv_page);
	}

	return ret;
}

/**
 *	bio_uncopy_user	-	finish previously mapped bio
 *	@bio: bio being terminated
 *
 *	Free pages allocated from bio_copy_user() and write back data
 *	to user space in case of a read.
 */
int bio_uncopy_user(struct bio *bio)
{
	struct bio_map_data *bmd = bio->bi_private;
	int ret = 0;

	if (!bio_flagged(bio, BIO_NULL_MAPPED))
		ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs,
				     bmd->nr_sgvecs, bio_data_dir(bio) == READ,
				     0, bmd->is_our_pages);
	bio_free_map_data(bmd);
	bio_put(bio);
	return ret;
}
EXPORT_SYMBOL(bio_uncopy_user);

/**
 *	bio_copy_user_iov	-	copy user data to bio
 *	@q: destination block queue
 *	@map_data: pointer to the rq_map_data holding pages (if necessary)
 *	@iov:	the iovec.
 *	@iov_count: number of elements in the iovec
 *	@write_to_vm: bool indicating writing to pages or not
 *	@gfp_mask: memory allocation flags
 *
 *	Prepares and returns a bio for indirect user io, bouncing data
 *	to/from kernel pages as necessary. Must be paired with a call to
 *	bio_uncopy_user() on io completion.
 */
struct bio *bio_copy_user_iov(struct request_queue *q,
			      struct rq_map_data *map_data,
			      struct sg_iovec *iov, int iov_count,
			      int write_to_vm, gfp_t gfp_mask)
{
	struct bio_map_data *bmd;
	struct bio_vec *bvec;
	struct page *page;
	struct bio *bio;
	int i, ret;
	int nr_pages = 0;
	unsigned int len = 0;
	unsigned int offset = map_data ? map_data->offset & ~PAGE_MASK : 0;

	for (i = 0; i < iov_count; i++) {
		unsigned long uaddr;
		unsigned long end;
		unsigned long start;

		uaddr = (unsigned long)iov[i].iov_base;
		end = (uaddr + iov[i].iov_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
		start = uaddr >> PAGE_SHIFT;

		nr_pages += end - start;
		len += iov[i].iov_len;
	}

	if (offset)
		nr_pages++;

	bmd = bio_alloc_map_data(nr_pages, iov_count, gfp_mask);
	if (!bmd)
		return ERR_PTR(-ENOMEM);

	ret = -ENOMEM;
	bio = bio_kmalloc(gfp_mask, nr_pages);
	if (!bio)
		goto out_bmd;

	bio->bi_rw |= (!write_to_vm << BIO_RW);

	ret = 0;

	if (map_data) {
		nr_pages = 1 << map_data->page_order;
		i = map_data->offset / PAGE_SIZE;
	}
	while (len) {
		unsigned int bytes = PAGE_SIZE;

		bytes -= offset;

		if (bytes > len)
			bytes = len;

		if (map_data) {
			if (i == map_data->nr_entries * nr_pages) {
				ret = -ENOMEM;
				break;
			}

			page = map_data->pages[i / nr_pages];
			page += (i % nr_pages);

			i++;
		} else {
			page = alloc_page(q->bounce_gfp | gfp_mask);
			if (!page) {
				ret = -ENOMEM;
				break;
			}
		}

		if (bio_add_pc_page(q, bio, page, bytes, offset) < bytes)
			break;

		len -= bytes;
		offset = 0;
	}

	if (ret)
		goto cleanup;

	/*
	 * success: copy the user data into the bounce pages if this is a
	 * write to the device (or the caller explicitly asked for it)
	 */
	if ((!write_to_vm && (!map_data || !map_data->null_mapped)) ||
	    (map_data && map_data->from_user)) {
		ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0, 1, 0);
		if (ret)
			goto cleanup;
	}

	bio_set_map_data(bmd, bio, iov, iov_count, map_data ? 0 : 1);
	return bio;
cleanup:
	if (!map_data)
		bio_for_each_segment(bvec, bio, i)
			__free_page(bvec->bv_page);

	bio_put(bio);
out_bmd:
	bio_free_map_data(bmd);
	return ERR_PTR(ret);
}

/**
 *	bio_copy_user	-	copy user data to bio
 *	@q: destination block queue
 *	@map_data: pointer to the rq_map_data holding pages (if necessary)
 *	@uaddr: start of user address
 *	@len: length in bytes
 *	@write_to_vm: bool indicating writing to pages or not
 *	@gfp_mask: memory allocation flags
 *
 *	Prepares and returns a bio for indirect user io, bouncing data
 *	to/from kernel pages as necessary. Must be paired with a call to
 *	bio_uncopy_user() on io completion.
 */
struct bio *bio_copy_user(struct request_queue *q, struct rq_map_data *map_data,
			  unsigned long uaddr, unsigned int len,
			  int write_to_vm, gfp_t gfp_mask)
{
	struct sg_iovec iov;

	iov.iov_base = (void __user *)uaddr;
	iov.iov_len = len;

	return bio_copy_user_iov(q, map_data, &iov, 1, write_to_vm, gfp_mask);
}
EXPORT_SYMBOL(bio_copy_user);

static struct bio *__bio_map_user_iov(struct request_queue *q,
				      struct block_device *bdev,
				      struct sg_iovec *iov, int iov_count,
				      int write_to_vm, gfp_t gfp_mask)
{
	int i, j;
	int nr_pages = 0;
	struct page **pages;
	struct bio *bio;
	int cur_page = 0;
	int ret, offset;

	for (i = 0; i < iov_count; i++) {
		unsigned long uaddr = (unsigned long)iov[i].iov_base;
		unsigned long len = iov[i].iov_len;
		unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
		unsigned long start = uaddr >> PAGE_SHIFT;

		nr_pages += end - start;
		/*
		 * buffer must be aligned to at least hardsector size for now
		 */
		if (uaddr & queue_dma_alignment(q))
			return ERR_PTR(-EINVAL);
	}

	if (!nr_pages)
		return ERR_PTR(-EINVAL);

	bio = bio_kmalloc(gfp_mask, nr_pages);
	if (!bio)
		return ERR_PTR(-ENOMEM);

	ret = -ENOMEM;
	pages = kcalloc(nr_pages, sizeof(struct page *), gfp_mask);
	if (!pages)
		goto out;

	for (i = 0; i < iov_count; i++) {
		unsigned long uaddr = (unsigned long)iov[i].iov_base;
		unsigned long len = iov[i].iov_len;
		unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
		unsigned long start = uaddr >> PAGE_SHIFT;
		const int local_nr_pages = end - start;
		const int page_limit = cur_page + local_nr_pages;

		ret = get_user_pages_fast(uaddr, local_nr_pages,
					  write_to_vm, &pages[cur_page]);
		if (ret < local_nr_pages) {
			ret = -EFAULT;
			goto out_unmap;
		}

		offset = uaddr & ~PAGE_MASK;
		for (j = cur_page; j < page_limit; j++) {
			unsigned int bytes = PAGE_SIZE - offset;

			if (len <= 0)
				break;

			if (bytes > len)
				bytes = len;

			/*
			 * stop adding pages once the queue refuses one
			 */
			if (bio_add_pc_page(q, bio, pages[j], bytes, offset) <
					    bytes)
				break;

			len -= bytes;
			offset = 0;
		}

		cur_page = j;
		/*
		 * release the pages we didn't map into the bio, if any
		 */
		while (j < page_limit)
			page_cache_release(pages[j++]);
	}

	kfree(pages);

	/*
	 * set data direction, and check if mapped pages need bouncing
	 */
	if (!write_to_vm)
		bio->bi_rw |= (1 << BIO_RW);

	bio->bi_bdev = bdev;
	bio->bi_flags |= (1 << BIO_USER_MAPPED);
	return bio;

 out_unmap:
	for (i = 0; i < nr_pages; i++) {
		if (!pages[i])
			break;
		page_cache_release(pages[i]);
	}
 out:
	kfree(pages);
	bio_put(bio);
	return ERR_PTR(ret);
}

/**
 *	bio_map_user	-	map user address into bio
 *	@q: the struct request_queue for the bio
 *	@bdev: destination block device
 *	@uaddr: start of user address
 *	@len: length in bytes
 *	@write_to_vm: bool indicating writing to pages or not
 *	@gfp_mask: memory allocation flags
 *
 *	Map the user space address into a bio suitable for io to a block
 *	device. Returns an error pointer in case of error.
 */
struct bio *bio_map_user(struct request_queue *q, struct block_device *bdev,
			 unsigned long uaddr, unsigned int len, int write_to_vm,
			 gfp_t gfp_mask)
{
	struct sg_iovec iov;

	iov.iov_base = (void __user *)uaddr;
	iov.iov_len = len;

	return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm, gfp_mask);
}
EXPORT_SYMBOL(bio_map_user);
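
/*
 * A minimal usage sketch (assumption, not code from this file): pass-
 * through paths map the user buffer directly, submit the bio, then unmap
 * it once the request completes. Here the device data is read into the
 * user buffer, so write_to_vm is 1; 'q', 'bdev', 'ubuf' and 'count' are
 * hypothetical caller-provided values:
 *
 *	struct bio *bio = bio_map_user(q, bdev, (unsigned long)ubuf,
 *				       count, 1, GFP_KERNEL);
 *
 *	if (IS_ERR(bio))
 *		return PTR_ERR(bio);
 *	// ... submit the bio and wait for completion ...
 *	bio_unmap_user(bio);
 */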

/**
 *	bio_map_user_iov - map user sg_iovec table into bio
 *	@q: the struct request_queue for the bio
 *	@bdev: destination block device
 *	@iov:	the iovec.
 *	@iov_count: number of elements in the iovec
 *	@write_to_vm: bool indicating writing to pages or not
 *	@gfp_mask: memory allocation flags
 *
 *	Map the user space address into a bio suitable for io to a block
 *	device. Returns an error pointer in case of error.
 */
struct bio *bio_map_user_iov(struct request_queue *q, struct block_device *bdev,
			     struct sg_iovec *iov, int iov_count,
			     int write_to_vm, gfp_t gfp_mask)
{
	struct bio *bio;

	bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm,
				 gfp_mask);
	if (IS_ERR(bio))
		return bio;

	/*
	 * subtle -- if __bio_map_user() ended up bouncing a bio,
	 * it would normally disappear when its bi_end_io is run.
	 * however, we need it for the unmap, so grab an extra
	 * reference to it
	 */
	bio_get(bio);

	return bio;
}

static void __bio_unmap_user(struct bio *bio)
{
	struct bio_vec *bvec;
	int i;

	/*
	 * make sure we dirty pages we wrote to
	 */
	__bio_for_each_segment(bvec, bio, i, 0) {
		if (bio_data_dir(bio) == READ)
			set_page_dirty_lock(bvec->bv_page);

		page_cache_release(bvec->bv_page);
	}

	bio_put(bio);
}

/**
 *	bio_unmap_user	-	unmap a bio
 *	@bio:		the bio being unmapped
 *
 *	Unmap a bio previously mapped by bio_map_user(). Must be called from
 *	process context.
 *
 *	bio_unmap_user() may sleep.
 */
void bio_unmap_user(struct bio *bio)
{
	__bio_unmap_user(bio);
	bio_put(bio);
}
EXPORT_SYMBOL(bio_unmap_user);

static void bio_map_kern_endio(struct bio *bio, int err)
{
	bio_put(bio);
}

static struct bio *__bio_map_kern(struct request_queue *q, void *data,
				  unsigned int len, gfp_t gfp_mask)
{
	unsigned long kaddr = (unsigned long)data;
	unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
	unsigned long start = kaddr >> PAGE_SHIFT;
	const int nr_pages = end - start;
	int offset, i;
	struct bio *bio;

	bio = bio_kmalloc(gfp_mask, nr_pages);
	if (!bio)
		return ERR_PTR(-ENOMEM);

	offset = offset_in_page(kaddr);
	for (i = 0; i < nr_pages; i++) {
		unsigned int bytes = PAGE_SIZE - offset;

		if (len <= 0)
			break;

		if (bytes > len)
			bytes = len;

		if (bio_add_pc_page(q, bio, virt_to_page(data), bytes,
				    offset) < bytes)
			break;

		data += bytes;
		len -= bytes;
		offset = 0;
	}

	bio->bi_end_io = bio_map_kern_endio;
	return bio;
}

/**
 *	bio_map_kern	-	map kernel address into bio
 *	@q: the struct request_queue for the bio
 *	@data: pointer to buffer to map
 *	@len: length in bytes
 *	@gfp_mask: allocation flags for bio allocation
 *
 *	Map the kernel address into a bio suitable for io to a block
 *	device. Returns an error pointer in case of error.
 */
struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
			 gfp_t gfp_mask)
{
	struct bio *bio;

	bio = __bio_map_kern(q, data, len, gfp_mask);
	if (IS_ERR(bio))
		return bio;

	if (bio->bi_size == len)
		return bio;

	/*
	 * Don't support partial mappings.
	 */
	bio_put(bio);
	return ERR_PTR(-EINVAL);
}
EXPORT_SYMBOL(bio_map_kern);

static void bio_copy_kern_endio(struct bio *bio, int err)
{
	struct bio_vec *bvec;
	const int read = bio_data_dir(bio) == READ;
	struct bio_map_data *bmd = bio->bi_private;
	int i;
	char *p = bmd->sgvecs[0].iov_base;

	__bio_for_each_segment(bvec, bio, i, 0) {
		char *addr = page_address(bvec->bv_page);
		int len = bmd->iovecs[i].bv_len;

		if (read)
			memcpy(p, addr, len);

		__free_page(bvec->bv_page);
		p += len;
	}

	bio_free_map_data(bmd);
	bio_put(bio);
}

/**
 *	bio_copy_kern	-	copy kernel address into bio
 *	@q: the struct request_queue for the bio
 *	@data: pointer to buffer to copy
 *	@len: length in bytes
 *	@gfp_mask: allocation flags for bio and page allocation
 *	@reading: data direction is READ
 *
 *	Copy the kernel address into a bio suitable for io to a block
 *	device. Returns an error pointer in case of error.
 */
struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len,
			  gfp_t gfp_mask, int reading)
{
	struct bio *bio;
	struct bio_vec *bvec;
	int i;

	bio = bio_copy_user(q, NULL, (unsigned long)data, len, 1, gfp_mask);
	if (IS_ERR(bio))
		return bio;

	if (!reading) {
		void *p = data;

		bio_for_each_segment(bvec, bio, i) {
			char *addr = page_address(bvec->bv_page);

			memcpy(addr, p, bvec->bv_len);
			p += bvec->bv_len;
		}
	}

	bio->bi_end_io = bio_copy_kern_endio;

	return bio;
}
EXPORT_SYMBOL(bio_copy_kern);

/*
 * bio_set_pages_dirty() and bio_check_pages_dirty() are support functions
 * for performing direct-IO in BIOs.
 *
 * The problem is that we cannot run set_page_dirty() from interrupt context
 * because the required locks are not interrupt-safe.  So what we do is mark
 * the pages dirty _before_ performing IO, and in interrupt context check
 * that they are still dirty.  If so, fine.  If not, they are redirtied in
 * process context (see bio_dirty_fn() below).
 *
 * Compound pages are special-cased: they are never dirtied here, and
 * bio_check_pages_dirty() releases them directly on completion instead of
 * handing them to the redirtying worker.
 */
void bio_set_pages_dirty(struct bio *bio)
{
	struct bio_vec *bvec = bio->bi_io_vec;
	int i;

	for (i = 0; i < bio->bi_vcnt; i++) {
		struct page *page = bvec[i].bv_page;

		if (page && !PageCompound(page))
			set_page_dirty_lock(page);
	}
}

static void bio_release_pages(struct bio *bio)
{
	struct bio_vec *bvec = bio->bi_io_vec;
	int i;

	for (i = 0; i < bio->bi_vcnt; i++) {
		struct page *page = bvec[i].bv_page;

		if (page)
			put_page(page);
	}
}

/*
 * bio_check_pages_dirty() will check that all the BIO's pages are still
 * dirty.  If they are, then fine.  If, however, some pages are clean then
 * they must have been written out during the direct-IO read.  So we take
 * another ref on the BIO and the offending pages and re-dirty the pages in
 * process context.
 *
 * It is expected that bio_check_pages_dirty() will wholly own the BIO from
 * here on.  It will run one page_cache_release() against each page and will
 * run one bio_put() against the BIO.
 */
static void bio_dirty_fn(struct work_struct *work);

static DECLARE_WORK(bio_dirty_work, bio_dirty_fn);
static DEFINE_SPINLOCK(bio_dirty_lock);
static struct bio *bio_dirty_list;

/*
 * This runs in process context
 */
static void bio_dirty_fn(struct work_struct *work)
{
	unsigned long flags;
	struct bio *bio;

	spin_lock_irqsave(&bio_dirty_lock, flags);
	bio = bio_dirty_list;
	bio_dirty_list = NULL;
	spin_unlock_irqrestore(&bio_dirty_lock, flags);

	while (bio) {
		struct bio *next = bio->bi_private;

		bio_set_pages_dirty(bio);
		bio_release_pages(bio);
		bio_put(bio);
		bio = next;
	}
}

void bio_check_pages_dirty(struct bio *bio)
{
	struct bio_vec *bvec = bio->bi_io_vec;
	int nr_clean_pages = 0;
	int i;

	for (i = 0; i < bio->bi_vcnt; i++) {
		struct page *page = bvec[i].bv_page;

		if (PageDirty(page) || PageCompound(page)) {
			page_cache_release(page);
			bvec[i].bv_page = NULL;
		} else {
			nr_clean_pages++;
		}
	}

	if (nr_clean_pages) {
		unsigned long flags;

		spin_lock_irqsave(&bio_dirty_lock, flags);
		bio->bi_private = bio_dirty_list;
		bio_dirty_list = bio;
		spin_unlock_irqrestore(&bio_dirty_lock, flags);
		schedule_work(&bio_dirty_work);
	} else {
		bio_put(bio);
	}
}

/**
 * bio_endio - end I/O on a bio
 * @bio:	bio
 * @error:	error, if any
 *
 * Description:
 *   bio_endio() will end I/O on the whole bio. bio_endio() is the
 *   preferred way to end I/O on a bio, it takes care of clearing
 *   BIO_UPTODATE on error. @error is 0 on success, and one of the
 *   established -Exxxx (-EIO, for instance) error values in case
 *   something went wrong. No one should call bi_end_io() directly on a
 *   bio unless they own it and thus know that it has an end_io
 *   function.
 **/
void bio_endio(struct bio *bio, int error)
{
	if (error)
		clear_bit(BIO_UPTODATE, &bio->bi_flags);
	else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
		error = -EIO;

	if (bio->bi_end_io)
		bio->bi_end_io(bio, error);
}
EXPORT_SYMBOL(bio_endio);

void bio_pair_release(struct bio_pair *bp)
{
	if (atomic_dec_and_test(&bp->cnt)) {
		struct bio *master = bp->bio1.bi_private;

		bio_endio(master, bp->error);
		mempool_free(bp, bp->bio2.bi_private);
	}
}
EXPORT_SYMBOL(bio_pair_release);

static void bio_pair_end_1(struct bio *bi, int err)
{
	struct bio_pair *bp = container_of(bi, struct bio_pair, bio1);

	if (err)
		bp->error = err;

	bio_pair_release(bp);
}

static void bio_pair_end_2(struct bio *bi, int err)
{
	struct bio_pair *bp = container_of(bi, struct bio_pair, bio2);

	if (err)
		bp->error = err;

	bio_pair_release(bp);
}

/*
 * split a bio - only worry about a bio with a single page in its iovec
 */
struct bio_pair *bio_split(struct bio *bi, int first_sectors)
{
	struct bio_pair *bp = mempool_alloc(bio_split_pool, GFP_NOIO);

	if (!bp)
		return bp;

	trace_block_split(bdev_get_queue(bi->bi_bdev), bi,
			  bi->bi_sector + first_sectors);

	BUG_ON(bi->bi_vcnt != 1);
	BUG_ON(bi->bi_idx != 0);
	atomic_set(&bp->cnt, 3);
	bp->error = 0;
	bp->bio1 = *bi;
	bp->bio2 = *bi;
	bp->bio2.bi_sector += first_sectors;
	bp->bio2.bi_size -= first_sectors << 9;
	bp->bio1.bi_size = first_sectors << 9;

	bp->bv1 = bi->bi_io_vec[0];
	bp->bv2 = bi->bi_io_vec[0];
	bp->bv2.bv_offset += first_sectors << 9;
	bp->bv2.bv_len -= first_sectors << 9;
	bp->bv1.bv_len = first_sectors << 9;

	bp->bio1.bi_io_vec = &bp->bv1;
	bp->bio2.bi_io_vec = &bp->bv2;

	bp->bio1.bi_max_vecs = 1;
	bp->bio2.bi_max_vecs = 1;

	bp->bio1.bi_end_io = bio_pair_end_1;
	bp->bio2.bi_end_io = bio_pair_end_2;

	bp->bio1.bi_private = bi;
	bp->bio2.bi_private = bio_split_pool;

	if (bio_integrity(bi))
		bio_integrity_split(bi, bp, first_sectors);

	return bp;
}
EXPORT_SYMBOL(bio_split);
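
/*
 * A minimal usage sketch (assumption, not code from this file): a striped
 * driver whose make_request function sees a single-page bio straddling a
 * chunk boundary would split it at the boundary and submit both halves;
 * the original bio is completed once both halves finish and the caller
 * drops its own reference. 'boundary_sector' is a hypothetical value, and
 * the GFP_NOIO mempool allocation means bp is not expected to be NULL:
 *
 *	struct bio_pair *bp = bio_split(bio, boundary_sector - bio->bi_sector);
 *
 *	generic_make_request(&bp->bio1);
 *	generic_make_request(&bp->bio2);
 *	bio_pair_release(bp);
 */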

/**
 *      bio_sector_offset - Find hardware sector offset in bio
 *      @bio:           bio to inspect
 *      @index:         bio_vec index
 *      @offset:        offset in bv_page
 *
 *      Return the number of hardware sectors between the beginning of the
 *      bio and an end point indicated by a bio_vec index and an offset
 *      within that vector's page.
 */
sector_t bio_sector_offset(struct bio *bio, unsigned short index,
			   unsigned int offset)
{
	unsigned int sector_sz;
	struct bio_vec *bv;
	sector_t sectors;
	int i;

	sector_sz = queue_logical_block_size(bio->bi_bdev->bd_disk->queue);
	sectors = 0;

	if (index >= bio->bi_idx)
		index = bio->bi_vcnt - 1;

	__bio_for_each_segment(bv, bio, i, 0) {
		if (i == index) {
			if (offset > bv->bv_offset)
				sectors += (offset - bv->bv_offset) / sector_sz;
			break;
		}

		sectors += bv->bv_len / sector_sz;
	}

	return sectors;
}
EXPORT_SYMBOL(bio_sector_offset);

/*
 * create memory pools for biovec's in a bio_set.
 * use the global biovec slabs created for general use.
 */
static int biovec_create_pools(struct bio_set *bs, int pool_entries)
{
	struct biovec_slab *bp = bvec_slabs + BIOVEC_MAX_IDX;

	bs->bvec_pool = mempool_create_slab_pool(pool_entries, bp->slab);
	if (!bs->bvec_pool)
		return -ENOMEM;

	return 0;
}

static void biovec_free_pools(struct bio_set *bs)
{
	mempool_destroy(bs->bvec_pool);
}

void bioset_free(struct bio_set *bs)
{
	if (bs->bio_pool)
		mempool_destroy(bs->bio_pool);

	bioset_integrity_free(bs);
	biovec_free_pools(bs);
	bio_put_slab(bs);

	kfree(bs);
}
EXPORT_SYMBOL(bioset_free);

/**
 * bioset_create  - Create a bio_set
 * @pool_size:	Number of bio and bio_vecs to cache in the mempool
 * @front_pad:	Number of bytes to allocate in front of the returned bio
 *
 * Description:
 *    Set up a bio_set to be used with bio_alloc_bioset(). Allows the caller
 *    to ask for a number of bytes to be allocated in front of the bio.
 *    Front pad allocation is useful for embedding the bio inside
 *    another structure, to avoid allocating extra data to go with the bio.
 *    Note that the bio must be embedded at the very end of the allocated
 *    structure.
 */
struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
{
	unsigned int back_pad = BIO_INLINE_VECS * sizeof(struct bio_vec);
	struct bio_set *bs;

	bs = kzalloc(sizeof(*bs), GFP_KERNEL);
	if (!bs)
		return NULL;

	bs->front_pad = front_pad;

	bs->bio_slab = bio_find_or_create_slab(front_pad + back_pad);
	if (!bs->bio_slab) {
		kfree(bs);
		return NULL;
	}

	bs->bio_pool = mempool_create_slab_pool(pool_size, bs->bio_slab);
	if (!bs->bio_pool)
		goto bad;

	if (bioset_integrity_create(bs, pool_size))
		goto bad;

	if (!biovec_create_pools(bs, pool_size))
		return bs;

bad:
	bioset_free(bs);
	return NULL;
}
EXPORT_SYMBOL(bioset_create);
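
/*
 * A minimal sketch of a private bio_set (assumption, not code from this
 * file): a stacking driver can embed its per-bio context in front of the
 * bio via front_pad, so one mempool allocation serves both. The names
 * 'my_bio_ctx' and 'my_bio_set' below are hypothetical:
 *
 *	struct my_bio_ctx {
 *		void *private;
 *		struct bio bio;		// must be the last member
 *	};
 *
 *	my_bio_set = bioset_create(16, offsetof(struct my_bio_ctx, bio));
 *
 *	struct bio *bio = bio_alloc_bioset(GFP_NOIO, 4, my_bio_set);
 *	struct my_bio_ctx *ctx = container_of(bio, struct my_bio_ctx, bio);
 */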

static void __init biovec_init_slabs(void)
{
	int i;

	for (i = 0; i < BIOVEC_NR_POOLS; i++) {
		int size;
		struct biovec_slab *bvs = bvec_slabs + i;

#ifndef CONFIG_BLK_DEV_INTEGRITY
		if (bvs->nr_vecs <= BIO_INLINE_VECS) {
			bvs->slab = NULL;
			continue;
		}
#endif

		size = bvs->nr_vecs * sizeof(struct bio_vec);
		bvs->slab = kmem_cache_create(bvs->name, size, 0,
					      SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
	}
}

static int __init init_bio(void)
{
	bio_slab_max = 2;
	bio_slab_nr = 0;
	bio_slabs = kzalloc(bio_slab_max * sizeof(struct bio_slab), GFP_KERNEL);
	if (!bio_slabs)
		panic("bio: can't allocate bios\n");

	bio_integrity_init();
	biovec_init_slabs();

	fs_bio_set = bioset_create(BIO_POOL_SIZE, 0);
	if (!fs_bio_set)
		panic("bio: can't allocate bios\n");

	bio_split_pool = mempool_create_kmalloc_pool(BIO_SPLIT_ENTRIES,
						     sizeof(struct bio_pair));
	if (!bio_split_pool)
		panic("bio: can't create split pool\n");

	return 0;
}
subsys_initcall(init_bio);