// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2001 Jens Axboe <axboe@kernel.dk>
 */
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/uio.h>
#include <linux/iocontext.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/mempool.h>
#include <linux/workqueue.h>
#include <linux/cgroup.h>
#include <linux/blk-cgroup.h>
#include <linux/highmem.h>
#include <linux/sched/sysctl.h>
#include <linux/blk-crypto.h>
#include <linux/xarray.h>

#include <trace/events/block.h>
#include "blk.h"
#include "blk-rq-qos.h"

static struct biovec_slab {
	int nr_vecs;
	char *name;
	struct kmem_cache *slab;
} bvec_slabs[] __read_mostly = {
	{ .nr_vecs = 16, .name = "biovec-16" },
	{ .nr_vecs = 64, .name = "biovec-64" },
	{ .nr_vecs = 128, .name = "biovec-128" },
	{ .nr_vecs = BIO_MAX_VECS, .name = "biovec-max" },
};

static struct biovec_slab *biovec_slab(unsigned short nr_vecs)
{
	switch (nr_vecs) {
	/* smaller bios use inline vecs */
	case 5 ... 16:
		return &bvec_slabs[0];
	case 17 ... 64:
		return &bvec_slabs[1];
	case 65 ... 128:
		return &bvec_slabs[2];
	case 129 ... BIO_MAX_VECS:
		return &bvec_slabs[3];
	default:
		BUG();
		return NULL;
	}
}

/*
 * fs_bio_set is the bio_set containing bio and iovec memory pools used by
 * IO code that does not need private memory pools.
 */
struct bio_set fs_bio_set;
EXPORT_SYMBOL(fs_bio_set);

/*
 * Our slab pool management
 */
struct bio_slab {
	struct kmem_cache *slab;
	unsigned int slab_ref;
	unsigned int slab_size;
	char name[8];
};
static DEFINE_MUTEX(bio_slab_lock);
static DEFINE_XARRAY(bio_slabs);

static struct bio_slab *create_bio_slab(unsigned int size)
{
	struct bio_slab *bslab = kzalloc(sizeof(*bslab), GFP_KERNEL);

	if (!bslab)
		return NULL;

	snprintf(bslab->name, sizeof(bslab->name), "bio-%d", size);
	bslab->slab = kmem_cache_create(bslab->name, size,
			ARCH_KMALLOC_MINALIGN, SLAB_HWCACHE_ALIGN, NULL);
	if (!bslab->slab)
		goto fail_alloc_slab;

	bslab->slab_ref = 1;
	bslab->slab_size = size;

	if (!xa_err(xa_store(&bio_slabs, size, bslab, GFP_KERNEL)))
		return bslab;

	kmem_cache_destroy(bslab->slab);

fail_alloc_slab:
	kfree(bslab);
	return NULL;
}

static inline unsigned int bs_bio_slab_size(struct bio_set *bs)
{
	return bs->front_pad + sizeof(struct bio) + bs->back_pad;
}

static struct kmem_cache *bio_find_or_create_slab(struct bio_set *bs)
{
	unsigned int size = bs_bio_slab_size(bs);
	struct bio_slab *bslab;

	mutex_lock(&bio_slab_lock);
	bslab = xa_load(&bio_slabs, size);
	if (bslab)
		bslab->slab_ref++;
	else
		bslab = create_bio_slab(size);
	mutex_unlock(&bio_slab_lock);

	if (bslab)
		return bslab->slab;
	return NULL;
}

static void bio_put_slab(struct bio_set *bs)
{
	struct bio_slab *bslab = NULL;
	unsigned int slab_size = bs_bio_slab_size(bs);

	mutex_lock(&bio_slab_lock);

	bslab = xa_load(&bio_slabs, slab_size);
	if (WARN(!bslab, KERN_ERR "bio: unable to find slab!\n"))
		goto out;

	WARN_ON_ONCE(bslab->slab != bs->bio_slab);

	WARN_ON(!bslab->slab_ref);

	if (--bslab->slab_ref)
		goto out;

	xa_erase(&bio_slabs, slab_size);

	kmem_cache_destroy(bslab->slab);
	kfree(bslab);

out:
	mutex_unlock(&bio_slab_lock);
}

void bvec_free(mempool_t *pool, struct bio_vec *bv, unsigned short nr_vecs)
{
	BIO_BUG_ON(nr_vecs > BIO_MAX_VECS);

	/* mempool for BIO_MAX_VECS is special */
	if (nr_vecs == BIO_MAX_VECS)
		mempool_free(bv, pool);
	else if (nr_vecs > BIO_INLINE_VECS)
		kmem_cache_free(biovec_slab(nr_vecs)->slab, bv);
}

/*
 * Make the first allocation restricted and don't dump info on allocation
 * failures, since we'll fall back to the mempool in case of failure.
 */
static inline gfp_t bvec_alloc_gfp(gfp_t gfp)
{
	return (gfp & ~(__GFP_DIRECT_RECLAIM | __GFP_IO)) |
		__GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN;
}

struct bio_vec *bvec_alloc(mempool_t *pool, unsigned short *nr_vecs,
		gfp_t gfp_mask)
{
	struct biovec_slab *bvs = biovec_slab(*nr_vecs);

	if (WARN_ON_ONCE(!bvs))
		return NULL;

	/*
	 * Upgrade the nr_vecs request to take full advantage of the
	 * allocation.  We also rely on this in the bvec_free path.
	 */
	*nr_vecs = bvs->nr_vecs;

	/*
	 * Try a slab allocation first for all smaller allocations.  If that
	 * fails and __GFP_DIRECT_RECLAIM is set retry with the mempool.
	 * The mempool is sized to handle up to BIO_MAX_VECS entries.
	 */
	if (*nr_vecs < BIO_MAX_VECS) {
		struct bio_vec *bvl;

		bvl = kmem_cache_alloc(bvs->slab, bvec_alloc_gfp(gfp_mask));
		if (likely(bvl) || !(gfp_mask & __GFP_DIRECT_RECLAIM))
			return bvl;
		*nr_vecs = BIO_MAX_VECS;
	}

	return mempool_alloc(pool, gfp_mask);
}

void bio_uninit(struct bio *bio)
{
#ifdef CONFIG_BLK_CGROUP
	if (bio->bi_blkg) {
		blkg_put(bio->bi_blkg);
		bio->bi_blkg = NULL;
	}
#endif
	if (bio_integrity(bio))
		bio_integrity_free(bio);

	bio_crypt_free_ctx(bio);
}
EXPORT_SYMBOL(bio_uninit);

static void bio_free(struct bio *bio)
{
	struct bio_set *bs = bio->bi_pool;
	void *p;

	bio_uninit(bio);

	if (bs) {
		bvec_free(&bs->bvec_pool, bio->bi_io_vec, bio->bi_max_vecs);

		/*
		 * If we have front padding, adjust the bio pointer before
		 * freeing.
		 */
		p = bio;
		p -= bs->front_pad;

		mempool_free(p, &bs->bio_pool);
	} else {
		/* Bio was allocated by bio_kmalloc() */
		kfree(bio);
	}
}

/**
 * bio_init - initialize a bio
 * @bio:	bio to initialize
 * @table:	bvec table to use for the bio's io vectors, may be NULL
 * @max_vecs:	maximum number of bio_vecs that @table can hold
 */
void bio_init(struct bio *bio, struct bio_vec *table,
	      unsigned short max_vecs)
{
	memset(bio, 0, sizeof(*bio));
	atomic_set(&bio->__bi_remaining, 1);
	atomic_set(&bio->__bi_cnt, 1);

	bio->bi_io_vec = table;
	bio->bi_max_vecs = max_vecs;
}
EXPORT_SYMBOL(bio_init);
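
/*
 * Usage note: bio_init() is what makes on-stack and embedded bios work
 * without going through a bio_set.  A minimal sketch (hypothetical caller;
 * @bdev and @page are assumed to be valid):
 *
 *	struct bio bio;
 *	struct bio_vec bvec;
 *
 *	bio_init(&bio, &bvec, 1);
 *	bio_set_dev(&bio, bdev);
 *	bio.bi_opf = REQ_OP_READ;
 *	bio.bi_iter.bi_sector = 0;
 *	__bio_add_page(&bio, page, PAGE_SIZE, 0);
 *	submit_bio_wait(&bio);
 *
 * Such a bio has no bi_pool, so its storage is owned by the caller and
 * bio_put() must not be called on it.
 */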

/**
 * bio_reset - reinitialize a bio
 * @bio:	bio to reset
 *
 * Description:
 *   After calling bio_reset(), @bio will be in the same state as a freshly
 *   allocated bio returned by bio_alloc_bioset() - the only fields that are
 *   preserved are the ones that are initialized by bio_alloc_bioset(). See
 *   comment in struct bio.
 */
void bio_reset(struct bio *bio)
{
	bio_uninit(bio);
	memset(bio, 0, BIO_RESET_BYTES);
	atomic_set(&bio->__bi_remaining, 1);
}
EXPORT_SYMBOL(bio_reset);

static struct bio *__bio_chain_endio(struct bio *bio)
{
	struct bio *parent = bio->bi_private;

	if (bio->bi_status && !parent->bi_status)
		parent->bi_status = bio->bi_status;
	bio_put(bio);
	return parent;
}

static void bio_chain_endio(struct bio *bio)
{
	bio_endio(__bio_chain_endio(bio));
}

/**
 * bio_chain - chain bio completions
 * @bio: the target bio
 * @parent: the parent bio of @bio
 *
 * The caller won't have a bi_end_io called when @bio completes - instead,
 * @parent's bi_end_io will be called.  Use this function to ensure that
 * @bio completes before @parent.
 *
 * The caller must not set bi_private or bi_end_io in @bio.
 */
void bio_chain(struct bio *bio, struct bio *parent)
{
	BUG_ON(bio->bi_private || bio->bi_end_io);

	bio->bi_private = parent;
	bio->bi_end_io = bio_chain_endio;
	bio_inc_remaining(parent);
}
EXPORT_SYMBOL(bio_chain);
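
/*
 * Example: bio_chain() is typically used when one logical operation must be
 * issued as several bios but complete as one.  A hedged sketch (assumes a
 * @parent bio already set up with its real bi_end_io, and @len bytes to
 * peel off the front):
 *
 *	struct bio *split = bio_clone_fast(parent, GFP_NOIO, &fs_bio_set);
 *
 *	split->bi_iter.bi_size = len;
 *	bio_advance(parent, len);
 *	bio_chain(split, parent);
 *	submit_bio_noacct(split);
 *	submit_bio_noacct(parent);
 *
 * parent's bi_end_io runs only once, after both bios have completed.
 */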

static void bio_alloc_rescue(struct work_struct *work)
{
	struct bio_set *bs = container_of(work, struct bio_set, rescue_work);
	struct bio *bio;

	while (1) {
		spin_lock(&bs->rescue_lock);
		bio = bio_list_pop(&bs->rescue_list);
		spin_unlock(&bs->rescue_lock);

		if (!bio)
			break;

		submit_bio_noacct(bio);
	}
}

static void punt_bios_to_rescuer(struct bio_set *bs)
{
	struct bio_list punt, nopunt;
	struct bio *bio;

	if (WARN_ON_ONCE(!bs->rescue_workqueue))
		return;
	/*
	 * In order to guarantee forward progress we must punt only bios that
	 * were allocated from this bio_set; otherwise, if there was a bio on
	 * a lower device we could deadlock if we were to punt it too.
	 *
	 * Since bio lists are singly linked, pop them all instead of trying
	 * to remove from the middle of the list:
	 */

	bio_list_init(&punt);
	bio_list_init(&nopunt);

	while ((bio = bio_list_pop(&current->bio_list[0])))
		bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio);
	current->bio_list[0] = nopunt;

	bio_list_init(&nopunt);
	while ((bio = bio_list_pop(&current->bio_list[1])))
		bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio);
	current->bio_list[1] = nopunt;

	spin_lock(&bs->rescue_lock);
	bio_list_merge(&bs->rescue_list, &punt);
	spin_unlock(&bs->rescue_lock);

	queue_work(bs->rescue_workqueue, &bs->rescue_work);
}

/**
 * bio_alloc_bioset - allocate a bio for I/O
 * @gfp_mask:   the GFP_* mask given to the slab allocator
 * @nr_iovecs:	number of iovecs to pre-allocate
 * @bs:		the bio_set to allocate from.
 *
 * Allocate a bio from the mempools in @bs.
 *
 * If %__GFP_DIRECT_RECLAIM is set then bio_alloc will always be able to
 * allocate a bio.  This is due to the mempool guarantees.  To make this work,
 * callers must never allocate more than 1 bio at a time from the general pool.
 * Callers that need to allocate more than 1 bio must always submit the
 * previously allocated bio for IO before attempting to allocate a new one.
 * Failure to do so can cause deadlocks under memory pressure.
 *
 * Note that when running under submit_bio_noacct() (i.e. any block driver),
 * bios are not submitted until after you return - see the code in
 * submit_bio_noacct() that converts recursion into iteration, to prevent
 * stack overflows.
 *
 * This would normally mean allocating multiple bios under
 * submit_bio_noacct() would be susceptible to deadlocks, but we have
 * deadlock avoidance code that resubmits any blocked bios from a rescuer
 * thread.
 *
 * However, we do not guarantee forward progress for allocations from other
 * mempools. Doing multiple allocations from the same mempool under
 * submit_bio_noacct() should be avoided - instead, use bio_set's front_pad
 * for per bio allocations.
 *
 * Returns: Pointer to new bio on success, NULL on failure.
 */
struct bio *bio_alloc_bioset(gfp_t gfp_mask, unsigned short nr_iovecs,
			     struct bio_set *bs)
{
	gfp_t saved_gfp = gfp_mask;
	struct bio *bio;
	void *p;

	/* should not use nobvec bioset for nr_iovecs > 0 */
	if (WARN_ON_ONCE(!mempool_initialized(&bs->bvec_pool) && nr_iovecs > 0))
		return NULL;

	/*
	 * submit_bio_noacct() converts recursion to iteration; this means if
	 * we're running beneath it, any bios we allocate and submit will not
	 * be submitted (and thus freed) until after we return.
	 *
	 * This exposes us to a potential deadlock if we allocate multiple
	 * bios from the same bio_set while running underneath
	 * submit_bio_noacct().  If we were to allocate multiple bios (say a
	 * stacking block driver that was splitting bios), we would deadlock
	 * if we exhausted the mempool's reserve.
	 *
	 * We solve this, and guarantee forward progress, with a rescuer
	 * workqueue per bio_set. If we go to allocate and there are bios on
	 * current->bio_list, we first try the allocation without
	 * __GFP_DIRECT_RECLAIM; if that fails, we punt those bios we would
	 * be blocking to the rescuer workqueue before we retry with the
	 * original gfp_flags.
	 */
	if (current->bio_list &&
	    (!bio_list_empty(&current->bio_list[0]) ||
	     !bio_list_empty(&current->bio_list[1])) &&
	    bs->rescue_workqueue)
		gfp_mask &= ~__GFP_DIRECT_RECLAIM;

	p = mempool_alloc(&bs->bio_pool, gfp_mask);
	if (!p && gfp_mask != saved_gfp) {
		punt_bios_to_rescuer(bs);
		gfp_mask = saved_gfp;
		p = mempool_alloc(&bs->bio_pool, gfp_mask);
	}
	if (unlikely(!p))
		return NULL;

	bio = p + bs->front_pad;
	if (nr_iovecs > BIO_INLINE_VECS) {
		struct bio_vec *bvl = NULL;

		bvl = bvec_alloc(&bs->bvec_pool, &nr_iovecs, gfp_mask);
		if (!bvl && gfp_mask != saved_gfp) {
			punt_bios_to_rescuer(bs);
			gfp_mask = saved_gfp;
			bvl = bvec_alloc(&bs->bvec_pool, &nr_iovecs, gfp_mask);
		}
		if (unlikely(!bvl))
			goto err_free;

		bio_init(bio, bvl, nr_iovecs);
	} else if (nr_iovecs) {
		bio_init(bio, bio->bi_inline_vecs, BIO_INLINE_VECS);
	} else {
		bio_init(bio, NULL, 0);
	}

	bio->bi_pool = bs;
	return bio;

err_free:
	mempool_free(p, &bs->bio_pool);
	return NULL;
}
EXPORT_SYMBOL(bio_alloc_bioset);
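
/*
 * Example: a typical filesystem-style allocation from the shared pool.  A
 * hedged sketch (hypothetical caller; @bdev, @sector, @page and the
 * completion handler my_end_io are all assumptions of the example):
 *
 *	struct bio *bio = bio_alloc_bioset(GFP_NOIO, 1, &fs_bio_set);
 *
 *	if (!bio)
 *		return -ENOMEM;
 *	bio_set_dev(bio, bdev);
 *	bio->bi_iter.bi_sector = sector;
 *	bio->bi_opf = REQ_OP_WRITE;
 *	bio_add_page(bio, page, PAGE_SIZE, 0);
 *	bio->bi_end_io = my_end_io;
 *	submit_bio(bio);
 *
 * With GFP_NOIO (__GFP_DIRECT_RECLAIM set) the mempool guarantee applies,
 * but only if the caller submits each bio before allocating the next one
 * from the same pool, per the kernel-doc above.
 */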

/**
 * bio_kmalloc - kmalloc a bio for I/O
 * @gfp_mask:   the GFP_* mask given to the slab allocator
 * @nr_iovecs:	number of iovecs to pre-allocate
 *
 * Use kmalloc to allocate and initialize a bio.
 *
 * Returns: Pointer to new bio on success, NULL on failure.
 */
struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned short nr_iovecs)
{
	struct bio *bio;

	if (nr_iovecs > UIO_MAXIOV)
		return NULL;

	bio = kmalloc(struct_size(bio, bi_inline_vecs, nr_iovecs), gfp_mask);
	if (unlikely(!bio))
		return NULL;
	bio_init(bio, nr_iovecs ? bio->bi_inline_vecs : NULL, nr_iovecs);
	bio->bi_pool = NULL;
	return bio;
}
EXPORT_SYMBOL(bio_kmalloc);

void zero_fill_bio(struct bio *bio)
{
	unsigned long flags;
	struct bio_vec bv;
	struct bvec_iter iter;

	bio_for_each_segment(bv, bio, iter) {
		char *data = bvec_kmap_irq(&bv, &flags);
		memset(data, 0, bv.bv_len);
		flush_dcache_page(bv.bv_page);
		bvec_kunmap_irq(data, &flags);
	}
}
EXPORT_SYMBOL(zero_fill_bio);

/**
 * bio_truncate - truncate the bio to small size of @new_size
 * @bio:	the bio to be truncated
 * @new_size:	new size for truncating the bio
 *
 * Description:
 *   Truncate the bio to new size of @new_size. If bio_op(bio) is
 *   REQ_OP_READ, zero the truncated part. This function should only
 *   be used for handling corner cases, such as bio eod.
 */
void bio_truncate(struct bio *bio, unsigned new_size)
{
	struct bio_vec bv;
	struct bvec_iter iter;
	unsigned int done = 0;
	bool truncated = false;

	if (new_size >= bio->bi_iter.bi_size)
		return;

	if (bio_op(bio) != REQ_OP_READ)
		goto exit;

	bio_for_each_segment(bv, bio, iter) {
		if (done + bv.bv_len > new_size) {
			unsigned offset;

			if (!truncated)
				offset = new_size - done;
			else
				offset = 0;
			zero_user(bv.bv_page, bv.bv_offset + offset,
				  bv.bv_len - offset);
			truncated = true;
		}
		done += bv.bv_len;
	}

 exit:
	/*
	 * Don't touch the bvec table here and make it really immutable, since
	 * the fs bio user has to retrieve all pages via
	 * bio_for_each_segment_all in its .end_io() callback.
	 *
	 * It is enough to truncate the bio by updating .bi_size since we can
	 * make a correct bvec with the truncated .bi_size for drivers to
	 * copy data from/to the truncated bio.
	 */
	bio->bi_iter.bi_size = new_size;
}

/**
 * guard_bio_eod - truncate a BIO to fit the block device
 * @bio:	bio to truncate
 *
 * This allows us to do IO even on the odd last sectors of a device, even if
 * the block size is some multiple of the physical sector size.  The bio is
 * truncated to the device size; for reads the truncated part is zeroed by
 * bio_truncate().
 */
void guard_bio_eod(struct bio *bio)
{
	sector_t maxsector = bdev_nr_sectors(bio->bi_bdev);

	if (!maxsector)
		return;

	/*
	 * If the *whole* IO is past the end of the device,
	 * let it through, and the IO layer will turn it into
	 * an EIO.
	 */
	if (unlikely(bio->bi_iter.bi_sector >= maxsector))
		return;

	maxsector -= bio->bi_iter.bi_sector;
	if (likely((bio->bi_iter.bi_size >> 9) <= maxsector))
		return;

	bio_truncate(bio, maxsector << 9);
}

/**
 * bio_put - release a reference to a bio
 * @bio:   bio to release reference to
 *
 * Description:
 *   Put a reference to a &struct bio, either one you have gotten with
 *   bio_alloc, bio_get or bio_clone_*. The last put of a bio will free it.
 **/
void bio_put(struct bio *bio)
{
	if (!bio_flagged(bio, BIO_REFFED))
		bio_free(bio);
	else {
		BIO_BUG_ON(!atomic_read(&bio->__bi_cnt));

		/*
		 * last put frees it
		 */
		if (atomic_dec_and_test(&bio->__bi_cnt))
			bio_free(bio);
	}
}
EXPORT_SYMBOL(bio_put);

/**
 * __bio_clone_fast - clone a bio that shares the original bio's biovec
 * @bio: destination bio
 * @bio_src: bio to clone
 *
 * Clone a &bio. Caller will own the returned bio, but not
 * the actual data it points to. Reference count of returned
 * bio will be one.
 *
 * Caller must ensure that @bio_src is not freed before @bio.
 */
void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
{
	WARN_ON_ONCE(bio->bi_pool && bio->bi_max_vecs);

	/*
	 * most users will be overriding ->bi_bdev with a new target,
	 * so we don't set nor calculate new physical/hw segment counts here
	 */
	bio->bi_bdev = bio_src->bi_bdev;
	bio_set_flag(bio, BIO_CLONED);
	if (bio_flagged(bio_src, BIO_THROTTLED))
		bio_set_flag(bio, BIO_THROTTLED);
	if (bio_flagged(bio_src, BIO_REMAPPED))
		bio_set_flag(bio, BIO_REMAPPED);
	bio->bi_opf = bio_src->bi_opf;
	bio->bi_ioprio = bio_src->bi_ioprio;
	bio->bi_write_hint = bio_src->bi_write_hint;
	bio->bi_iter = bio_src->bi_iter;
	bio->bi_io_vec = bio_src->bi_io_vec;

	bio_clone_blkg_association(bio, bio_src);
	blkcg_bio_issue_init(bio);
}
EXPORT_SYMBOL(__bio_clone_fast);

/**
 * bio_clone_fast - clone a bio that shares the original bio's biovec
 * @bio: bio to clone
 * @gfp_mask: allocation priority
 * @bs: bio_set to allocate from
 *
 * Like __bio_clone_fast, only also allocates the returned bio
 */
struct bio *bio_clone_fast(struct bio *bio, gfp_t gfp_mask, struct bio_set *bs)
{
	struct bio *b;

	b = bio_alloc_bioset(gfp_mask, 0, bs);
	if (!b)
		return NULL;

	__bio_clone_fast(b, bio);

	if (bio_crypt_clone(b, bio, gfp_mask) < 0)
		goto err_put;

	if (bio_integrity(bio) &&
	    bio_integrity_clone(b, bio, gfp_mask) < 0)
		goto err_put;

	return b;

err_put:
	bio_put(b);
	return NULL;
}
EXPORT_SYMBOL(bio_clone_fast);
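
/*
 * Example: a stacking driver commonly clones an incoming bio, redirects the
 * clone, and completes the original from the clone's end_io.  A hedged
 * sketch (@lower_bdev and clone_end_io are hypothetical; a real driver
 * would use its own bio_set rather than &fs_bio_set to guarantee forward
 * progress):
 *
 *	struct bio *clone = bio_clone_fast(bio, GFP_NOIO, &fs_bio_set);
 *
 *	if (!clone)
 *		return -ENOMEM;
 *	bio_set_dev(clone, lower_bdev);
 *	clone->bi_private = bio;
 *	clone->bi_end_io = clone_end_io;
 *	submit_bio_noacct(clone);
 *
 * The clone shares @bio's biovec, so @bio must stay alive until the clone
 * completes, per the kernel-doc above.
 */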

const char *bio_devname(struct bio *bio, char *buf)
{
	return bdevname(bio->bi_bdev, buf);
}
EXPORT_SYMBOL(bio_devname);

static inline bool page_is_mergeable(const struct bio_vec *bv,
		struct page *page, unsigned int len, unsigned int off,
		bool *same_page)
{
	size_t bv_end = bv->bv_offset + bv->bv_len;
	phys_addr_t vec_end_addr = page_to_phys(bv->bv_page) + bv_end - 1;
	phys_addr_t page_addr = page_to_phys(page);

	if (vec_end_addr + 1 != page_addr + off)
		return false;
	if (xen_domain() && !xen_biovec_phys_mergeable(bv, page))
		return false;

	*same_page = ((vec_end_addr & PAGE_MASK) == page_addr);
	if (*same_page)
		return true;
	return (bv->bv_page + bv_end / PAGE_SIZE) == (page + off / PAGE_SIZE);
}

/*
 * Try to merge a page into a segment, while obeying the hardware segment
 * size limit.  This is not for normal read/write bios, but for passthrough
 * or Zone Append operations that we can't split.
 */
static bool bio_try_merge_hw_seg(struct request_queue *q, struct bio *bio,
		struct page *page, unsigned len,
		unsigned offset, bool *same_page)
{
	struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
	unsigned long mask = queue_segment_boundary(q);
	phys_addr_t addr1 = page_to_phys(bv->bv_page) + bv->bv_offset;
	phys_addr_t addr2 = page_to_phys(page) + offset + len - 1;

	if ((addr1 | mask) != (addr2 | mask))
		return false;
	if (bv->bv_len + len > queue_max_segment_size(q))
		return false;
	return __bio_try_merge_page(bio, page, len, offset, same_page);
}

/**
 * bio_add_hw_page - attempt to add a page to a bio with hw constraints
 * @q: the target queue
 * @bio: destination bio
 * @page: page to add
 * @len: vec entry length
 * @offset: vec entry offset
 * @max_sectors: maximum number of sectors that can be added
 * @same_page: return if the segment has been merged inside the same page
 *
 * Add a page to a bio while respecting the hardware max_sectors, max_segment
 * and gap limitations.
 */
int bio_add_hw_page(struct request_queue *q, struct bio *bio,
		struct page *page, unsigned int len, unsigned int offset,
		unsigned int max_sectors, bool *same_page)
{
	struct bio_vec *bvec;

	if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
		return 0;

	if (((bio->bi_iter.bi_size + len) >> 9) > max_sectors)
		return 0;

	if (bio->bi_vcnt > 0) {
		if (bio_try_merge_hw_seg(q, bio, page, len, offset, same_page))
			return len;

		/*
		 * If the queue doesn't support SG gaps and adding this
		 * segment would create a gap, disallow it.
		 */
		bvec = &bio->bi_io_vec[bio->bi_vcnt - 1];
		if (bvec_gap_to_prev(q, bvec, offset))
			return 0;
	}

	if (bio_full(bio, len))
		return 0;

	if (bio->bi_vcnt >= queue_max_segments(q))
		return 0;

	bvec = &bio->bi_io_vec[bio->bi_vcnt];
	bvec->bv_page = page;
	bvec->bv_len = len;
	bvec->bv_offset = offset;
	bio->bi_vcnt++;
	bio->bi_iter.bi_size += len;
	return len;
}

/**
 * bio_add_pc_page	- attempt to add page to passthrough bio
 * @q: the target queue
 * @bio: destination bio
 * @page: page to add
 * @len: vec entry length
 * @offset: vec entry offset
 *
 * Attempt to add a page to the bio_vec maplist. This can fail for a
 * number of reasons, such as the bio being full or target block device
 * limitations. The target block device must allow bio's up to PAGE_SIZE,
 * so it is always possible to add a single page to an empty bio.
 *
 * This should only be used by passthrough bios.
 */
int bio_add_pc_page(struct request_queue *q, struct bio *bio,
		struct page *page, unsigned int len, unsigned int offset)
{
	bool same_page = false;

	return bio_add_hw_page(q, bio, page, len, offset,
			queue_max_hw_sectors(q), &same_page);
}
EXPORT_SYMBOL(bio_add_pc_page);

/**
 * bio_add_zone_append_page - attempt to add page to zone-append bio
 * @bio: destination bio
 * @page: page to add
 * @len: vec entry length
 * @offset: vec entry offset
 *
 * Attempt to add a page to the bio_vec maplist of a bio that will be
 * submitted for a zone-append request. This can fail for a number of
 * reasons, such as the bio being full or the target block device not being
 * a zoned block device, or other limitations of the target block device.
 * The target block device must allow bio's up to PAGE_SIZE, so it is always
 * possible to add a single page to an empty bio.
 *
 * Returns: number of bytes added to the bio, or 0 in case of a failure.
 */
int bio_add_zone_append_page(struct bio *bio, struct page *page,
			     unsigned int len, unsigned int offset)
{
	struct request_queue *q = bio->bi_bdev->bd_disk->queue;
	bool same_page = false;

	if (WARN_ON_ONCE(bio_op(bio) != REQ_OP_ZONE_APPEND))
		return 0;

	if (WARN_ON_ONCE(!blk_queue_is_zoned(q)))
		return 0;

	return bio_add_hw_page(q, bio, page, len, offset,
			       queue_max_zone_append_sectors(q), &same_page);
}
EXPORT_SYMBOL_GPL(bio_add_zone_append_page);

/**
 * __bio_try_merge_page - try appending data to an existing bvec.
 * @bio: destination bio
 * @page: start page to add
 * @len: length of the data to add
 * @off: offset of the data relative to @page
 * @same_page: return if the segment has been merged inside the same page
 *
 * Try to add the data at @page + @off to the last bvec of @bio.  This is a
 * useful optimisation for file systems with a block size smaller than the
 * page size.
 *
 * Warn if (@len, @off) crosses pages in case that @same_page is true.
 *
 * Return %true on success or %false on failure.
 */
bool __bio_try_merge_page(struct bio *bio, struct page *page,
		unsigned int len, unsigned int off, bool *same_page)
{
	if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
		return false;

	if (bio->bi_vcnt > 0) {
		struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];

		if (page_is_mergeable(bv, page, len, off, same_page)) {
			if (bio->bi_iter.bi_size > UINT_MAX - len) {
				*same_page = false;
				return false;
			}
			bv->bv_len += len;
			bio->bi_iter.bi_size += len;
			return true;
		}
	}
	return false;
}
EXPORT_SYMBOL_GPL(__bio_try_merge_page);

/**
 * __bio_add_page - add page(s) to a bio in a new segment
 * @bio: destination bio
 * @page: start page to add
 * @len: length of the data to add, may cross pages
 * @off: offset of the data relative to @page, may cross pages
 *
 * Add the data at @page + @off to @bio as a new bvec.  The caller must ensure
 * that @bio has space for another bvec.
 */
void __bio_add_page(struct bio *bio, struct page *page,
		unsigned int len, unsigned int off)
{
	struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt];

	WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED));
	WARN_ON_ONCE(bio_full(bio, len));

	bv->bv_page = page;
	bv->bv_offset = off;
	bv->bv_len = len;

	bio->bi_iter.bi_size += len;
	bio->bi_vcnt++;

	if (!bio_flagged(bio, BIO_WORKINGSET) && unlikely(PageWorkingset(page)))
		bio_set_flag(bio, BIO_WORKINGSET);
}
EXPORT_SYMBOL_GPL(__bio_add_page);

/**
 *	bio_add_page	-	attempt to add page(s) to bio
 *	@bio: destination bio
 *	@page: start page to add
 *	@len: vec entry length, may cross pages
 *	@offset: vec entry offset relative to @page, may cross pages
 *
 *	Attempt to add page(s) to the bio_vec maplist. This will only fail
 *	if either bio->bi_vcnt == bio->bi_max_vecs or it's a cloned bio.
 */
int bio_add_page(struct bio *bio, struct page *page,
		 unsigned int len, unsigned int offset)
{
	bool same_page = false;

	if (!__bio_try_merge_page(bio, page, len, offset, &same_page)) {
		if (bio_full(bio, len))
			return 0;
		__bio_add_page(bio, page, len, offset);
	}
	return len;
}
EXPORT_SYMBOL(bio_add_page);
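
/*
 * Example: filling a bio page by page and chaining a fresh one when it
 * fills up, in the style of iomap writeback.  A hedged sketch (hypothetical
 * @pages array of @nr pages; setting the new bio's device, opf and starting
 * sector is elided for brevity):
 *
 *	for (i = 0; i < nr; i++) {
 *		while (!bio_add_page(bio, pages[i], PAGE_SIZE, 0)) {
 *			struct bio *new = bio_alloc_bioset(GFP_NOIO,
 *						BIO_MAX_VECS, &fs_bio_set);
 *
 *			bio_chain(bio, new);
 *			submit_bio(bio);
 *			bio = new;
 *		}
 *	}
 *
 * The return value is the point of the pattern: bio_add_page() returns 0
 * when the bio is full (bi_vcnt == bi_max_vecs) or cloned, and @len on
 * success.
 */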

void bio_release_pages(struct bio *bio, bool mark_dirty)
{
	struct bvec_iter_all iter_all;
	struct bio_vec *bvec;

	if (bio_flagged(bio, BIO_NO_PAGE_REF))
		return;

	bio_for_each_segment_all(bvec, bio, iter_all) {
		if (mark_dirty && !PageCompound(bvec->bv_page))
			set_page_dirty_lock(bvec->bv_page);
		put_page(bvec->bv_page);
	}
}
EXPORT_SYMBOL_GPL(bio_release_pages);

static void __bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter)
{
	WARN_ON_ONCE(bio->bi_max_vecs);

	bio->bi_vcnt = iter->nr_segs;
	bio->bi_io_vec = (struct bio_vec *)iter->bvec;
	bio->bi_iter.bi_bvec_done = iter->iov_offset;
	bio->bi_iter.bi_size = iter->count;
	bio_set_flag(bio, BIO_NO_PAGE_REF);
	bio_set_flag(bio, BIO_CLONED);
}

static int bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter)
{
	__bio_iov_bvec_set(bio, iter);
	iov_iter_advance(iter, iter->count);
	return 0;
}

static int bio_iov_bvec_set_append(struct bio *bio, struct iov_iter *iter)
{
	struct request_queue *q = bio->bi_bdev->bd_disk->queue;
	struct iov_iter i = *iter;

	iov_iter_truncate(&i, queue_max_zone_append_sectors(q) << 9);
	__bio_iov_bvec_set(bio, &i);
	iov_iter_advance(iter, i.count);
	return 0;
}

#define PAGE_PTRS_PER_BVEC	(sizeof(struct bio_vec) / sizeof(struct page *))

/**
 * __bio_iov_iter_get_pages - pin user or kernel pages and add them to a bio
 * @bio: bio to add pages to
 * @iter: iov iterator describing the region to be mapped
 *
 * Pins pages from *iter and appends them to @bio's bvec array. The
 * pages will have to be released using put_page() when done.
 * For multi-segment *iter, this function only adds pages from the
 * next non-empty segment of the iov iterator.
 */
static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
{
	unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt;
	unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt;
	struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt;
	struct page **pages = (struct page **)bv;
	bool same_page = false;
	ssize_t size, left;
	unsigned len, i;
	size_t offset;

	/*
	 * Move the page array up in the allocated memory for the bio vecs
	 * as far as possible so that we can start filling biovecs from the
	 * beginning without overwriting the temporary page array.
	 */
	BUILD_BUG_ON(PAGE_PTRS_PER_BVEC < 2);
	pages += entries_left * (PAGE_PTRS_PER_BVEC - 1);

	size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset);
	if (unlikely(size <= 0))
		return size ? size : -EFAULT;

	for (left = size, i = 0; left > 0; left -= len, i++) {
		struct page *page = pages[i];

		len = min_t(size_t, PAGE_SIZE - offset, left);

		if (__bio_try_merge_page(bio, page, len, offset, &same_page)) {
			if (same_page)
				put_page(page);
		} else {
			if (WARN_ON_ONCE(bio_full(bio, len)))
				return -EINVAL;
			__bio_add_page(bio, page, len, offset);
		}
		offset = 0;
	}

	iov_iter_advance(iter, size);
	return 0;
}

static int __bio_iov_append_get_pages(struct bio *bio, struct iov_iter *iter)
{
	unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt;
	unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt;
	struct request_queue *q = bio->bi_bdev->bd_disk->queue;
	unsigned int max_append_sectors = queue_max_zone_append_sectors(q);
	struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt;
	struct page **pages = (struct page **)bv;
	ssize_t size, left;
	unsigned len, i;
	size_t offset;
	int ret = 0;

	if (WARN_ON_ONCE(!max_append_sectors))
		return 0;

	/*
	 * Move the page array up in the allocated memory for the bio vecs
	 * as far as possible so that we can start filling biovecs from the
	 * beginning without overwriting the temporary page array.
	 */
	BUILD_BUG_ON(PAGE_PTRS_PER_BVEC < 2);
	pages += entries_left * (PAGE_PTRS_PER_BVEC - 1);

	size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset);
	if (unlikely(size <= 0))
		return size ? size : -EFAULT;

	for (left = size, i = 0; left > 0; left -= len, i++) {
		struct page *page = pages[i];
		bool same_page = false;

		len = min_t(size_t, PAGE_SIZE - offset, left);
		if (bio_add_hw_page(q, bio, page, len, offset,
				max_append_sectors, &same_page) != len) {
			ret = -EINVAL;
			break;
		}
		if (same_page)
			put_page(page);
		offset = 0;
	}

	iov_iter_advance(iter, size - left);
	return ret;
}

/**
 * bio_iov_iter_get_pages - add user or kernel pages to a bio
 * @bio: bio to add pages to
 * @iter: iov iterator describing the region to be added
 *
 * This takes either an iterator pointing to user memory, or one pointing to
 * kernel pages (BVEC iterator). If it's pointing to user memory, pages are
 * pinned as needed and the caller is responsible for releasing them with
 * bio_release_pages(). For a BVEC iterator the bio points directly at the
 * iterator's bvecs, no pages are pinned, and the caller must ensure the
 * bvecs stay valid until the bio completes.
 *
 * Pages are added until the bio is full or all the data described by @iter
 * has been consumed; @iter is advanced past whatever was added.
 *
 * Returns 0 if at least some data was added to the bio, otherwise a
 * negative error.
 */
int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
{
	int ret = 0;

	if (iov_iter_is_bvec(iter)) {
		if (bio_op(bio) == REQ_OP_ZONE_APPEND)
			return bio_iov_bvec_set_append(bio, iter);
		return bio_iov_bvec_set(bio, iter);
	}

	do {
		if (bio_op(bio) == REQ_OP_ZONE_APPEND)
			ret = __bio_iov_append_get_pages(bio, iter);
		else
			ret = __bio_iov_iter_get_pages(bio, iter);
	} while (!ret && iov_iter_count(iter) && !bio_full(bio, 0));

	/* don't account direct I/O as memory stall */
	bio_clear_flag(bio, BIO_WORKINGSET);
	return bio->bi_vcnt ? 0 : ret;
}
EXPORT_SYMBOL_GPL(bio_iov_iter_get_pages);
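
/*
 * Example: direct I/O style usage.  A hedged sketch (hypothetical @iter
 * built by the caller, e.g. from import_iovec(); bio setup elided):
 *
 *	ret = bio_iov_iter_get_pages(bio, iter);
 *	if (ret)
 *		return ret;
 *	submit_bio(bio);
 *
 * and in the bio's completion path:
 *
 *	bio_release_pages(bio, bio_data_dir(bio) == READ);
 *	bio_put(bio);
 *
 * bio_release_pages() itself checks BIO_NO_PAGE_REF, so it is safe to call
 * unconditionally even when the pages came from a BVEC iterator.
 */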

static void submit_bio_wait_endio(struct bio *bio)
{
	complete(bio->bi_private);
}

/**
 * submit_bio_wait - submit a bio, and wait until it completes
 * @bio: The &struct bio which describes the I/O
 *
 * Simple wrapper around submit_bio(). Returns 0 on success, or the error from
 * bio_endio() on failure.
 *
 * WARNING: Unlike how submit_bio() is usually used, this function does not
 * result in the bio reference being consumed. The caller must drop the
 * reference on their own.
 */
int submit_bio_wait(struct bio *bio)
{
	DECLARE_COMPLETION_ONSTACK_MAP(done,
			bio->bi_bdev->bd_disk->lockdep_map);
	unsigned long hang_check;

	bio->bi_private = &done;
	bio->bi_end_io = submit_bio_wait_endio;
	bio->bi_opf |= REQ_SYNC;
	submit_bio(bio);

	/* Prevent hang_check timer from firing at us during very long I/O */
	hang_check = sysctl_hung_task_timeout_secs;
	if (hang_check)
		while (!wait_for_completion_io_timeout(&done,
					hang_check * (HZ/2)))
			;
	else
		wait_for_completion_io(&done);

	return blk_status_to_errno(bio->bi_status);
}
EXPORT_SYMBOL(submit_bio_wait);
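
/*
 * Example: a simple synchronous sector read built from the helpers above.
 * A hedged sketch (hypothetical @bdev, @sector and @page; error handling
 * reduced to the essentials):
 *
 *	struct bio *bio = bio_alloc_bioset(GFP_KERNEL, 1, &fs_bio_set);
 *	int err;
 *
 *	bio_set_dev(bio, bdev);
 *	bio->bi_opf = REQ_OP_READ;
 *	bio->bi_iter.bi_sector = sector;
 *	bio_add_page(bio, page, PAGE_SIZE, 0);
 *	err = submit_bio_wait(bio);
 *	bio_put(bio);
 *
 * As the WARNING above notes, submit_bio_wait() does not consume the bio
 * reference, hence the explicit bio_put() afterwards.
 */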

/**
 * bio_advance - increment/complete a bio by some number of bytes
 * @bio:	bio to advance
 * @bytes:	number of bytes to complete
 *
 * This updates bi_sector, bi_size and bi_idx; if the number of bytes to
 * complete doesn't align with a bvec boundary, then bv_len and bv_offset will
 * be updated on the last bvec as well.
 *
 * @bio will then represent the remaining, uncompleted portion of the io.
 */
void bio_advance(struct bio *bio, unsigned bytes)
{
	if (bio_integrity(bio))
		bio_integrity_advance(bio, bytes);

	bio_crypt_advance(bio, bytes);
	bio_advance_iter(bio, &bio->bi_iter, bytes);
}
EXPORT_SYMBOL(bio_advance);

void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter,
			struct bio *src, struct bvec_iter *src_iter)
{
	struct bio_vec src_bv, dst_bv;
	void *src_p, *dst_p;
	unsigned bytes;

	while (src_iter->bi_size && dst_iter->bi_size) {
		src_bv = bio_iter_iovec(src, *src_iter);
		dst_bv = bio_iter_iovec(dst, *dst_iter);

		bytes = min(src_bv.bv_len, dst_bv.bv_len);

		src_p = kmap_atomic(src_bv.bv_page);
		dst_p = kmap_atomic(dst_bv.bv_page);

		memcpy(dst_p + dst_bv.bv_offset,
		       src_p + src_bv.bv_offset,
		       bytes);

		kunmap_atomic(dst_p);
		kunmap_atomic(src_p);

		flush_dcache_page(dst_bv.bv_page);

		bio_advance_iter_single(src, src_iter, bytes);
		bio_advance_iter_single(dst, dst_iter, bytes);
	}
}
EXPORT_SYMBOL(bio_copy_data_iter);

/**
 * bio_copy_data - copy contents of data buffers from one bio to another
 * @src: source bio
 * @dst: destination bio
 *
 * Stops when it reaches the end of either @src or @dst - that is, copies
 * min(src->bi_size, dst->bi_size) bytes (or the equivalent for lists of bios).
 */
void bio_copy_data(struct bio *dst, struct bio *src)
{
	struct bvec_iter src_iter = src->bi_iter;
	struct bvec_iter dst_iter = dst->bi_iter;

	bio_copy_data_iter(dst, &dst_iter, src, &src_iter);
}
EXPORT_SYMBOL(bio_copy_data);

void bio_free_pages(struct bio *bio)
{
	struct bio_vec *bvec;
	struct bvec_iter_all iter_all;

	bio_for_each_segment_all(bvec, bio, iter_all)
		__free_page(bvec->bv_page);
}
EXPORT_SYMBOL(bio_free_pages);

/*
 * bio_set_pages_dirty() and bio_check_pages_dirty() are support functions
 * for performing direct-IO in BIOs.
 *
 * The problem is that we cannot run set_page_dirty() from interrupt context
 * because the required locks are not interrupt safe.  So what we can do is
 * to mark the pages dirty _before_ performing IO.  And in interrupt context,
 * check that the pages are still dirty.  If so, fine.  If not, redirty them
 * in process context.
 *
 * We special-case compound pages here: normally this means reads into
 * hugetlb pages.  The logic in here doesn't really work right for compound
 * pages because the VM does not uniformly chase down the head page in all
 * cases.  But dirtiness of compound pages is pretty meaningless anyway: the
 * VM doesn't handle them at all.  So we skip compound pages here at an
 * early stage.
 *
 * Note that this code is very hard to test under normal circumstances
 * because direct-IO pins the pages with get_user_pages(), so the VM will
 * not clean them.  But other code (e.g. flusher threads) can clean the
 * pages if they are mapped pagecache.
 *
 * Simply disabling the call to bio_set_pages_dirty() is a good way to test
 * the deferred bio dirtying paths.
 */

/*
 * bio_set_pages_dirty() will mark all the bio's pages as dirty.
 */
void bio_set_pages_dirty(struct bio *bio)
{
	struct bio_vec *bvec;
	struct bvec_iter_all iter_all;

	bio_for_each_segment_all(bvec, bio, iter_all) {
		if (!PageCompound(bvec->bv_page))
			set_page_dirty_lock(bvec->bv_page);
	}
}

/*
 * bio_check_pages_dirty() will check that all the BIO's pages are still
 * dirty.  If they are, then fine.  If, however, some pages are clean then
 * they must have been written out during the direct-IO read.  So we take
 * another ref on the BIO and re-dirty the pages in process context.
 *
 * It is expected that bio_check_pages_dirty() will wholly own the BIO from
 * here on.  It will run one put_page() against each page and will run one
 * bio_put() against the BIO.
 */

static void bio_dirty_fn(struct work_struct *work);

static DECLARE_WORK(bio_dirty_work, bio_dirty_fn);
static DEFINE_SPINLOCK(bio_dirty_lock);
static struct bio *bio_dirty_list;

/*
 * This runs in process context
 */
static void bio_dirty_fn(struct work_struct *work)
{
	struct bio *bio, *next;

	spin_lock_irq(&bio_dirty_lock);
	next = bio_dirty_list;
	bio_dirty_list = NULL;
	spin_unlock_irq(&bio_dirty_lock);

	while ((bio = next) != NULL) {
		next = bio->bi_private;

		bio_release_pages(bio, true);
		bio_put(bio);
	}
}

void bio_check_pages_dirty(struct bio *bio)
{
	struct bio_vec *bvec;
	unsigned long flags;
	struct bvec_iter_all iter_all;

	bio_for_each_segment_all(bvec, bio, iter_all) {
		if (!PageDirty(bvec->bv_page) && !PageCompound(bvec->bv_page))
			goto defer;
	}

	bio_release_pages(bio, false);
	bio_put(bio);
	return;
defer:
	spin_lock_irqsave(&bio_dirty_lock, flags);
	bio->bi_private = bio_dirty_list;
	bio_dirty_list = bio;
	spin_unlock_irqrestore(&bio_dirty_lock, flags);
	schedule_work(&bio_dirty_work);
}
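
/*
 * Example: the dirty-page protocol for a direct-IO read, per the comments
 * above.  A hedged sketch of a submission/completion pair (hypothetical
 * read path; the bio owns its page references):
 *
 *	bio_set_pages_dirty(bio);
 *	submit_bio(bio);
 *
 * and in the bio's ->bi_end_io():
 *
 *	bio_check_pages_dirty(bio);
 *
 * bio_check_pages_dirty() drops the page references and the bio reference
 * itself (possibly deferred to bio_dirty_work), so the caller must not
 * touch the bio afterwards.
 */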

static inline bool bio_remaining_done(struct bio *bio)
{
	/*
	 * If we're not chaining, then ->__bi_remaining is always 1 and
	 * we always end io on the first invocation.
	 */
	if (!bio_flagged(bio, BIO_CHAIN))
		return true;

	BUG_ON(atomic_read(&bio->__bi_remaining) <= 0);

	if (atomic_dec_and_test(&bio->__bi_remaining)) {
		bio_clear_flag(bio, BIO_CHAIN);
		return true;
	}

	return false;
}

/**
 * bio_endio - end I/O on a bio
 * @bio:	bio
 *
 * Description:
 *   bio_endio() will end I/O on the whole bio. bio_endio() is the preferred
 *   way to end I/O on a bio. No one should call bi_end_io() directly on a
 *   bio unless they own it and thus know that it has an end_io function.
 *
 *   bio_endio() can be called several times on a bio that has been chained
 *   using bio_chain().  The ->bi_end_io() function will only be called the
 *   last time.
 **/
void bio_endio(struct bio *bio)
{
again:
	if (!bio_remaining_done(bio))
		return;
	if (!bio_integrity_endio(bio))
		return;

	if (bio->bi_bdev)
		rq_qos_done_bio(bio->bi_bdev->bd_disk->queue, bio);

	if (bio->bi_bdev && bio_flagged(bio, BIO_TRACE_COMPLETION)) {
		trace_block_bio_complete(bio->bi_bdev->bd_disk->queue, bio);
		bio_clear_flag(bio, BIO_TRACE_COMPLETION);
	}

	/*
	 * Need to have a real endio function for chained bios, otherwise
	 * various corner cases will break (like stacking block devices that
	 * save/restore bi_end_io) - however, we want to avoid unbounded
	 * recursion and blowing the stack. Tail call optimization would
	 * handle this, but compiling with frame pointers also disables
	 * gcc's sibling call optimization.
	 */
	if (bio->bi_end_io == bio_chain_endio) {
		bio = __bio_chain_endio(bio);
		goto again;
	}

	blk_throtl_bio_endio(bio);
	/* release cgroup info */
	bio_uninit(bio);
	if (bio->bi_end_io)
		bio->bi_end_io(bio);
}
EXPORT_SYMBOL(bio_endio);

/**
 * bio_split - split a bio
 * @bio:	bio to split
 * @sectors:	number of sectors to split from the front of @bio
 * @gfp:	gfp mask
 * @bs:		bio set to allocate from, can be NULL
 *
 * Allocates and returns a new bio which represents @sectors from the start of
 * @bio, and updates @bio to represent the remaining sectors.
 *
 * Unless this is a discard request the newly allocated bio will point
 * to @bio's bi_io_vec. It is the caller's responsibility to ensure that
 * @bio is not freed before the split bio.
 */
struct bio *bio_split(struct bio *bio, int sectors,
		      gfp_t gfp, struct bio_set *bs)
{
	struct bio *split;

	BUG_ON(sectors <= 0);
	BUG_ON(sectors >= bio_sectors(bio));

	/* Zone append commands cannot be split */
	if (WARN_ON_ONCE(bio_op(bio) == REQ_OP_ZONE_APPEND))
		return NULL;

	split = bio_clone_fast(bio, gfp, bs);
	if (!split)
		return NULL;

	split->bi_iter.bi_size = sectors << 9;

	if (bio_integrity(split))
		bio_integrity_trim(split);

	bio_advance(bio, split->bi_iter.bi_size);

	if (bio_flagged(bio, BIO_TRACE_COMPLETION))
		bio_set_flag(split, BIO_TRACE_COMPLETION);

	return split;
}
EXPORT_SYMBOL(bio_split);
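
/*
 * Example: the canonical split-and-chain pattern used when a bio exceeds
 * some per-device limit.  A hedged sketch (hypothetical @max_sectors limit;
 * the in-tree splitting code in blk-merge.c follows this shape):
 *
 *	while (bio_sectors(bio) > max_sectors) {
 *		struct bio *split = bio_split(bio, max_sectors, GFP_NOIO,
 *					      &fs_bio_set);
 *
 *		bio_chain(split, bio);
 *		submit_bio_noacct(split);
 *	}
 *	submit_bio_noacct(bio);
 *
 * Each split completes into the remainder via bio_chain(), so the original
 * bio's ->bi_end_io runs exactly once, after every piece has finished.
 */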

/**
 * bio_trim - trim a bio
 * @bio:	bio to trim
 * @offset:	number of sectors to trim from the front of @bio
 * @size:	size we want to trim @bio to, in sectors
 */
void bio_trim(struct bio *bio, int offset, int size)
{
	/* 'bio' is a cloned bio which we need to trim to match
	 * the given offset and size.
	 */

	size <<= 9;
	if (offset == 0 && size == bio->bi_iter.bi_size)
		return;

	bio_advance(bio, offset << 9);
	bio->bi_iter.bi_size = size;

	if (bio_integrity(bio))
		bio_integrity_trim(bio);
}
EXPORT_SYMBOL_GPL(bio_trim);

/*
 * create memory pools for biovec's in a bio_set.
 * use the global biovec slabs created for general use.
 */
int biovec_init_pool(mempool_t *pool, int pool_entries)
{
	struct biovec_slab *bp = bvec_slabs + ARRAY_SIZE(bvec_slabs) - 1;

	return mempool_init_slab_pool(pool, pool_entries, bp->slab);
}

/*
 * bioset_exit - exit a bioset initialized with bioset_init()
 *
 * May be called on a zeroed but uninitialized bioset (i.e. allocated with
 * kzalloc()).
 */
void bioset_exit(struct bio_set *bs)
{
	if (bs->rescue_workqueue)
		destroy_workqueue(bs->rescue_workqueue);
	bs->rescue_workqueue = NULL;

	mempool_exit(&bs->bio_pool);
	mempool_exit(&bs->bvec_pool);

	bioset_integrity_free(bs);
	if (bs->bio_slab)
		bio_put_slab(bs);
	bs->bio_slab = NULL;
}
EXPORT_SYMBOL(bioset_exit);

/**
 * bioset_init - Initialize a bio_set
 * @bs:		pool to initialize
 * @pool_size:	Number of bio and bio_vecs to cache in the mempool
 * @front_pad:	Number of bytes to allocate in front of the returned bio
 * @flags:	Flags to modify behavior, currently %BIOSET_NEED_BVECS
 *              and %BIOSET_NEED_RESCUER
 *
 * Description:
 *    Set up a bio_set to be used with @bio_alloc_bioset. Allows the caller
 *    to ask for a number of bytes to be allocated in front of the bio.
 *    Front pad allocation is useful for embedding the bio inside
 *    another structure, to avoid allocating extra data to go with the bio.
 *    Note that the bio must be embedded at the END of that structure always,
 *    or things will break badly.
 *    If %BIOSET_NEED_BVECS is set in @flags, a mempool is allocated for
 *    allocating iovecs.  This pool is not needed e.g. for bio_clone_fast().
 *    If %BIOSET_NEED_RESCUER is set, a workqueue is created which can be used
 *    to dispatch queued requests when the mempool runs out of space.
 */
int bioset_init(struct bio_set *bs,
		unsigned int pool_size,
		unsigned int front_pad,
		int flags)
{
	bs->front_pad = front_pad;
	if (flags & BIOSET_NEED_BVECS)
		bs->back_pad = BIO_INLINE_VECS * sizeof(struct bio_vec);
	else
		bs->back_pad = 0;

	spin_lock_init(&bs->rescue_lock);
	bio_list_init(&bs->rescue_list);
	INIT_WORK(&bs->rescue_work, bio_alloc_rescue);

	bs->bio_slab = bio_find_or_create_slab(bs);
	if (!bs->bio_slab)
		return -ENOMEM;

	if (mempool_init_slab_pool(&bs->bio_pool, pool_size, bs->bio_slab))
		goto bad;

	if ((flags & BIOSET_NEED_BVECS) &&
	    biovec_init_pool(&bs->bvec_pool, pool_size))
		goto bad;

	if (!(flags & BIOSET_NEED_RESCUER))
		return 0;

	bs->rescue_workqueue = alloc_workqueue("bioset", WQ_MEM_RECLAIM, 0);
	if (!bs->rescue_workqueue)
		goto bad;

	return 0;
bad:
	bioset_exit(bs);
	return -ENOMEM;
}
EXPORT_SYMBOL(bioset_init);
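
/*
 * Example: the front_pad mechanics described above.  A driver embeds its
 * per-I/O state in front of the bio so that one mempool allocation covers
 * both.  A hedged sketch (struct my_io, struct my_dev and my_bioset are
 * hypothetical; the bio must be the LAST member of the structure):
 *
 *	struct my_io {
 *		struct my_dev *dev;
 *		struct bio bio;
 *	};
 *
 *	bioset_init(&my_bioset, BIO_POOL_SIZE,
 *		    offsetof(struct my_io, bio), BIOSET_NEED_BVECS);
 *
 *	struct bio *bio = bio_alloc_bioset(GFP_NOIO, 1, &my_bioset);
 *	struct my_io *io = container_of(bio, struct my_io, bio);
 *
 * This is how stacking drivers such as dm and md attach per-bio data
 * without a second allocation.
 */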

/*
 * Initialize and setup a new bio_set, based on the settings from
 * another bio_set.
 */
int bioset_init_from_src(struct bio_set *bs, struct bio_set *src)
{
	int flags;

	flags = 0;
	if (src->bvec_pool.min_nr)
		flags |= BIOSET_NEED_BVECS;
	if (src->rescue_workqueue)
		flags |= BIOSET_NEED_RESCUER;

	return bioset_init(bs, src->bio_pool.min_nr, src->front_pad, flags);
}
EXPORT_SYMBOL(bioset_init_from_src);

static int __init init_bio(void)
{
	int i;

	bio_integrity_init();

	for (i = 0; i < ARRAY_SIZE(bvec_slabs); i++) {
		struct biovec_slab *bvs = bvec_slabs + i;

		bvs->slab = kmem_cache_create(bvs->name,
				bvs->nr_vecs * sizeof(struct bio_vec), 0,
				SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
	}

	if (bioset_init(&fs_bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS))
		panic("bio: can't allocate bios\n");

	if (bioset_integrity_create(&fs_bio_set, BIO_POOL_SIZE))
		panic("bio: can't create integrity pool\n");

	return 0;
}
subsys_initcall(init_bio);