// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2001 Jens Axboe <axboe@kernel.dk>
 */
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/uio.h>
#include <linux/iocontext.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/mempool.h>
#include <linux/workqueue.h>
#include <linux/cgroup.h>
#include <linux/blk-cgroup.h>
#include <linux/highmem.h>
#include <linux/sched/sysctl.h>
#include <linux/blk-crypto.h>
#include <linux/xarray.h>

#include <trace/events/block.h>
#include "blk.h"
#include "blk-rq-qos.h"

/*
 * Per-cpu cache of freed bios, refilled by bio_put() and drained by
 * bio_alloc_kiocb().
 */
struct bio_alloc_cache {
	struct bio_list		free_list;
	unsigned int		nr;
};

static struct biovec_slab {
	int nr_vecs;
	char *name;
	struct kmem_cache *slab;
} bvec_slabs[] __read_mostly = {
	{ .nr_vecs = 16, .name = "biovec-16" },
	{ .nr_vecs = 64, .name = "biovec-64" },
	{ .nr_vecs = 128, .name = "biovec-128" },
	{ .nr_vecs = BIO_MAX_VECS, .name = "biovec-max" },
};

static struct biovec_slab *biovec_slab(unsigned short nr_vecs)
{
	switch (nr_vecs) {
	/* smaller bios use inline vecs */
	case 5 ... 16:
		return &bvec_slabs[0];
	case 17 ... 64:
		return &bvec_slabs[1];
	case 65 ... 128:
		return &bvec_slabs[2];
	case 129 ... BIO_MAX_VECS:
		return &bvec_slabs[3];
	default:
		BUG();
		return NULL;
	}
}

/*
 * fs_bio_set is the bio_set containing bio and iovec memory pools used by
 * IO code that does not need private memory pools.
 */
struct bio_set fs_bio_set;
EXPORT_SYMBOL(fs_bio_set);

/*
 * Our slab pool management
 */
struct bio_slab {
	struct kmem_cache *slab;
	unsigned int slab_ref;
	unsigned int slab_size;
	char name[8];
};
static DEFINE_MUTEX(bio_slab_lock);
static DEFINE_XARRAY(bio_slabs);

static struct bio_slab *create_bio_slab(unsigned int size)
{
	struct bio_slab *bslab = kzalloc(sizeof(*bslab), GFP_KERNEL);

	if (!bslab)
		return NULL;

	snprintf(bslab->name, sizeof(bslab->name), "bio-%d", size);
	bslab->slab = kmem_cache_create(bslab->name, size,
			ARCH_KMALLOC_MINALIGN, SLAB_HWCACHE_ALIGN, NULL);
	if (!bslab->slab)
		goto fail_alloc_slab;

	bslab->slab_ref = 1;
	bslab->slab_size = size;

	if (!xa_err(xa_store(&bio_slabs, size, bslab, GFP_KERNEL)))
		return bslab;

	kmem_cache_destroy(bslab->slab);

fail_alloc_slab:
	kfree(bslab);
	return NULL;
}

static inline unsigned int bs_bio_slab_size(struct bio_set *bs)
{
	return bs->front_pad + sizeof(struct bio) + bs->back_pad;
}

static struct kmem_cache *bio_find_or_create_slab(struct bio_set *bs)
{
	unsigned int size = bs_bio_slab_size(bs);
	struct bio_slab *bslab;

	mutex_lock(&bio_slab_lock);
	bslab = xa_load(&bio_slabs, size);
	if (bslab)
		bslab->slab_ref++;
	else
		bslab = create_bio_slab(size);
	mutex_unlock(&bio_slab_lock);

	if (bslab)
		return bslab->slab;
	return NULL;
}

static void bio_put_slab(struct bio_set *bs)
{
	struct bio_slab *bslab = NULL;
	unsigned int slab_size = bs_bio_slab_size(bs);

	mutex_lock(&bio_slab_lock);

	bslab = xa_load(&bio_slabs, slab_size);
	if (WARN(!bslab, KERN_ERR "bio: unable to find slab!\n"))
		goto out;

	WARN_ON_ONCE(bslab->slab != bs->bio_slab);

	WARN_ON(!bslab->slab_ref);

	if (--bslab->slab_ref)
		goto out;

	xa_erase(&bio_slabs, slab_size);

	kmem_cache_destroy(bslab->slab);
	kfree(bslab);

out:
	mutex_unlock(&bio_slab_lock);
}

void bvec_free(mempool_t *pool, struct bio_vec *bv, unsigned short nr_vecs)
{
	BIO_BUG_ON(nr_vecs > BIO_MAX_VECS);

	if (nr_vecs == BIO_MAX_VECS)
		mempool_free(bv, pool);
	else if (nr_vecs > BIO_INLINE_VECS)
		kmem_cache_free(biovec_slab(nr_vecs)->slab, bv);
}

/*
 * Make the first allocation restricted and don't dump info on allocation
 * failures, since we'll fall back to the mempool in case of failure.
 */
static inline gfp_t bvec_alloc_gfp(gfp_t gfp)
{
	return (gfp & ~(__GFP_DIRECT_RECLAIM | __GFP_IO)) |
		__GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN;
}

struct bio_vec *bvec_alloc(mempool_t *pool, unsigned short *nr_vecs,
		gfp_t gfp_mask)
{
	struct biovec_slab *bvs = biovec_slab(*nr_vecs);

	if (WARN_ON_ONCE(!bvs))
		return NULL;

	/*
	 * Upgrade the nr_vecs request to take full advantage of the
	 * allocation.  We also rely on this in the bvec_free path.
	 */
	*nr_vecs = bvs->nr_vecs;

	/*
	 * Try a slab allocation first for all smaller allocations.  If that
	 * fails and __GFP_DIRECT_RECLAIM is set retry with the mempool.
	 * The mempool is sized to handle up to BIO_MAX_VECS entries.
	 */
	if (*nr_vecs < BIO_MAX_VECS) {
		struct bio_vec *bvl;

		bvl = kmem_cache_alloc(bvs->slab, bvec_alloc_gfp(gfp_mask));
		if (likely(bvl) || !(gfp_mask & __GFP_DIRECT_RECLAIM))
			return bvl;
		*nr_vecs = BIO_MAX_VECS;
	}

	return mempool_alloc(pool, gfp_mask);
}

void bio_uninit(struct bio *bio)
{
#ifdef CONFIG_BLK_CGROUP
	if (bio->bi_blkg) {
		blkg_put(bio->bi_blkg);
		bio->bi_blkg = NULL;
	}
#endif
	if (bio_integrity(bio))
		bio_integrity_free(bio);

	bio_crypt_free_ctx(bio);
}
EXPORT_SYMBOL(bio_uninit);

static void bio_free(struct bio *bio)
{
	struct bio_set *bs = bio->bi_pool;
	void *p;

	bio_uninit(bio);

	if (bs) {
		bvec_free(&bs->bvec_pool, bio->bi_io_vec, bio->bi_max_vecs);

		/*
		 * If we have front padding, adjust the bio pointer before
		 * freeing.
		 */
		p = bio;
		p -= bs->front_pad;

		mempool_free(p, &bs->bio_pool);
	} else {
		/* Bio was allocated by bio_kmalloc() */
		kfree(bio);
	}
}

/*
 * Users of this function have their own bio allocation. Subsequently,
 * they must remember to pair any call to bio_init() with bio_uninit()
 * when IO has completed, or when the bio is released.
 */
void bio_init(struct bio *bio, struct bio_vec *table,
	      unsigned short max_vecs)
{
	bio->bi_next = NULL;
	bio->bi_bdev = NULL;
	bio->bi_opf = 0;
	bio->bi_flags = 0;
	bio->bi_ioprio = 0;
	bio->bi_write_hint = 0;
	bio->bi_status = 0;
	bio->bi_iter.bi_sector = 0;
	bio->bi_iter.bi_size = 0;
	bio->bi_iter.bi_idx = 0;
	bio->bi_iter.bi_bvec_done = 0;
	bio->bi_end_io = NULL;
	bio->bi_private = NULL;
#ifdef CONFIG_BLK_CGROUP
	bio->bi_blkg = NULL;
	bio->bi_issue.value = 0;
#ifdef CONFIG_BLK_CGROUP_IOCOST
	bio->bi_iocost_cost = 0;
#endif
#endif
#ifdef CONFIG_BLK_INLINE_ENCRYPTION
	bio->bi_crypt_context = NULL;
#endif
#ifdef CONFIG_BLK_DEV_INTEGRITY
	bio->bi_integrity = NULL;
#endif
	bio->bi_vcnt = 0;

	atomic_set(&bio->__bi_remaining, 1);
	atomic_set(&bio->__bi_cnt, 1);

	bio->bi_max_vecs = max_vecs;
	bio->bi_io_vec = table;
	bio->bi_pool = NULL;
}
EXPORT_SYMBOL(bio_init);
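
/*
 * Example (editorial sketch, not part of the original file): callers that
 * own the storage for a bio can initialize it directly instead of going
 * through a bio_set, the way blkdev_issue_flush() drives an on-stack bio.
 * "bdev" is an assumed, caller-provided block device:
 *
 *	struct bio bio;
 *
 *	bio_init(&bio, NULL, 0);
 *	bio_set_dev(&bio, bdev);
 *	bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
 *	ret = submit_bio_wait(&bio);
 *
 * Such a bio never reaches bio_free(); its owner must not bio_put() it.
 */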

/**
 * bio_reset - reinitialize a bio
 * @bio:	bio to reset
 *
 * Description:
 *   After calling bio_reset(), @bio will be in the same state as a freshly-
 *   allocated bio returned from bio_alloc_bioset() - the only fields that
 *   are preserved are the ones that are initialized by bio_alloc_bioset().
 *   See the comment in struct bio.
 */
void bio_reset(struct bio *bio)
{
	bio_uninit(bio);
	memset(bio, 0, BIO_RESET_BYTES);
	atomic_set(&bio->__bi_remaining, 1);
}
EXPORT_SYMBOL(bio_reset);

static struct bio *__bio_chain_endio(struct bio *bio)
{
	struct bio *parent = bio->bi_private;

	if (bio->bi_status && !parent->bi_status)
		parent->bi_status = bio->bi_status;
	bio_put(bio);
	return parent;
}

static void bio_chain_endio(struct bio *bio)
{
	bio_endio(__bio_chain_endio(bio));
}

/**
 * bio_chain - chain bio completions
 * @bio: the target bio
 * @parent: the parent bio of @bio
 *
 * The caller won't have a bi_end_io called when @bio completes - instead,
 * @parent's bi_end_io will be called.  @parent also gets its remaining
 * completion count incremented, so it is not completed until @bio has
 * signalled it.
 *
 * The caller must not set bi_private or bi_end_io in @bio.
 */
void bio_chain(struct bio *bio, struct bio *parent)
{
	BUG_ON(bio->bi_private || bio->bi_end_io);

	bio->bi_private = parent;
	bio->bi_end_io = bio_chain_endio;
	bio_inc_remaining(parent);
}
EXPORT_SYMBOL(bio_chain);
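
/*
 * Example (editorial sketch): the common "allocate a successor and chain the
 * full bio to it" loop, used by callers that build a chain of bios for one
 * logical operation. "gfp" and "nr_pages" are assumed caller state:
 *
 *	new = bio_alloc(gfp, nr_pages);
 *	bio_chain(bio, new);
 *	submit_bio(bio);
 *	bio = new;
 *
 * Each submitted bio completes into its successor; only the final bio's
 * bi_end_io runs, and only once every link has finished.
 */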

static void bio_alloc_rescue(struct work_struct *work)
{
	struct bio_set *bs = container_of(work, struct bio_set, rescue_work);
	struct bio *bio;

	while (1) {
		spin_lock(&bs->rescue_lock);
		bio = bio_list_pop(&bs->rescue_list);
		spin_unlock(&bs->rescue_lock);

		if (!bio)
			break;

		submit_bio_noacct(bio);
	}
}

static void punt_bios_to_rescuer(struct bio_set *bs)
{
	struct bio_list punt, nopunt;
	struct bio *bio;

	if (WARN_ON_ONCE(!bs->rescue_workqueue))
		return;
	/*
	 * In order to guarantee forward progress we must punt only bios that
	 * were allocated from this bio_set; otherwise, if there was a bio on
	 * this list we could deadlock.
	 *
	 * Since bio lists are singly linked, pop them all instead of trying
	 * to remove from the middle of the list:
	 */
	bio_list_init(&punt);
	bio_list_init(&nopunt);

	while ((bio = bio_list_pop(&current->bio_list[0])))
		bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio);
	current->bio_list[0] = nopunt;

	bio_list_init(&nopunt);
	while ((bio = bio_list_pop(&current->bio_list[1])))
		bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio);
	current->bio_list[1] = nopunt;

	spin_lock(&bs->rescue_lock);
	bio_list_merge(&bs->rescue_list, &punt);
	spin_unlock(&bs->rescue_lock);

	queue_work(bs->rescue_workqueue, &bs->rescue_work);
}

/**
 * bio_alloc_bioset - allocate a bio for I/O
 * @gfp_mask:   the GFP_* mask given to the slab allocator
 * @nr_iovecs:	number of iovecs to pre-allocate
 * @bs:		the bio_set to allocate from.
 *
 * Allocate a bio from the mempools in @bs.
 *
 * If %__GFP_DIRECT_RECLAIM is set then bio_alloc will always be able to
 * allocate a bio.  This is due to the mempool guarantees.  To make this work,
 * callers must never allocate more than 1 bio at a time from the general
 * pool.  Callers that need to allocate more than 1 bio must always submit
 * the previously allocated bio for IO before attempting to allocate a new
 * one.  Failure to do so can cause deadlocks under memory pressure.
 *
 * Returns: Pointer to new bio on success, NULL on failure.
 */
struct bio *bio_alloc_bioset(gfp_t gfp_mask, unsigned short nr_iovecs,
			     struct bio_set *bs)
{
	gfp_t saved_gfp = gfp_mask;
	struct bio *bio;
	void *p;

	/* should not use nobvec bioset for nr_iovecs > 0 */
	if (WARN_ON_ONCE(!mempool_initialized(&bs->bvec_pool) && nr_iovecs > 0))
		return NULL;

	/*
	 * submit_bio_noacct() converts recursion to iteration; this means if
	 * we're running beneath it, any bios we allocate and submit will not
	 * be submitted (and thus freed) until after we return.
	 *
	 * This exposes us to a potential deadlock if we allocate multiple
	 * bios from the same bio_set while running underneath
	 * submit_bio_noacct().  If we were to allocate multiple bios (say a
	 * stacking block driver that was splitting bios), we would deadlock
	 * if we exhausted the mempool's reserve.
	 *
	 * We solve this, and guarantee forward progress, with a rescuer
	 * workqueue per bio_set.  If we go to allocate and there are bios on
	 * current->bio_list, we first try the allocation without
	 * __GFP_DIRECT_RECLAIM; if that fails, we punt those bios we would
	 * be blocking to the rescuer workqueue before we retry with the
	 * original gfp_flags.
	 */
	if (current->bio_list &&
	    (!bio_list_empty(&current->bio_list[0]) ||
	     !bio_list_empty(&current->bio_list[1])) &&
	    bs->rescue_workqueue)
		gfp_mask &= ~__GFP_DIRECT_RECLAIM;

	p = mempool_alloc(&bs->bio_pool, gfp_mask);
	if (!p && gfp_mask != saved_gfp) {
		punt_bios_to_rescuer(bs);
		gfp_mask = saved_gfp;
		p = mempool_alloc(&bs->bio_pool, gfp_mask);
	}
	if (unlikely(!p))
		return NULL;

	bio = p + bs->front_pad;
	if (nr_iovecs > BIO_INLINE_VECS) {
		struct bio_vec *bvl = NULL;

		bvl = bvec_alloc(&bs->bvec_pool, &nr_iovecs, gfp_mask);
		if (!bvl && gfp_mask != saved_gfp) {
			punt_bios_to_rescuer(bs);
			gfp_mask = saved_gfp;
			bvl = bvec_alloc(&bs->bvec_pool, &nr_iovecs, gfp_mask);
		}
		if (unlikely(!bvl))
			goto err_free;

		bio_init(bio, bvl, nr_iovecs);
	} else if (nr_iovecs) {
		bio_init(bio, bio->bi_inline_vecs, BIO_INLINE_VECS);
	} else {
		bio_init(bio, NULL, 0);
	}

	bio->bi_pool = bs;
	return bio;

err_free:
	mempool_free(p, &bs->bio_pool);
	return NULL;
}
EXPORT_SYMBOL(bio_alloc_bioset);
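
/*
 * Example (editorial sketch): allocate a one-segment read bio from the
 * global fs_bio_set and submit it. "bdev", "sector", "page" and "my_end_io"
 * are assumed caller state, not names from this file:
 *
 *	struct bio *bio = bio_alloc_bioset(GFP_NOIO, 1, &fs_bio_set);
 *
 *	if (!bio)
 *		return -ENOMEM;
 *	bio_set_dev(bio, bdev);
 *	bio->bi_opf = REQ_OP_READ;
 *	bio->bi_iter.bi_sector = sector;
 *	__bio_add_page(bio, page, PAGE_SIZE, 0);
 *	bio->bi_end_io = my_end_io;
 *	submit_bio(bio);
 */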

/**
 * bio_kmalloc - kmalloc a bio for I/O
 * @gfp_mask:   the GFP_* mask given to the slab allocator
 * @nr_iovecs:	number of iovecs to pre-allocate
 *
 * Use kmalloc to allocate and initialize a bio.
 *
 * Returns: Pointer to new bio on success, NULL on failure.
 */
struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned short nr_iovecs)
{
	struct bio *bio;

	if (nr_iovecs > UIO_MAXIOV)
		return NULL;

	bio = kmalloc(struct_size(bio, bi_inline_vecs, nr_iovecs), gfp_mask);
	if (unlikely(!bio))
		return NULL;
	bio_init(bio, nr_iovecs ? bio->bi_inline_vecs : NULL, nr_iovecs);
	bio->bi_pool = NULL;
	return bio;
}
EXPORT_SYMBOL(bio_kmalloc);
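
/*
 * Example (editorial sketch): a bio_kmalloc() bio has no bi_pool, so the
 * final bio_put() releases it with kfree() via bio_free(). "nr" is assumed
 * caller state:
 *
 *	struct bio *bio = bio_kmalloc(GFP_KERNEL, nr);
 *
 *	if (!bio)
 *		return -ENOMEM;
 *	...
 *	bio_put(bio);
 */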

void zero_fill_bio(struct bio *bio)
{
	struct bio_vec bv;
	struct bvec_iter iter;

	bio_for_each_segment(bv, bio, iter)
		memzero_bvec(&bv);
}
EXPORT_SYMBOL(zero_fill_bio);

/**
 * bio_truncate - truncate the bio to small size of @new_size
 * @bio:	the bio to be truncated
 * @new_size:	new size for truncating the bio
 *
 * Description:
 *   Truncate the bio to new size of @new_size. If bio_op(bio) is
 *   REQ_OP_READ, zero the truncated part. This function should only
 *   be used for handling corner cases, such as bio eod.
 */
void bio_truncate(struct bio *bio, unsigned new_size)
{
	struct bio_vec bv;
	struct bvec_iter iter;
	unsigned int done = 0;
	bool truncated = false;

	if (new_size >= bio->bi_iter.bi_size)
		return;

	if (bio_op(bio) != REQ_OP_READ)
		goto exit;

	bio_for_each_segment(bv, bio, iter) {
		if (done + bv.bv_len > new_size) {
			unsigned offset;

			if (!truncated)
				offset = new_size - done;
			else
				offset = 0;
			/* zero within the page: account for bv_offset */
			zero_user(bv.bv_page, bv.bv_offset + offset,
				  bv.bv_len - offset);
			truncated = true;
		}
		done += bv.bv_len;
	}

 exit:
	/*
	 * The bvec table is left untouched and stays immutable: end_io
	 * handlers may still walk every page via
	 * bio_for_each_segment_all().  Only the size is updated here.
	 */
	bio->bi_iter.bi_size = new_size;
}

/*
 * guard_bio_eod - truncate a bio so it does not extend past the end of its
 * block device.  The tail of a truncated READ is zeroed by bio_truncate().
 */
void guard_bio_eod(struct bio *bio)
{
	sector_t maxsector = bdev_nr_sectors(bio->bi_bdev);

	if (!maxsector)
		return;

	/*
	 * If the *whole* IO starts past the end of the device, let it
	 * through: the submission path will reject it with a proper error.
	 */
	if (unlikely(bio->bi_iter.bi_sector >= maxsector))
		return;

	maxsector -= bio->bi_iter.bi_sector;
	if (likely((bio->bi_iter.bi_size >> 9) <= maxsector))
		return;

	bio_truncate(bio, maxsector << 9);
}

#define ALLOC_CACHE_MAX		512
#define ALLOC_CACHE_SLACK	 64

static void bio_alloc_cache_prune(struct bio_alloc_cache *cache,
				  unsigned int nr)
{
	unsigned int i = 0;
	struct bio *bio;

	while ((bio = bio_list_pop(&cache->free_list)) != NULL) {
		cache->nr--;
		bio_free(bio);
		if (++i == nr)
			break;
	}
}

static int bio_cpu_dead(unsigned int cpu, struct hlist_node *node)
{
	struct bio_set *bs;

	bs = hlist_entry_safe(node, struct bio_set, cpuhp_dead);
	if (bs->cache) {
		struct bio_alloc_cache *cache = per_cpu_ptr(bs->cache, cpu);

		bio_alloc_cache_prune(cache, -1U);
	}
	return 0;
}

static void bio_alloc_cache_destroy(struct bio_set *bs)
{
	int cpu;

	if (!bs->cache)
		return;

	cpuhp_state_remove_instance_nocalls(CPUHP_BIO_DEAD, &bs->cpuhp_dead);
	for_each_possible_cpu(cpu) {
		struct bio_alloc_cache *cache;

		cache = per_cpu_ptr(bs->cache, cpu);
		bio_alloc_cache_prune(cache, -1U);
	}
	free_percpu(bs->cache);
}

/**
 * bio_put - release a reference to a bio
 * @bio:   bio to release reference to
 *
 * Description:
 *   Put a reference to a &struct bio, either one you have gotten with
 *   bio_alloc, bio_get or bio_clone_*. The last put of a bio will free it.
 */
void bio_put(struct bio *bio)
{
	if (unlikely(bio_flagged(bio, BIO_REFFED))) {
		BIO_BUG_ON(!atomic_read(&bio->__bi_cnt));
		if (!atomic_dec_and_test(&bio->__bi_cnt))
			return;
	}

	if (bio_flagged(bio, BIO_PERCPU_CACHE)) {
		struct bio_alloc_cache *cache;

		bio_uninit(bio);
		cache = per_cpu_ptr(bio->bi_pool->cache, get_cpu());
		bio_list_add_head(&cache->free_list, bio);
		if (++cache->nr > ALLOC_CACHE_MAX + ALLOC_CACHE_SLACK)
			bio_alloc_cache_prune(cache, ALLOC_CACHE_SLACK);
		put_cpu();
	} else {
		bio_free(bio);
	}
}
EXPORT_SYMBOL(bio_put);
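
/*
 * Example (editorial sketch): a submitter that must look at the bio after
 * submission takes an extra reference first; bio_get() sets BIO_REFFED so
 * the count above is honoured:
 *
 *	bio_get(bio);
 *	submit_bio(bio);
 *	if (bio->bi_status)
 *		do_something();
 *	bio_put(bio);
 *
 * Without the bio_get(), the completion path's put could free the bio while
 * the submitter is still inspecting it.
 */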

/**
 * __bio_clone_fast - clone a bio that shares the original bio's biovec
 * @bio: destination bio
 * @bio_src: bio to clone
 *
 * Clone a &bio. Caller will own the returned bio, but not the actual data
 * it points to. Reference count of returned bio will be one.
 *
 * Caller must ensure that @bio_src is not freed before @bio.
 */
void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
{
	WARN_ON_ONCE(bio->bi_pool && bio->bi_max_vecs);

	/*
	 * most users will be overriding ->bi_bdev with a new target,
	 * so we don't set nor calculate new physical/hw segment counts here
	 */
	bio->bi_bdev = bio_src->bi_bdev;
	bio_set_flag(bio, BIO_CLONED);
	if (bio_flagged(bio_src, BIO_THROTTLED))
		bio_set_flag(bio, BIO_THROTTLED);
	if (bio_flagged(bio_src, BIO_REMAPPED))
		bio_set_flag(bio, BIO_REMAPPED);
	bio->bi_opf = bio_src->bi_opf;
	bio->bi_ioprio = bio_src->bi_ioprio;
	bio->bi_write_hint = bio_src->bi_write_hint;
	bio->bi_iter = bio_src->bi_iter;
	bio->bi_io_vec = bio_src->bi_io_vec;

	bio_clone_blkg_association(bio, bio_src);
	blkcg_bio_issue_init(bio);
}
EXPORT_SYMBOL(__bio_clone_fast);

/**
 * bio_clone_fast - clone a bio that shares the original bio's biovec
 * @bio: bio to clone
 * @gfp_mask: allocation priority
 * @bs: bio_set to allocate from
 *
 * Like __bio_clone_fast, only also allocates the returned bio
 */
struct bio *bio_clone_fast(struct bio *bio, gfp_t gfp_mask, struct bio_set *bs)
{
	struct bio *b;

	b = bio_alloc_bioset(gfp_mask, 0, bs);
	if (!b)
		return NULL;

	__bio_clone_fast(b, bio);

	if (bio_crypt_clone(b, bio, gfp_mask) < 0)
		goto err_put;

	if (bio_integrity(bio) &&
	    bio_integrity_clone(b, bio, gfp_mask) < 0)
		goto err_put;

	return b;

err_put:
	bio_put(b);
	return NULL;
}
EXPORT_SYMBOL(bio_clone_fast);
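
/*
 * Example (editorial sketch of the stacking-driver pattern, e.g. md/dm):
 * clone the incoming bio from a private bio_set, point the clone at the
 * backing device and submit it; the original is completed from the clone's
 * bi_end_io. "my_bs", "lower_bdev" and "clone_endio" are assumed names:
 *
 *	clone = bio_clone_fast(bio, GFP_NOIO, &my_bs);
 *	if (!clone)
 *		return -ENOMEM;
 *	bio_set_dev(clone, lower_bdev);
 *	clone->bi_private = bio;
 *	clone->bi_end_io = clone_endio;
 *	submit_bio_noacct(clone);
 */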

const char *bio_devname(struct bio *bio, char *buf)
{
	return bdevname(bio->bi_bdev, buf);
}
EXPORT_SYMBOL(bio_devname);

static inline bool page_is_mergeable(const struct bio_vec *bv,
		struct page *page, unsigned int len, unsigned int off,
		bool *same_page)
{
	size_t bv_end = bv->bv_offset + bv->bv_len;
	phys_addr_t vec_end_addr = page_to_phys(bv->bv_page) + bv_end - 1;
	phys_addr_t page_addr = page_to_phys(page);

	if (vec_end_addr + 1 != page_addr + off)
		return false;
	if (xen_domain() && !xen_biovec_phys_mergeable(bv, page))
		return false;

	*same_page = ((vec_end_addr & PAGE_MASK) == page_addr);
	if (*same_page)
		return true;
	return (bv->bv_page + bv_end / PAGE_SIZE) == (page + off / PAGE_SIZE);
}

/*
 * Try to merge a page into a segment, while obeying the hardware segment
 * size limit.  This is not for normal read/write bios, but for passthrough
 * or Zone Append operations that we can't split.
 */
static bool bio_try_merge_hw_seg(struct request_queue *q, struct bio *bio,
		struct page *page, unsigned len,
		unsigned offset, bool *same_page)
{
	struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
	unsigned long mask = queue_segment_boundary(q);
	phys_addr_t addr1 = page_to_phys(bv->bv_page) + bv->bv_offset;
	phys_addr_t addr2 = page_to_phys(page) + offset + len - 1;

	if ((addr1 | mask) != (addr2 | mask))
		return false;
	if (bv->bv_len + len > queue_max_segment_size(q))
		return false;
	return __bio_try_merge_page(bio, page, len, offset, same_page);
}

/**
 * bio_add_hw_page - attempt to add a page to a bio with hw constraints
 * @q: the target queue
 * @bio: destination bio
 * @page: page to add
 * @len: vec entry length
 * @offset: vec entry offset
 * @max_sectors: maximum number of sectors that can be added
 * @same_page: return if the segment has been merged inside the same page
 *
 * Add a page to a bio while respecting the hardware max_sectors, max_segment
 * and gap limitations.
 */
int bio_add_hw_page(struct request_queue *q, struct bio *bio,
		struct page *page, unsigned int len, unsigned int offset,
		unsigned int max_sectors, bool *same_page)
{
	struct bio_vec *bvec;

	if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
		return 0;

	if (((bio->bi_iter.bi_size + len) >> 9) > max_sectors)
		return 0;

	if (bio->bi_vcnt > 0) {
		if (bio_try_merge_hw_seg(q, bio, page, len, offset, same_page))
			return len;

		/*
		 * If the queue doesn't support SG gaps and adding this
		 * segment would create a gap, disallow it.
		 */
		bvec = &bio->bi_io_vec[bio->bi_vcnt - 1];
		if (bvec_gap_to_prev(q, bvec, offset))
			return 0;
	}

	if (bio_full(bio, len))
		return 0;

	if (bio->bi_vcnt >= queue_max_segments(q))
		return 0;

	bvec = &bio->bi_io_vec[bio->bi_vcnt];
	bvec->bv_page = page;
	bvec->bv_len = len;
	bvec->bv_offset = offset;
	bio->bi_vcnt++;
	bio->bi_iter.bi_size += len;
	return len;
}

/**
 * bio_add_pc_page	- attempt to add page to passthrough bio
 * @q: the target queue
 * @bio: destination bio
 * @page: page to add
 * @len: vec entry length
 * @offset: vec entry offset
 *
 * Attempt to add a page to the bio_vec maplist. This can fail for a
 * number of reasons, such as the bio being full or target block device
 * limitations. The target block device must allow bio's up to PAGE_SIZE,
 * so it is always possible to add a single page to an empty bio.
 *
 * This should only be used by passthrough bios.
 */
int bio_add_pc_page(struct request_queue *q, struct bio *bio,
		struct page *page, unsigned int len, unsigned int offset)
{
	bool same_page = false;
	return bio_add_hw_page(q, bio, page, len, offset,
			queue_max_hw_sectors(q), &same_page);
}
EXPORT_SYMBOL(bio_add_pc_page);

/**
 * bio_add_zone_append_page - attempt to add page to zone-append bio
 * @bio: destination bio
 * @page: page to add
 * @len: vec entry length
 * @offset: vec entry offset
 *
 * Attempt to add a page to the bio_vec maplist of a bio that will be
 * submitted for a zone-append command.  This can fail for a number of
 * reasons, such as the bio being full or the target block device not being
 * a zoned block device.  The target block device must allow bio's up to
 * PAGE_SIZE, so it is always possible to add a single page to an empty bio.
 *
 * Returns: number of bytes added to the bio, or 0 in case of a failure.
 */
int bio_add_zone_append_page(struct bio *bio, struct page *page,
			     unsigned int len, unsigned int offset)
{
	struct request_queue *q = bio->bi_bdev->bd_disk->queue;
	bool same_page = false;

	if (WARN_ON_ONCE(bio_op(bio) != REQ_OP_ZONE_APPEND))
		return 0;

	if (WARN_ON_ONCE(!blk_queue_is_zoned(q)))
		return 0;

	return bio_add_hw_page(q, bio, page, len, offset,
			       queue_max_zone_append_sectors(q), &same_page);
}
EXPORT_SYMBOL_GPL(bio_add_zone_append_page);

/**
 * __bio_try_merge_page - try appending data to an existing bvec.
 * @bio: destination bio
 * @page: start page to add
 * @len: length of the data to add
 * @off: offset of the data relative to @page
 * @same_page: return if the segment has been merged inside the same page
 *
 * Try to add the data at @page + @off to the last bvec of @bio.  This is a
 * useful optimisation for file systems with a block size smaller than the
 * page size.
 *
 * Warn if (@len, @off) crosses pages in case that @same_page is true.
 *
 * Return %true on success or %false on failure.
 */
bool __bio_try_merge_page(struct bio *bio, struct page *page,
		unsigned int len, unsigned int off, bool *same_page)
{
	if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
		return false;

	if (bio->bi_vcnt > 0) {
		struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];

		if (page_is_mergeable(bv, page, len, off, same_page)) {
			if (bio->bi_iter.bi_size > UINT_MAX - len) {
				*same_page = false;
				return false;
			}
			bv->bv_len += len;
			bio->bi_iter.bi_size += len;
			return true;
		}
	}
	return false;
}
EXPORT_SYMBOL_GPL(__bio_try_merge_page);

/**
 * __bio_add_page - add page(s) to a bio in a new segment
 * @bio: destination bio
 * @page: start page to add
 * @len: length of the data to add, may cross pages
 * @off: offset of the data relative to @page, may cross pages
 *
 * Add the data at @page + @off to @bio as a new bvec.  The caller must
 * ensure that @bio has space for another bvec.
 */
void __bio_add_page(struct bio *bio, struct page *page,
		unsigned int len, unsigned int off)
{
	struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt];

	WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED));
	WARN_ON_ONCE(bio_full(bio, len));

	bv->bv_page = page;
	bv->bv_offset = off;
	bv->bv_len = len;

	bio->bi_iter.bi_size += len;
	bio->bi_vcnt++;

	if (!bio_flagged(bio, BIO_WORKINGSET) && unlikely(PageWorkingset(page)))
		bio_set_flag(bio, BIO_WORKINGSET);
}
EXPORT_SYMBOL_GPL(__bio_add_page);

/**
 *	bio_add_page	-	attempt to add page(s) to bio
 *	@bio: destination bio
 *	@page: start page to add
 *	@len: vec entry length, may cross pages
 *	@offset: vec entry offset relative to @page, may cross pages
 *
 *	Attempt to add page(s) to the bio_vec maplist. This will only fail
 *	if either bio->bi_vcnt == bio->bi_max_vecs or it's a cloned bio.
 */
int bio_add_page(struct bio *bio, struct page *page,
		 unsigned int len, unsigned int offset)
{
	bool same_page = false;

	if (!__bio_try_merge_page(bio, page, len, offset, &same_page)) {
		if (bio_full(bio, len))
			return 0;
		__bio_add_page(bio, page, len, offset);
	}
	return len;
}
EXPORT_SYMBOL(bio_add_page);
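
/*
 * Example (editorial sketch): fill a bio from a page array, stopping at the
 * first page that does not fit; bio_add_page() returns 0 rather than adding
 * a partial length. "pages" and "nr_pages" are assumed caller state:
 *
 *	for (i = 0; i < nr_pages; i++)
 *		if (bio_add_page(bio, pages[i], PAGE_SIZE, 0) != PAGE_SIZE)
 *			break;
 */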

void bio_release_pages(struct bio *bio, bool mark_dirty)
{
	struct bvec_iter_all iter_all;
	struct bio_vec *bvec;

	if (bio_flagged(bio, BIO_NO_PAGE_REF))
		return;

	bio_for_each_segment_all(bvec, bio, iter_all) {
		if (mark_dirty && !PageCompound(bvec->bv_page))
			set_page_dirty_lock(bvec->bv_page);
		put_page(bvec->bv_page);
	}
}
EXPORT_SYMBOL_GPL(bio_release_pages);

static void __bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter)
{
	WARN_ON_ONCE(bio->bi_max_vecs);

	bio->bi_vcnt = iter->nr_segs;
	bio->bi_io_vec = (struct bio_vec *)iter->bvec;
	bio->bi_iter.bi_bvec_done = iter->iov_offset;
	bio->bi_iter.bi_size = iter->count;
	bio_set_flag(bio, BIO_NO_PAGE_REF);
	bio_set_flag(bio, BIO_CLONED);
}

static int bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter)
{
	__bio_iov_bvec_set(bio, iter);
	iov_iter_advance(iter, iter->count);
	return 0;
}

static int bio_iov_bvec_set_append(struct bio *bio, struct iov_iter *iter)
{
	struct request_queue *q = bio->bi_bdev->bd_disk->queue;
	struct iov_iter i = *iter;

	iov_iter_truncate(&i, queue_max_zone_append_sectors(q) << 9);
	__bio_iov_bvec_set(bio, &i);
	iov_iter_advance(iter, i.count);
	return 0;
}

static void bio_put_pages(struct page **pages, size_t size, size_t off)
{
	size_t i, nr = DIV_ROUND_UP(size + (off & ~PAGE_MASK), PAGE_SIZE);

	for (i = 0; i < nr; i++)
		put_page(pages[i]);
}

#define PAGE_PTRS_PER_BVEC	(sizeof(struct bio_vec) / sizeof(struct page *))

/**
 * __bio_iov_iter_get_pages - pin user or kernel pages and add them to a bio
 * @bio: bio to add pages to
 * @iter: iov iterator describing the region to be mapped
 *
 * Pins pages from *iter and appends them to @bio's bvec array. The
 * pages will have to be released using put_page() when done.
 * For multi-segment *iter, this function only adds pages from the
 * next non-empty segment of the iov iterator.
 */
static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
{
	unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt;
	unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt;
	struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt;
	struct page **pages = (struct page **)bv;
	bool same_page = false;
	ssize_t size, left;
	unsigned len, i;
	size_t offset;

	/*
	 * Move page array up in the allocated memory for the bio vecs as far
	 * as possible so that we can start filling biovecs from the beginning
	 * without overwriting the temporary page array.
	 */
	BUILD_BUG_ON(PAGE_PTRS_PER_BVEC < 2);
	pages += entries_left * (PAGE_PTRS_PER_BVEC - 1);

	size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset);
	if (unlikely(size <= 0))
		return size ? size : -EFAULT;

	for (left = size, i = 0; left > 0; left -= len, i++) {
		struct page *page = pages[i];

		len = min_t(size_t, PAGE_SIZE - offset, left);

		if (__bio_try_merge_page(bio, page, len, offset, &same_page)) {
			if (same_page)
				put_page(page);
		} else {
			if (WARN_ON_ONCE(bio_full(bio, len))) {
				bio_put_pages(pages + i, left, offset);
				return -EINVAL;
			}
			__bio_add_page(bio, page, len, offset);
		}
		offset = 0;
	}

	iov_iter_advance(iter, size);
	return 0;
}

static int __bio_iov_append_get_pages(struct bio *bio, struct iov_iter *iter)
{
	unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt;
	unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt;
	struct request_queue *q = bio->bi_bdev->bd_disk->queue;
	unsigned int max_append_sectors = queue_max_zone_append_sectors(q);
	struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt;
	struct page **pages = (struct page **)bv;
	ssize_t size, left;
	unsigned len, i;
	size_t offset;
	int ret = 0;

	if (WARN_ON_ONCE(!max_append_sectors))
		return 0;

	/*
	 * Move page array up in the allocated memory for the bio vecs as far
	 * as possible so that we can start filling biovecs from the beginning
	 * without overwriting the temporary page array.
	 */
	BUILD_BUG_ON(PAGE_PTRS_PER_BVEC < 2);
	pages += entries_left * (PAGE_PTRS_PER_BVEC - 1);

	size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset);
	if (unlikely(size <= 0))
		return size ? size : -EFAULT;

	for (left = size, i = 0; left > 0; left -= len, i++) {
		struct page *page = pages[i];
		bool same_page = false;

		len = min_t(size_t, PAGE_SIZE - offset, left);
		if (bio_add_hw_page(q, bio, page, len, offset,
				max_append_sectors, &same_page) != len) {
			bio_put_pages(pages + i, left, offset);
			ret = -EINVAL;
			break;
		}
		if (same_page)
			put_page(page);
		offset = 0;
	}

	iov_iter_advance(iter, size - left);
	return ret;
}

/**
 * bio_iov_iter_get_pages - add user or kernel pages to a bio
 * @bio: bio to add pages to
 * @iter: iov iterator describing the region to be added
 *
 * This takes either an iterator pointing to user memory, or one pointing to
 * kernel pages (BVEC iterator). If we're adding user pages, we pin them and
 * map them into the kernel. On IO completion, the caller should put those
 * pages. For bvec based iterators bio_iov_iter_get_pages() uses the provided
 * bvecs rather than copying them. Hence anyone issuing kiocb based IO needs
 * to ensure the bvecs and pages stay referenced until the submitted I/O is
 * completed.
 *
 * The function tries, but does not guarantee, to pin as many pages as
 * fit into the bio, or are requested in @iter, whatever is smaller. If
 * MM encounters an error pinning the requested pages, it stops. Error
 * is returned only if 0 pages could be pinned.
 *
 * Return: 0 on success, or an -errno if no pages could be pinned.
 */
int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
{
	int ret = 0;

	if (iov_iter_is_bvec(iter)) {
		if (bio_op(bio) == REQ_OP_ZONE_APPEND)
			return bio_iov_bvec_set_append(bio, iter);
		return bio_iov_bvec_set(bio, iter);
	}

	do {
		if (bio_op(bio) == REQ_OP_ZONE_APPEND)
			ret = __bio_iov_append_get_pages(bio, iter);
		else
			ret = __bio_iov_iter_get_pages(bio, iter);
	} while (!ret && iov_iter_count(iter) && !bio_full(bio, 0));

	/* don't account direct I/O as memory stall */
	bio_clear_flag(bio, BIO_WORKINGSET);
	return bio->bi_vcnt ? 0 : ret;
}
EXPORT_SYMBOL_GPL(bio_iov_iter_get_pages);
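
/*
 * Example (editorial sketch of a direct-I/O style caller): size a bio for
 * the user buffer described by @iter, capped at BIO_MAX_VECS, then pin the
 * pages into it. A real caller would loop, submitting and reallocating for
 * iterators larger than one bio:
 *
 *	nr_vecs = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS);
 *	bio = bio_alloc(GFP_KERNEL, nr_vecs);
 *	ret = bio_iov_iter_get_pages(bio, iter);
 *	if (ret)
 *		bio_put(bio);
 */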

static void submit_bio_wait_endio(struct bio *bio)
{
	complete(bio->bi_private);
}

/**
 * submit_bio_wait - submit a bio, and wait until it completes
 * @bio: The &struct bio which describes the I/O
 *
 * Simple wrapper around submit_bio(). Returns 0 on success, or the error
 * from bio_endio() on failure.
 *
 * WARNING: Unlike how submit_bio() is usually used, this function does not
 * result in the bio reference being consumed. The caller must drop the
 * reference on his own.
 */
int submit_bio_wait(struct bio *bio)
{
	DECLARE_COMPLETION_ONSTACK_MAP(done,
			bio->bi_bdev->bd_disk->lockdep_map);
	unsigned long hang_check;

	bio->bi_private = &done;
	bio->bi_end_io = submit_bio_wait_endio;
	bio->bi_opf |= REQ_SYNC;
	submit_bio(bio);

	/* Prevent hang_check timer from firing at us during very long I/O */
	hang_check = sysctl_hung_task_timeout_secs;
	if (hang_check)
		while (!wait_for_completion_io_timeout(&done,
					hang_check * (HZ/2)))
			;
	else
		wait_for_completion_io(&done);

	return blk_status_to_errno(bio->bi_status);
}
EXPORT_SYMBOL(submit_bio_wait);
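
/*
 * Example (editorial sketch): synchronously read one page from "bdev" at
 * "sector" (both assumed caller state). The caller must be able to sleep,
 * and still owns the bio reference afterwards:
 *
 *	struct bio *bio = bio_alloc(GFP_KERNEL, 1);
 *
 *	bio_set_dev(bio, bdev);
 *	bio->bi_opf = REQ_OP_READ;
 *	bio->bi_iter.bi_sector = sector;
 *	__bio_add_page(bio, page, PAGE_SIZE, 0);
 *	ret = submit_bio_wait(bio);
 *	bio_put(bio);
 */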

/**
 * bio_advance - increment/complete a bio by some number of bytes
 * @bio:	bio to advance
 * @bytes:	number of bytes to complete
 *
 * This updates bi_sector, bi_size and bi_idx; if the number of bytes to
 * complete doesn't align with a bvec boundary, then bv_len and bv_offset will
 * be updated on the last bvec as well.
 *
 * @bio will then represent the remaining, uncompleted portion of the io.
 */
void bio_advance(struct bio *bio, unsigned bytes)
{
	if (bio_integrity(bio))
		bio_integrity_advance(bio, bytes);

	bio_crypt_advance(bio, bytes);
	bio_advance_iter(bio, &bio->bi_iter, bytes);
}
EXPORT_SYMBOL(bio_advance);

void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter,
			struct bio *src, struct bvec_iter *src_iter)
{
	while (src_iter->bi_size && dst_iter->bi_size) {
		struct bio_vec src_bv = bio_iter_iovec(src, *src_iter);
		struct bio_vec dst_bv = bio_iter_iovec(dst, *dst_iter);
		unsigned int bytes = min(src_bv.bv_len, dst_bv.bv_len);
		void *src_buf;

		/*
		 * Clamp both vecs to the common length, so that exactly
		 * @bytes is copied: memcpy_to_bvec() copies bv_len bytes and
		 * would otherwise over-read @src_buf when the destination
		 * vec is the longer one.
		 */
		src_bv.bv_len = bytes;
		dst_bv.bv_len = bytes;

		src_buf = bvec_kmap_local(&src_bv);
		memcpy_to_bvec(&dst_bv, src_buf);
		kunmap_local(src_buf);

		bio_advance_iter_single(src, src_iter, bytes);
		bio_advance_iter_single(dst, dst_iter, bytes);
	}
}
EXPORT_SYMBOL(bio_copy_data_iter);

/**
 * bio_copy_data - copy contents of data buffers from one bio to another
 * @dst: destination bio
 * @src: source bio
 *
 * Stops when it reaches the end of either @src or @dst - that is, copies
 * min(src->bi_size, dst->bi_size) bytes.
 */
void bio_copy_data(struct bio *dst, struct bio *src)
{
	struct bvec_iter src_iter = src->bi_iter;
	struct bvec_iter dst_iter = dst->bi_iter;

	bio_copy_data_iter(dst, &dst_iter, src, &src_iter);
}
EXPORT_SYMBOL(bio_copy_data);
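
/*
 * Example (editorial sketch of the bounce-buffer pattern): a driver that had
 * to read into its own pages copies the payload back into the original bio
 * before completing it. "orig" and "bounce" are assumed names:
 *
 *	bio_copy_data(orig, bounce);
 *	bio_put(bounce);
 *	bio_endio(orig);
 */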

void bio_free_pages(struct bio *bio)
{
	struct bio_vec *bvec;
	struct bvec_iter_all iter_all;

	bio_for_each_segment_all(bvec, bio, iter_all)
		__free_page(bvec->bv_page);
}
EXPORT_SYMBOL(bio_free_pages);

/*
 * bio_set_pages_dirty() and bio_check_pages_dirty() are support functions
 * for performing direct-IO in BIOs.
 *
 * The problem is that we cannot run set_page_dirty() from interrupt context
 * because the required locks are not interrupt-safe.  So what we can do is
 * to mark the pages dirty _before_ performing IO.  And in interrupt context,
 * check that the pages are still dirty.  If so, fine.  If not, redirty them
 * in process context.
 *
 * Compound pages are special-cased: normal use of set_page_dirty() would
 * confuse their lru lists, so they are simply skipped here.
 *
 * bio_set_pages_dirty() marks the pages straight away, while
 * bio_check_pages_dirty() defers any redirtying work to a workqueue, since
 * it may run from interrupt context at completion time.
 */
void bio_set_pages_dirty(struct bio *bio)
{
	struct bio_vec *bvec;
	struct bvec_iter_all iter_all;

	bio_for_each_segment_all(bvec, bio, iter_all) {
		if (!PageCompound(bvec->bv_page))
			set_page_dirty_lock(bvec->bv_page);
	}
}

/*
 * bio_check_pages_dirty() will check that all the BIO's pages are still
 * dirty.  If they are, then fine.  If, however, some pages are clean then
 * they must have been written out during the direct-IO read.  So we take
 * another ref on the BIO and re-dirty the pages in process context.
 *
 * It is expected that bio_check_pages_dirty() will wholly own the BIO from
 * here on.  It will run one put_page() against each page and will run one
 * bio_put() against the BIO.
 */
static void bio_dirty_fn(struct work_struct *work);

static DECLARE_WORK(bio_dirty_work, bio_dirty_fn);
static DEFINE_SPINLOCK(bio_dirty_lock);
static struct bio *bio_dirty_list;

/*
 * This runs in process context
 */
static void bio_dirty_fn(struct work_struct *work)
{
	struct bio *bio, *next;

	spin_lock_irq(&bio_dirty_lock);
	next = bio_dirty_list;
	bio_dirty_list = NULL;
	spin_unlock_irq(&bio_dirty_lock);

	while ((bio = next) != NULL) {
		next = bio->bi_private;

		bio_release_pages(bio, true);
		bio_put(bio);
	}
}

void bio_check_pages_dirty(struct bio *bio)
{
	struct bio_vec *bvec;
	unsigned long flags;
	struct bvec_iter_all iter_all;

	bio_for_each_segment_all(bvec, bio, iter_all) {
		if (!PageDirty(bvec->bv_page) && !PageCompound(bvec->bv_page))
			goto defer;
	}

	bio_release_pages(bio, false);
	bio_put(bio);
	return;
defer:
	spin_lock_irqsave(&bio_dirty_lock, flags);
	bio->bi_private = bio_dirty_list;
	bio_dirty_list = bio;
	spin_unlock_irqrestore(&bio_dirty_lock, flags);
	schedule_work(&bio_dirty_work);
}

static inline bool bio_remaining_done(struct bio *bio)
{
	/*
	 * If we're not chaining, then ->__bi_remaining is always 1 and
	 * we always end io on the first invocation.
	 */
	if (!bio_flagged(bio, BIO_CHAIN))
		return true;

	BUG_ON(atomic_read(&bio->__bi_remaining) <= 0);

	if (atomic_dec_and_test(&bio->__bi_remaining)) {
		bio_clear_flag(bio, BIO_CHAIN);
		return true;
	}

	return false;
}

/**
 * bio_endio - end I/O on a bio
 * @bio:	bio
 *
 * Description:
 *   bio_endio() will end I/O on the whole bio. bio_endio() is the preferred
 *   way to end I/O on a bio. No one should call bi_end_io() directly on a
 *   bio unless they own it and thus know that it has an end_io function.
 *
 *   bio_endio() can be called several times on a bio that has been chained
 *   using bio_chain().  The ->bi_end_io() function will only be called the
 *   last time.
 **/
void bio_endio(struct bio *bio)
{
again:
	if (!bio_remaining_done(bio))
		return;
	if (!bio_integrity_endio(bio))
		return;

	if (bio->bi_bdev && bio_flagged(bio, BIO_TRACKED))
		rq_qos_done_bio(bio->bi_bdev->bd_disk->queue, bio);

	if (bio->bi_bdev && bio_flagged(bio, BIO_TRACE_COMPLETION)) {
		trace_block_bio_complete(bio->bi_bdev->bd_disk->queue, bio);
		bio_clear_flag(bio, BIO_TRACE_COMPLETION);
	}

	/*
	 * Need to have a real endio function for chained bios, otherwise
	 * various corner cases will break (like stacking block devices that
	 * save/restore bi_end_io) - however, we want to avoid unbounded
	 * recursion and blowing the stack. Tail call optimization would
	 * handle this, but compiling with frame pointers also disables
	 * gcc's sibling call optimization.
	 */
	if (bio->bi_end_io == bio_chain_endio) {
		bio = __bio_chain_endio(bio);
		goto again;
	}

	blk_throtl_bio_endio(bio);
	/* release cgroup info */
	bio_uninit(bio);
	if (bio->bi_end_io)
		bio->bi_end_io(bio);
}
EXPORT_SYMBOL(bio_endio);

/**
 * bio_split - split a bio
 * @bio:	bio to split
 * @sectors:	number of sectors to split from the front of @bio
 * @gfp:	gfp mask
 * @bs:		bio set to allocate from
 *
 * Allocates and returns a new bio which represents @sectors from the start
 * of @bio, and updates @bio to represent the remaining sectors.
 *
 * Unless this is a discard request the newly allocated bio will point
 * to @bio's bi_io_vec. It is the caller's responsibility to ensure that
 * @bio is not freed before the split bio.
 */
struct bio *bio_split(struct bio *bio, int sectors,
		      gfp_t gfp, struct bio_set *bs)
{
	struct bio *split;

	BUG_ON(sectors <= 0);
	BUG_ON(sectors >= bio_sectors(bio));

	/* Zone append commands cannot be split */
	if (WARN_ON_ONCE(bio_op(bio) == REQ_OP_ZONE_APPEND))
		return NULL;

	split = bio_clone_fast(bio, gfp, bs);
	if (!split)
		return NULL;

	split->bi_iter.bi_size = sectors << 9;

	if (bio_integrity(split))
		bio_integrity_trim(split);

	bio_advance(bio, split->bi_iter.bi_size);

	if (bio_flagged(bio, BIO_TRACE_COMPLETION))
		bio_set_flag(split, BIO_TRACE_COMPLETION);

	return split;
}
EXPORT_SYMBOL(bio_split);
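
/*
 * Example (editorial sketch): cap a bio at a hardware or zone boundary,
 * submitting the front part and letting the remainder continue down the
 * stack, as splitting callers do. "bs" is an assumed bio_set, typically one
 * created with BIOSET_NEED_RESCUER:
 *
 *	if (sectors < bio_sectors(bio)) {
 *		struct bio *split = bio_split(bio, sectors, GFP_NOIO, bs);
 *
 *		bio_chain(split, bio);
 *		submit_bio_noacct(split);
 *	}
 */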

/**
 * bio_trim - trim a bio
 * @bio:	bio to trim
 * @offset:	number of sectors to trim from the front of @bio
 * @size:	size we want to trim @bio to, in sectors
 *
 * This function is typically used for bios that are cloned and submitted
 * to the underlying device in parts.
 */
void bio_trim(struct bio *bio, sector_t offset, sector_t size)
{
	/* @offset and @size are in sectors, so compare against bio_sectors */
	if (WARN_ON_ONCE(offset > BIO_MAX_SECTORS || size > BIO_MAX_SECTORS ||
			 offset + size > bio_sectors(bio)))
		return;

	size <<= 9;
	if (offset == 0 && size == bio->bi_iter.bi_size)
		return;

	bio_advance(bio, offset << 9);
	bio->bi_iter.bi_size = size;

	if (bio_integrity(bio))
		bio_integrity_trim(bio);
}
EXPORT_SYMBOL_GPL(bio_trim);
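
/*
 * Example (editorial sketch): after cloning, keep only the portion of the
 * I/O that maps to one stripe or target, in sectors relative to the clone's
 * start. "clone", "start" and "len" are assumed caller state:
 *
 *	bio_trim(clone, start, len);
 *	submit_bio_noacct(clone);
 */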

/*
 * create memory pools for biovec's in a bio_set.
 * use the global biovec slabs created for general use.
 */
int biovec_init_pool(mempool_t *pool, int pool_entries)
{
	struct biovec_slab *bp = bvec_slabs + ARRAY_SIZE(bvec_slabs) - 1;

	return mempool_init_slab_pool(pool, pool_entries, bp->slab);
}

/*
 * bioset_exit - exit a bioset initialized with bioset_init()
 *
 * May be called on a zeroed but uninitialized bioset (i.e. allocated with
 * kzalloc()).
 */
void bioset_exit(struct bio_set *bs)
{
	bio_alloc_cache_destroy(bs);
	if (bs->rescue_workqueue)
		destroy_workqueue(bs->rescue_workqueue);
	bs->rescue_workqueue = NULL;

	mempool_exit(&bs->bio_pool);
	mempool_exit(&bs->bvec_pool);

	bioset_integrity_free(bs);
	if (bs->bio_slab)
		bio_put_slab(bs);
	bs->bio_slab = NULL;
}
EXPORT_SYMBOL(bioset_exit);

/**
 * bioset_init - Initialize a bio_set
 * @bs:		pool to initialize
 * @pool_size:	Number of bio and bio_vecs to cache in the mempool
 * @front_pad:	Number of bytes to allocate in front of the returned bio
 * @flags:	Flags to modify behavior, currently %BIOSET_NEED_BVECS,
 *		%BIOSET_NEED_RESCUER and %BIOSET_PERCPU_CACHE
 *
 * Description:
 *    Set up a bio_set to be used with @bio_alloc_bioset. Allows the caller
 *    to ask for a number of bytes to be allocated in front of the bio.
 *    Front pad allocation is useful for embedding the bio inside
 *    another structure, to avoid allocating extra data to go with the bio.
 *    Note that the bio must be embedded at the END of that structure always,
 *    or things will break badly.
 *    If %BIOSET_NEED_BVECS is set in @flags, a separate pool will be
 *    allocated for allocating iovecs.  This pool is not needed e.g. for
 *    bio_clone_fast().
 *    If %BIOSET_NEED_RESCUER is set, a workqueue is created which can be
 *    used to dispatch queued requests when the mempool runs out of space.
 *    If %BIOSET_PERCPU_CACHE is set, a per-cpu bio cache is allocated for
 *    use with bio_alloc_kiocb().
 */
int bioset_init(struct bio_set *bs,
		unsigned int pool_size,
		unsigned int front_pad,
		int flags)
{
	bs->front_pad = front_pad;
	if (flags & BIOSET_NEED_BVECS)
		bs->back_pad = BIO_INLINE_VECS * sizeof(struct bio_vec);
	else
		bs->back_pad = 0;

	spin_lock_init(&bs->rescue_lock);
	bio_list_init(&bs->rescue_list);
	INIT_WORK(&bs->rescue_work, bio_alloc_rescue);

	bs->bio_slab = bio_find_or_create_slab(bs);
	if (!bs->bio_slab)
		return -ENOMEM;

	if (mempool_init_slab_pool(&bs->bio_pool, pool_size, bs->bio_slab))
		goto bad;

	if ((flags & BIOSET_NEED_BVECS) &&
	    biovec_init_pool(&bs->bvec_pool, pool_size))
		goto bad;

	if (flags & BIOSET_NEED_RESCUER) {
		bs->rescue_workqueue = alloc_workqueue("bioset",
							WQ_MEM_RECLAIM, 0);
		if (!bs->rescue_workqueue)
			goto bad;
	}
	if (flags & BIOSET_PERCPU_CACHE) {
		bs->cache = alloc_percpu(struct bio_alloc_cache);
		if (!bs->cache)
			goto bad;
		cpuhp_state_add_instance_nocalls(CPUHP_BIO_DEAD, &bs->cpuhp_dead);
	}

	return 0;
bad:
	bioset_exit(bs);
	return -ENOMEM;
}
EXPORT_SYMBOL(bioset_init);
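
/*
 * Example (editorial sketch): a driver embedding the bio at the end of its
 * per-I/O structure sizes front_pad so the containing structure can be
 * recovered from the bio. "struct my_io" and "my_bs" are hypothetical names:
 *
 *	struct my_io {
 *		void *private;
 *		struct bio bio;		(must be the last field)
 *	};
 *
 *	if (bioset_init(&my_bs, BIO_POOL_SIZE, offsetof(struct my_io, bio),
 *			BIOSET_NEED_BVECS))
 *		return -ENOMEM;
 *	...
 *	bio = bio_alloc_bioset(GFP_NOIO, 1, &my_bs);
 *	io = container_of(bio, struct my_io, bio);
 *	...
 *	bioset_exit(&my_bs);
 */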

/*
 * Initialize and setup a new bio_set, based on the settings from
 * another bio_set.
 */
int bioset_init_from_src(struct bio_set *bs, struct bio_set *src)
{
	int flags;

	flags = 0;
	if (src->bvec_pool.min_nr)
		flags |= BIOSET_NEED_BVECS;
	if (src->rescue_workqueue)
		flags |= BIOSET_NEED_RESCUER;

	return bioset_init(bs, src->bio_pool.min_nr, src->front_pad, flags);
}
EXPORT_SYMBOL(bioset_init_from_src);

/**
 * bio_alloc_kiocb - Allocate a bio from bio_set based on kiocb
 * @kiocb:	kiocb describing the IO
 * @nr_vecs:	number of iovecs to pre-allocate
 * @bs:		bio_set to allocate from
 *
 * Description:
 *    Like @bio_alloc_bioset, but pass in the kiocb. The kiocb is only
 *    used to check if we should dip into the per-cpu bio_alloc_cache.
 *    The allocation uses GFP_KERNEL internally. On return, the bio is
 *    marked BIO_PERCPU_CACHE, and the final put of the bio MUST be done
 *    from process context, not hard/soft IRQ.
 */
struct bio *bio_alloc_kiocb(struct kiocb *kiocb, unsigned short nr_vecs,
			    struct bio_set *bs)
{
	struct bio_alloc_cache *cache;
	struct bio *bio;

	if (!(kiocb->ki_flags & IOCB_ALLOC_CACHE) || nr_vecs > BIO_INLINE_VECS)
		return bio_alloc_bioset(GFP_KERNEL, nr_vecs, bs);

	cache = per_cpu_ptr(bs->cache, get_cpu());
	bio = bio_list_pop(&cache->free_list);
	if (bio) {
		cache->nr--;
		put_cpu();
		bio_init(bio, nr_vecs ? bio->bi_inline_vecs : NULL, nr_vecs);
		bio->bi_pool = bs;
		bio_set_flag(bio, BIO_PERCPU_CACHE);
		return bio;
	}
	put_cpu();
	bio = bio_alloc_bioset(GFP_KERNEL, nr_vecs, bs);
	bio_set_flag(bio, BIO_PERCPU_CACHE);
	return bio;
}
EXPORT_SYMBOL_GPL(bio_alloc_kiocb);
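
/*
 * Example (editorial sketch): a polled direct-I/O submitter opts in via
 * IOCB_ALLOC_CACHE and allocates from a bio_set that was initialized with
 * BIOSET_PERCPU_CACHE ("dio_pool" is a hypothetical such set):
 *
 *	kiocb->ki_flags |= IOCB_ALLOC_CACHE;
 *	bio = bio_alloc_kiocb(kiocb, nr_vecs, &dio_pool);
 *
 * The final bio_put() must run from process context so the bio can be
 * recycled into the per-cpu free list rather than freed.
 */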

static int __init init_bio(void)
{
	int i;

	bio_integrity_init();

	for (i = 0; i < ARRAY_SIZE(bvec_slabs); i++) {
		struct biovec_slab *bvs = bvec_slabs + i;

		bvs->slab = kmem_cache_create(bvs->name,
				bvs->nr_vecs * sizeof(struct bio_vec), 0,
				SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
	}

	cpuhp_setup_state_multi(CPUHP_BIO_DEAD, "block/bio:dead", NULL,
					bio_cpu_dead);

	if (bioset_init(&fs_bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS))
		panic("bio: can't allocate bios\n");

	if (bioset_integrity_create(&fs_bio_set, BIO_POOL_SIZE))
		panic("bio: can't create integrity pool\n");

	return 0;
}
subsys_initcall(init_bio);