linux/block/bounce.c
// SPDX-License-Identifier: GPL-2.0
/* bounce buffer handling for block devices
 *
 * - Split from highmem.c
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/mm.h>
#include <linux/export.h>
#include <linux/swap.h>
#include <linux/gfp.h>
#include <linux/bio.h>
#include <linux/pagemap.h>
#include <linux/mempool.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/init.h>
#include <linux/hash.h>
#include <linux/highmem.h>
#include <linux/memblock.h>
#include <linux/printk.h>
#include <asm/tlbflush.h>

#include <trace/events/block.h>
#include "blk.h"

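/* sizes, in pages, of the emergency bounce page pools */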
#define POOL_SIZE       64
#define ISA_POOL_SIZE   16

static struct bio_set bounce_bio_set, bounce_bio_split;
static mempool_t page_pool, isa_page_pool;

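/*
 * Set up the bio_sets used for bouncing: one to allocate the bounce
 * clones from and one for splitting over-sized bios.  Called from both
 * emergency-pool initialisers, so guard against running twice.
 */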
static void init_bounce_bioset(void)
{
        static bool bounce_bs_setup;
        int ret;

        if (bounce_bs_setup)
                return;

        ret = bioset_init(&bounce_bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
        BUG_ON(ret);
        if (bioset_integrity_create(&bounce_bio_set, BIO_POOL_SIZE))
                BUG_ON(1);

        ret = bioset_init(&bounce_bio_split, BIO_POOL_SIZE, 0, 0);
        BUG_ON(ret);
        bounce_bs_setup = true;
}

#if defined(CONFIG_HIGHMEM)
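/*
 * Allocate the emergency highmem bounce pool at boot.  Skipped when no
 * page can sit above the low-memory limit and none can be hot-added.
 */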
static __init int init_emergency_pool(void)
{
        int ret;
#if defined(CONFIG_HIGHMEM) && !defined(CONFIG_MEMORY_HOTPLUG)
        if (max_pfn <= max_low_pfn)
                return 0;
#endif

        ret = mempool_init_page_pool(&page_pool, POOL_SIZE, 0);
        BUG_ON(ret);
        pr_info("pool size: %d pages\n", POOL_SIZE);

        init_bounce_bioset();
        return 0;
}

__initcall(init_emergency_pool);
#endif

#ifdef CONFIG_HIGHMEM
/*
 * highmem version, map into the target vec
 */
static void bounce_copy_vec(struct bio_vec *to, unsigned char *vfrom)
{
        unsigned char *vto;

        vto = kmap_atomic(to->bv_page);
        memcpy(vto + to->bv_offset, vfrom, to->bv_len);
        kunmap_atomic(vto);
}

#else /* CONFIG_HIGHMEM */

#define bounce_copy_vec(to, vfrom)      \
        memcpy(page_address((to)->bv_page) + (to)->bv_offset, vfrom, (to)->bv_len)

#endif /* CONFIG_HIGHMEM */

/*
 * allocate pages in the DMA region for the ISA pool
 */
static void *mempool_alloc_pages_isa(gfp_t gfp_mask, void *data)
{
        return mempool_alloc_pages(gfp_mask | GFP_DMA, data);
}

static DEFINE_MUTEX(isa_mutex);

/*
 * Gets called "every" time someone inits a queue with BLK_BOUNCE_ISA
 * as the max address, so check whether the pool has already been created.
 */
int init_emergency_isa_pool(void)
{
        int ret;

        mutex_lock(&isa_mutex);

        if (mempool_initialized(&isa_page_pool)) {
                mutex_unlock(&isa_mutex);
                return 0;
        }

        ret = mempool_init(&isa_page_pool, ISA_POOL_SIZE, mempool_alloc_pages_isa,
                           mempool_free_pages, (void *) 0);
        BUG_ON(ret);

        pr_info("isa pool size: %d pages\n", ISA_POOL_SIZE);
        init_bounce_bioset();
        mutex_unlock(&isa_mutex);
        return 0;
}

/*
 * Simple bounce buffer support for highmem pages. Depending on the
 * queue gfp mask set, *to may or may not be a highmem page. kmap it
 * always; it will do the Right Thing.
 */
static void copy_to_high_bio_irq(struct bio *to, struct bio *from)
{
        unsigned char *vfrom;
        struct bio_vec tovec, fromvec;
        struct bvec_iter iter;
        /*
         * The bio of @from is created by bounce, so we can iterate
         * its bvec from start to end, but the @from->bi_iter can't be
         * trusted because it might be changed by splitting.
         */
        struct bvec_iter from_iter = BVEC_ITER_ALL_INIT;

        bio_for_each_segment(tovec, to, iter) {
                fromvec = bio_iter_iovec(from, from_iter);
                if (tovec.bv_page != fromvec.bv_page) {
                        /*
                         * fromvec->bv_offset and fromvec->bv_len might have
                         * been modified by the block layer, so use the original
                         * copy; bounce_copy_vec already uses tovec->bv_len.
                         */
                        vfrom = page_address(fromvec.bv_page) +
                                tovec.bv_offset;

                        bounce_copy_vec(&tovec, vfrom);
                        flush_dcache_page(tovec.bv_page);
                }
                bio_advance_iter(from, &from_iter, tovec.bv_len);
        }
}

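/*
 * Completion handling shared by all bounced bios: free the bounce pages
 * that replaced original pages, copy the completion status back to the
 * original bio and complete it.
 */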
static void bounce_end_io(struct bio *bio, mempool_t *pool)
{
        struct bio *bio_orig = bio->bi_private;
        struct bio_vec *bvec, orig_vec;
        struct bvec_iter orig_iter = bio_orig->bi_iter;
        struct bvec_iter_all iter_all;

        /*
         * free up bounce indirect pages used
         */
        bio_for_each_segment_all(bvec, bio, iter_all) {
                orig_vec = bio_iter_iovec(bio_orig, orig_iter);
                if (bvec->bv_page != orig_vec.bv_page) {
                        dec_zone_page_state(bvec->bv_page, NR_BOUNCE);
                        mempool_free(bvec->bv_page, pool);
                }
                bio_advance_iter(bio_orig, &orig_iter, orig_vec.bv_len);
        }

        bio_orig->bi_status = bio->bi_status;
        bio_endio(bio_orig);
        bio_put(bio);
}

static void bounce_end_io_write(struct bio *bio)
{
        bounce_end_io(bio, &page_pool);
}

static void bounce_end_io_write_isa(struct bio *bio)
{
        bounce_end_io(bio, &isa_page_pool);
}

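/*
 * Read completion: if the read succeeded, copy the data out of the
 * bounce pages into the original (possibly highmem) pages before
 * tearing the bounce bio down.
 */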
static void __bounce_end_io_read(struct bio *bio, mempool_t *pool)
{
        struct bio *bio_orig = bio->bi_private;

        if (!bio->bi_status)
                copy_to_high_bio_irq(bio_orig, bio);

        bounce_end_io(bio, pool);
}

static void bounce_end_io_read(struct bio *bio)
{
        __bounce_end_io_read(bio, &page_pool);
}

static void bounce_end_io_read_isa(struct bio *bio)
{
        __bounce_end_io_read(bio, &isa_page_pool);
}

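/*
 * Clone @bio_src segment by segment into a fresh bio whose bvec table
 * the bounce code owns outright, so that individual pages can later be
 * swapped for bounce pages.
 */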
static struct bio *bounce_clone_bio(struct bio *bio_src, gfp_t gfp_mask,
                struct bio_set *bs)
{
        struct bvec_iter iter;
        struct bio_vec bv;
        struct bio *bio;

        /*
         * Pre immutable biovecs, __bio_clone() used to just do a memcpy from
         * bio_src->bi_io_vec to bio->bi_io_vec.
         *
         * We can't do that anymore, because:
         *
         *  - The point of cloning the biovec is to produce a bio with a biovec
         *    the caller can modify: bi_idx and bi_bvec_done should be 0.
         *
         *  - The original bio could've had more than BIO_MAX_PAGES biovecs; if
         *    we tried to clone the whole thing bio_alloc_bioset() would fail.
         *    But the clone should succeed as long as the number of biovecs we
         *    actually need to allocate is fewer than BIO_MAX_PAGES.
         *
         *  - Lastly, bi_vcnt should not be looked at or relied upon by code
         *    that does not own the bio - reason being drivers don't use it for
         *    iterating over the biovec anymore, so expecting it to be kept up
         *    to date (i.e. for clones that share the parent biovec) is just
         *    asking for trouble and would force extra work on
         *    __bio_clone_fast() anyway.
         */

        bio = bio_alloc_bioset(gfp_mask, bio_segments(bio_src), bs);
        if (!bio)
                return NULL;
        bio->bi_disk            = bio_src->bi_disk;
        bio->bi_opf             = bio_src->bi_opf;
        bio->bi_ioprio          = bio_src->bi_ioprio;
        bio->bi_write_hint      = bio_src->bi_write_hint;
        bio->bi_iter.bi_sector  = bio_src->bi_iter.bi_sector;
        bio->bi_iter.bi_size    = bio_src->bi_iter.bi_size;

        switch (bio_op(bio)) {
        case REQ_OP_DISCARD:
        case REQ_OP_SECURE_ERASE:
        case REQ_OP_WRITE_ZEROES:
                break;
        case REQ_OP_WRITE_SAME:
                bio->bi_io_vec[bio->bi_vcnt++] = bio_src->bi_io_vec[0];
                break;
        default:
                bio_for_each_segment(bv, bio_src, iter)
                        bio->bi_io_vec[bio->bi_vcnt++] = bv;
                break;
        }

        if (bio_integrity(bio_src)) {
                int ret;

                ret = bio_integrity_clone(bio, bio_src, gfp_mask);
                if (ret < 0) {
                        bio_put(bio);
                        return NULL;
                }
        }

        bio_clone_blkg_association(bio, bio_src);
        blkcg_bio_issue_init(bio);

        return bio;
}

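/*
 * Slow path: split *bio_orig if it carries more segments than one
 * bounce clone can hold, clone the (possibly shortened) bio, replace
 * every page above the queue's bounce limit with a page from @pool
 * (copying the data over for writes), and hand the clone back to the
 * caller in place of the original.  The original bio is completed from
 * the bounce end_io handlers once the clone finishes.
 */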
static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
                               mempool_t *pool)
{
        struct bio *bio;
        int rw = bio_data_dir(*bio_orig);
        struct bio_vec *to, from;
        struct bvec_iter iter;
        unsigned i = 0;
        bool bounce = false;
        int sectors = 0;
        bool passthrough = bio_is_passthrough(*bio_orig);

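        /*
         * Count the sectors covered by the first BIO_MAX_PAGES segments
         * (the most one bounce clone can carry) and check whether any
         * page sits above the queue's bounce limit.
         */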
        bio_for_each_segment(from, *bio_orig, iter) {
                if (i++ < BIO_MAX_PAGES)
                        sectors += from.bv_len >> 9;
                if (page_to_pfn(from.bv_page) > q->limits.bounce_pfn)
                        bounce = true;
        }
        if (!bounce)
                return;

        if (!passthrough && sectors < bio_sectors(*bio_orig)) {
                bio = bio_split(*bio_orig, sectors, GFP_NOIO, &bounce_bio_split);
                bio_chain(bio, *bio_orig);
                generic_make_request(*bio_orig);
                *bio_orig = bio;
        }
        bio = bounce_clone_bio(*bio_orig, GFP_NOIO, passthrough ? NULL :
                        &bounce_bio_set);

        /*
         * The bvec table can't be updated by bio_for_each_segment_all(),
         * so retrieve the bvecs from the table directly. This is safe
         * because the 'bio' holds only single-page bvecs.
         */
        for (i = 0, to = bio->bi_io_vec; i < bio->bi_vcnt; to++, i++) {
                struct page *page = to->bv_page;

                if (page_to_pfn(page) <= q->limits.bounce_pfn)
                        continue;

                to->bv_page = mempool_alloc(pool, q->bounce_gfp);
                inc_zone_page_state(to->bv_page, NR_BOUNCE);

                if (rw == WRITE) {
                        char *vto, *vfrom;

                        flush_dcache_page(page);

                        vto = page_address(to->bv_page) + to->bv_offset;
                        vfrom = kmap_atomic(page) + to->bv_offset;
                        memcpy(vto, vfrom, to->bv_len);
                        kunmap_atomic(vfrom);
                }
        }

        trace_block_bio_bounce(q, *bio_orig);

        bio->bi_flags |= (1 << BIO_BOUNCED);

        if (pool == &page_pool) {
                bio->bi_end_io = bounce_end_io_write;
                if (rw == READ)
                        bio->bi_end_io = bounce_end_io_read;
        } else {
                bio->bi_end_io = bounce_end_io_write_isa;
                if (rw == READ)
                        bio->bi_end_io = bounce_end_io_read_isa;
        }

        bio->bi_private = *bio_orig;
        *bio_orig = bio;
}

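/*
 * Entry point from the request submission path: if any data page in
 * *bio_orig lies above the queue's bounce limit, replace the bio with a
 * bounced clone.  Note that *bio_orig may be updated.
 */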
void blk_queue_bounce(struct request_queue *q, struct bio **bio_orig)
{
        mempool_t *pool;

        /*
         * Data-less bio, nothing to bounce
         */
        if (!bio_has_data(*bio_orig))
                return;

        /*
         * For the non-ISA bounce case, just check whether the bounce pfn is
         * equal to or bigger than the highest pfn in the system -- in that
         * case, don't waste time iterating over bio segments.
         */
        if (!(q->bounce_gfp & GFP_DMA)) {
                if (q->limits.bounce_pfn >= blk_max_pfn)
                        return;
                pool = &page_pool;
        } else {
                BUG_ON(!mempool_initialized(&isa_page_pool));
                pool = &isa_page_pool;
        }

        /*
         * slow path
         */
        __blk_queue_bounce(q, bio_orig, pool);
}