linux/block/blk-map.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Functions related to mapping data to requests
 */
#include <linux/kernel.h>
#include <linux/sched/task_stack.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/uio.h>

#include "blk.h"

struct bio_map_data {
        bool is_our_pages : 1;
        bool is_null_mapped : 1;
        struct iov_iter iter;
        struct iovec iov[];
};

static struct bio_map_data *bio_alloc_map_data(struct iov_iter *data,
                                               gfp_t gfp_mask)
{
        struct bio_map_data *bmd;

        if (data->nr_segs > UIO_MAXIOV)
                return NULL;

        bmd = kmalloc(struct_size(bmd, iov, data->nr_segs), gfp_mask);
        if (!bmd)
                return NULL;
        memcpy(bmd->iov, data->iov, sizeof(struct iovec) * data->nr_segs);
        bmd->iter = *data;
        bmd->iter.iov = bmd->iov;
        return bmd;
}

/**
 * bio_copy_from_iter - copy all pages from iov_iter to bio
 * @bio: The &struct bio which describes the I/O as destination
 * @iter: iov_iter as source
 *
 * Copy all pages from iov_iter to bio.
 * Returns 0 on success, or error on failure.
 */
static int bio_copy_from_iter(struct bio *bio, struct iov_iter *iter)
{
        struct bio_vec *bvec;
        struct bvec_iter_all iter_all;

        bio_for_each_segment_all(bvec, bio, iter_all) {
                ssize_t ret;

                ret = copy_page_from_iter(bvec->bv_page,
                                          bvec->bv_offset,
                                          bvec->bv_len,
                                          iter);

                if (!iov_iter_count(iter))
                        break;

                if (ret < bvec->bv_len)
                        return -EFAULT;
        }

        return 0;
}

/**
 * bio_copy_to_iter - copy all pages from bio to iov_iter
 * @bio: The &struct bio which describes the I/O as source
 * @iter: iov_iter as destination
 *
 * Copy all pages from bio to iov_iter.
 * Returns 0 on success, or error on failure.
 */
static int bio_copy_to_iter(struct bio *bio, struct iov_iter iter)
{
        struct bio_vec *bvec;
        struct bvec_iter_all iter_all;

        bio_for_each_segment_all(bvec, bio, iter_all) {
                ssize_t ret;

                ret = copy_page_to_iter(bvec->bv_page,
                                        bvec->bv_offset,
                                        bvec->bv_len,
                                        &iter);

                if (!iov_iter_count(&iter))
                        break;

                if (ret < bvec->bv_len)
                        return -EFAULT;
        }

        return 0;
}

/**
 *      bio_uncopy_user -       finish previously mapped bio
 *      @bio: bio being terminated
 *
 *      Free pages allocated from bio_copy_user_iov() and write back data
 *      to user space in case of a read.
 */
static int bio_uncopy_user(struct bio *bio)
{
        struct bio_map_data *bmd = bio->bi_private;
        int ret = 0;

        if (!bmd->is_null_mapped) {
                /*
                 * if we're in a workqueue, the request is orphaned, so
                 * don't copy into a random user address space, just free
                 * and return -EINTR so user space doesn't expect any data.
                 */
                if (!current->mm)
                        ret = -EINTR;
                else if (bio_data_dir(bio) == READ)
                        ret = bio_copy_to_iter(bio, bmd->iter);
                if (bmd->is_our_pages)
                        bio_free_pages(bio);
        }
        kfree(bmd);
        bio_put(bio);
        return ret;
}

static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data,
                struct iov_iter *iter, gfp_t gfp_mask)
{
        struct bio_map_data *bmd;
        struct page *page;
        struct bio *bio, *bounce_bio;
        int i = 0, ret;
        int nr_pages;
        unsigned int len = iter->count;
        unsigned int offset = map_data ? offset_in_page(map_data->offset) : 0;

        bmd = bio_alloc_map_data(iter, gfp_mask);
        if (!bmd)
                return -ENOMEM;

        /*
         * We need to do a deep copy of the iov_iter including the iovecs.
         * The caller-provided iov might point to an on-stack or otherwise
         * short-lived one.
         */
        bmd->is_our_pages = !map_data;
        bmd->is_null_mapped = (map_data && map_data->null_mapped);

        nr_pages = DIV_ROUND_UP(offset + len, PAGE_SIZE);
        if (nr_pages > BIO_MAX_PAGES)
                nr_pages = BIO_MAX_PAGES;

        ret = -ENOMEM;
        bio = bio_kmalloc(gfp_mask, nr_pages);
        if (!bio)
                goto out_bmd;
        bio->bi_opf |= req_op(rq);

        if (map_data) {
                nr_pages = 1 << map_data->page_order;
                i = map_data->offset / PAGE_SIZE;
        }
        while (len) {
                unsigned int bytes = PAGE_SIZE;

                bytes -= offset;

                if (bytes > len)
                        bytes = len;

                if (map_data) {
                        if (i == map_data->nr_entries * nr_pages) {
                                ret = -ENOMEM;
                                goto cleanup;
                        }

                        page = map_data->pages[i / nr_pages];
                        page += (i % nr_pages);

                        i++;
                } else {
                        page = alloc_page(rq->q->bounce_gfp | gfp_mask);
                        if (!page) {
                                ret = -ENOMEM;
                                goto cleanup;
                        }
                }

                if (bio_add_pc_page(rq->q, bio, page, bytes, offset) < bytes) {
                        if (!map_data)
                                __free_page(page);
                        break;
                }

                len -= bytes;
                offset = 0;
        }

        if (map_data)
                map_data->offset += bio->bi_iter.bi_size;

        /*
         * success
         */
        if ((iov_iter_rw(iter) == WRITE &&
             (!map_data || !map_data->null_mapped)) ||
            (map_data && map_data->from_user)) {
                ret = bio_copy_from_iter(bio, iter);
                if (ret)
                        goto cleanup;
        } else {
                if (bmd->is_our_pages)
                        zero_fill_bio(bio);
                iov_iter_advance(iter, bio->bi_iter.bi_size);
        }

        bio->bi_private = bmd;

        bounce_bio = bio;
        ret = blk_rq_append_bio(rq, &bounce_bio);
        if (ret)
                goto cleanup;

        /*
         * We link the bounce buffer in and could have to traverse it later, so
         * we have to get a ref to prevent it from being freed
         */
        bio_get(bounce_bio);
        return 0;
cleanup:
        if (!map_data)
                bio_free_pages(bio);
        bio_put(bio);
out_bmd:
        kfree(bmd);
        return ret;
}

static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
                gfp_t gfp_mask)
{
        unsigned int max_sectors = queue_max_hw_sectors(rq->q);
        struct bio *bio, *bounce_bio;
        int ret;
        int j;

        if (!iov_iter_count(iter))
                return -EINVAL;

        bio = bio_kmalloc(gfp_mask, iov_iter_npages(iter, BIO_MAX_PAGES));
        if (!bio)
                return -ENOMEM;
        bio->bi_opf |= req_op(rq);

        while (iov_iter_count(iter)) {
                struct page **pages;
                ssize_t bytes;
                size_t offs, added = 0;
                int npages;

                bytes = iov_iter_get_pages_alloc(iter, &pages, LONG_MAX, &offs);
                if (unlikely(bytes <= 0)) {
                        ret = bytes ? bytes : -EFAULT;
                        goto out_unmap;
                }

                npages = DIV_ROUND_UP(offs + bytes, PAGE_SIZE);

                if (unlikely(offs & queue_dma_alignment(rq->q))) {
                        ret = -EINVAL;
                        j = 0;
                } else {
                        for (j = 0; j < npages; j++) {
                                struct page *page = pages[j];
                                unsigned int n = PAGE_SIZE - offs;
                                bool same_page = false;

                                if (n > bytes)
                                        n = bytes;

                                if (!bio_add_hw_page(rq->q, bio, page, n, offs,
                                                     max_sectors, &same_page)) {
                                        if (same_page)
                                                put_page(page);
                                        break;
                                }

                                added += n;
                                bytes -= n;
                                offs = 0;
                        }
                        iov_iter_advance(iter, added);
                }
                /*
                 * release the pages we didn't map into the bio, if any
                 */
                while (j < npages)
                        put_page(pages[j++]);
                kvfree(pages);
                /* couldn't stuff something into bio? */
                if (bytes)
                        break;
        }

        /*
         * Subtle: if we end up needing to bounce a bio, it would normally
         * disappear when its bi_end_io is run.  However, we need the original
         * bio for the unmap, so grab an extra reference to it
         */
        bio_get(bio);

        bounce_bio = bio;
        ret = blk_rq_append_bio(rq, &bounce_bio);
        if (ret)
                goto out_put_orig;

        /*
         * We link the bounce buffer in and could have to traverse it
         * later, so we have to get a ref to prevent it from being freed
         */
        bio_get(bounce_bio);
        return 0;

 out_put_orig:
        bio_put(bio);
 out_unmap:
        bio_release_pages(bio, false);
        bio_put(bio);
        return ret;
}

/**
 *      bio_unmap_user  -       unmap a bio
 *      @bio:           the bio being unmapped
 *
 *      Unmap a bio previously mapped by bio_map_user_iov(). Must be called from
 *      process context.
 *
 *      bio_unmap_user() may sleep.
 */
static void bio_unmap_user(struct bio *bio)
{
        bio_release_pages(bio, bio_data_dir(bio) == READ);
        bio_put(bio);
        bio_put(bio);
}

static void bio_invalidate_vmalloc_pages(struct bio *bio)
{
#ifdef ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE
        if (bio->bi_private && !op_is_write(bio_op(bio))) {
                unsigned long i, len = 0;

                for (i = 0; i < bio->bi_vcnt; i++)
                        len += bio->bi_io_vec[i].bv_len;
                invalidate_kernel_vmap_range(bio->bi_private, len);
        }
#endif
}

static void bio_map_kern_endio(struct bio *bio)
{
        bio_invalidate_vmalloc_pages(bio);
        bio_put(bio);
}

/**
 *      bio_map_kern    -       map kernel address into bio
 *      @q: the struct request_queue for the bio
 *      @data: pointer to buffer to map
 *      @len: length in bytes
 *      @gfp_mask: allocation flags for bio allocation
 *
 *      Map the kernel address into a bio suitable for I/O to a block
 *      device. Returns an error pointer in case of error.
 */
static struct bio *bio_map_kern(struct request_queue *q, void *data,
                unsigned int len, gfp_t gfp_mask)
{
        unsigned long kaddr = (unsigned long)data;
        unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
        unsigned long start = kaddr >> PAGE_SHIFT;
        const int nr_pages = end - start;
        bool is_vmalloc = is_vmalloc_addr(data);
        struct page *page;
        int offset, i;
        struct bio *bio;

        bio = bio_kmalloc(gfp_mask, nr_pages);
        if (!bio)
                return ERR_PTR(-ENOMEM);

        if (is_vmalloc) {
                flush_kernel_vmap_range(data, len);
                bio->bi_private = data;
        }

        offset = offset_in_page(kaddr);
        for (i = 0; i < nr_pages; i++) {
                unsigned int bytes = PAGE_SIZE - offset;

                if (len <= 0)
                        break;

                if (bytes > len)
                        bytes = len;

                if (!is_vmalloc)
                        page = virt_to_page(data);
                else
                        page = vmalloc_to_page(data);
                if (bio_add_pc_page(q, bio, page, bytes,
                                    offset) < bytes) {
                        /* we don't support partial mappings */
                        bio_put(bio);
                        return ERR_PTR(-EINVAL);
                }

                data += bytes;
                len -= bytes;
                offset = 0;
        }

        bio->bi_end_io = bio_map_kern_endio;
        return bio;
}

static void bio_copy_kern_endio(struct bio *bio)
{
        bio_free_pages(bio);
        bio_put(bio);
}

static void bio_copy_kern_endio_read(struct bio *bio)
{
        char *p = bio->bi_private;
        struct bio_vec *bvec;
        struct bvec_iter_all iter_all;

        bio_for_each_segment_all(bvec, bio, iter_all) {
                memcpy(p, page_address(bvec->bv_page), bvec->bv_len);
                p += bvec->bv_len;
        }

        bio_copy_kern_endio(bio);
}

/**
 *      bio_copy_kern   -       copy kernel address into bio
 *      @q: the struct request_queue for the bio
 *      @data: pointer to buffer to copy
 *      @len: length in bytes
 *      @gfp_mask: allocation flags for bio and page allocation
 *      @reading: data direction is READ
 *
 *      Copy the kernel address into a bio suitable for I/O to a block
 *      device. Returns an error pointer in case of error.
 */
static struct bio *bio_copy_kern(struct request_queue *q, void *data,
                unsigned int len, gfp_t gfp_mask, int reading)
{
        unsigned long kaddr = (unsigned long)data;
        unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
        unsigned long start = kaddr >> PAGE_SHIFT;
        struct bio *bio;
        void *p = data;
        int nr_pages = 0;

        /*
         * Overflow, abort
         */
        if (end < start)
                return ERR_PTR(-EINVAL);

        nr_pages = end - start;
        bio = bio_kmalloc(gfp_mask, nr_pages);
        if (!bio)
                return ERR_PTR(-ENOMEM);

        while (len) {
                struct page *page;
                unsigned int bytes = PAGE_SIZE;

                if (bytes > len)
                        bytes = len;

                page = alloc_page(q->bounce_gfp | gfp_mask);
                if (!page)
                        goto cleanup;

                if (!reading)
                        memcpy(page_address(page), p, bytes);

                if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes)
                        break;

                len -= bytes;
                p += bytes;
        }

        if (reading) {
                bio->bi_end_io = bio_copy_kern_endio_read;
                bio->bi_private = data;
        } else {
                bio->bi_end_io = bio_copy_kern_endio;
        }

        return bio;

cleanup:
        bio_free_pages(bio);
        bio_put(bio);
        return ERR_PTR(-ENOMEM);
}

/*
 * Append a bio to a passthrough request.  Only works if the bio can be merged
 * into the request based on the driver constraints.
 */
int blk_rq_append_bio(struct request *rq, struct bio **bio)
{
        struct bio *orig_bio = *bio;
        struct bvec_iter iter;
        struct bio_vec bv;
        unsigned int nr_segs = 0;

        blk_queue_bounce(rq->q, bio);

        bio_for_each_bvec(bv, *bio, iter)
                nr_segs++;

        if (!rq->bio) {
                blk_rq_bio_prep(rq, *bio, nr_segs);
        } else {
                if (!ll_back_merge_fn(rq, *bio, nr_segs)) {
                        if (orig_bio != *bio) {
                                bio_put(*bio);
                                *bio = orig_bio;
                        }
                        return -EINVAL;
                }

                rq->biotail->bi_next = *bio;
                rq->biotail = *bio;
                rq->__data_len += (*bio)->bi_iter.bi_size;
                bio_crypt_free_ctx(*bio);
        }

        return 0;
}
EXPORT_SYMBOL(blk_rq_append_bio);
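
/*
 * Example (editorial sketch, not part of the original file): a driver that
 * has built its own bio can attach it to a passthrough request with
 * blk_rq_append_bio().  The pattern below mirrors the helpers used in this
 * file; "example_attach_page" is a hypothetical wrapper, not a kernel API,
 * and error handling is abbreviated.
 *
 *      static int example_attach_page(struct request *rq, struct page *page,
 *                                     unsigned int len)
 *      {
 *              struct bio *bio;
 *              int ret;
 *
 *              bio = bio_kmalloc(GFP_KERNEL, 1);
 *              if (!bio)
 *                      return -ENOMEM;
 *              bio->bi_opf |= req_op(rq);
 *
 *              if (bio_add_pc_page(rq->q, bio, page, len, 0) < len) {
 *                      bio_put(bio);
 *                      return -EINVAL;
 *              }
 *
 *              ret = blk_rq_append_bio(rq, &bio);
 *              if (ret)
 *                      bio_put(bio);
 *              return ret;
 *      }
 */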

/**
 * blk_rq_map_user_iov - map user data to a request, for passthrough requests
 * @q:          request queue where request should be inserted
 * @rq:         request to map data to
 * @map_data:   pointer to the rq_map_data holding pages (if necessary)
 * @iter:       iovec iterator
 * @gfp_mask:   memory allocation flags
 *
 * Description:
 *    Data will be mapped directly for zero copy I/O, if possible. Otherwise
 *    a kernel bounce buffer is used.
 *
 *    A matching blk_rq_unmap_user() must be issued at the end of I/O, while
 *    still in process context.
 *
 *    Note: The mapped bio may need to be bounced through blk_queue_bounce()
 *    before being submitted to the device, as pages mapped may be out of
 *    reach. It's the caller's responsibility to make sure this happens. The
 *    original bio must be passed back in to blk_rq_unmap_user() for proper
 *    unmapping.
 */
int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
                        struct rq_map_data *map_data,
                        const struct iov_iter *iter, gfp_t gfp_mask)
{
        bool copy = false;
        unsigned long align = q->dma_pad_mask | queue_dma_alignment(q);
        struct bio *bio = NULL;
        struct iov_iter i;
        int ret = -EINVAL;

        if (!iter_is_iovec(iter))
                goto fail;

        if (map_data)
                copy = true;
        else if (iov_iter_alignment(iter) & align)
                copy = true;
        else if (queue_virt_boundary(q))
                copy = queue_virt_boundary(q) & iov_iter_gap_alignment(iter);

        i = *iter;
        do {
                if (copy)
                        ret = bio_copy_user_iov(rq, map_data, &i, gfp_mask);
                else
                        ret = bio_map_user_iov(rq, &i, gfp_mask);
                if (ret)
                        goto unmap_rq;
                if (!bio)
                        bio = rq->bio;
        } while (iov_iter_count(&i));

        return 0;

unmap_rq:
        blk_rq_unmap_user(bio);
fail:
        rq->bio = NULL;
        return ret;
}
EXPORT_SYMBOL(blk_rq_map_user_iov);
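
/*
 * Example (editorial sketch, not part of the original file): an SG_IO-style
 * ioctl path typically builds an iov_iter from a user iovec array and maps
 * it onto an already-allocated passthrough request.  "uvec" and "nr_segs"
 * are hypothetical caller-supplied values; error handling is abbreviated.
 *
 *      struct iovec *iov = NULL;
 *      struct iov_iter iter;
 *      ssize_t ret;
 *
 *      ret = import_iovec(rq_data_dir(rq), uvec, nr_segs, 0, &iov, &iter);
 *      if (ret < 0)
 *              return ret;
 *      ret = blk_rq_map_user_iov(q, rq, NULL, &iter, GFP_KERNEL);
 *      kfree(iov);
 */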

int blk_rq_map_user(struct request_queue *q, struct request *rq,
                    struct rq_map_data *map_data, void __user *ubuf,
                    unsigned long len, gfp_t gfp_mask)
{
        struct iovec iov;
        struct iov_iter i;
        int ret = import_single_range(rq_data_dir(rq), ubuf, len, &iov, &i);

        if (unlikely(ret < 0))
                return ret;

        return blk_rq_map_user_iov(q, rq, map_data, &i, gfp_mask);
}
EXPORT_SYMBOL(blk_rq_map_user);
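
/*
 * Example (editorial sketch, not part of the original file): mapping a
 * single user buffer onto a passthrough request, executing it, and
 * unmapping.  rq->bio is saved before execution because completion may
 * change it, as documented at blk_rq_unmap_user().  "ubuf" and "len" are
 * hypothetical caller-supplied values; error handling is abbreviated.
 *
 *      struct bio *bio;
 *      int ret;
 *
 *      ret = blk_rq_map_user(q, rq, NULL, ubuf, len, GFP_KERNEL);
 *      if (ret)
 *              return ret;
 *      bio = rq->bio;
 *
 *      blk_execute_rq(q, NULL, rq, 0);
 *
 *      ret = blk_rq_unmap_user(bio);
 */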

/**
 * blk_rq_unmap_user - unmap a request with user data
 * @bio:               start of bio list
 *
 * Description:
 *    Unmap a rq previously mapped by blk_rq_map_user(). The caller must
 *    supply the original rq->bio from the blk_rq_map_user() return, since
 *    the I/O completion may have changed rq->bio.
 */
int blk_rq_unmap_user(struct bio *bio)
{
        struct bio *mapped_bio;
        int ret = 0, ret2;

        while (bio) {
                mapped_bio = bio;
                if (unlikely(bio_flagged(bio, BIO_BOUNCED)))
                        mapped_bio = bio->bi_private;

                if (bio->bi_private) {
                        ret2 = bio_uncopy_user(mapped_bio);
                        if (ret2 && !ret)
                                ret = ret2;
                } else {
                        bio_unmap_user(mapped_bio);
                }

                mapped_bio = bio;
                bio = bio->bi_next;
                bio_put(mapped_bio);
        }

        return ret;
}
EXPORT_SYMBOL(blk_rq_unmap_user);

/**
 * blk_rq_map_kern - map kernel data to a request, for passthrough requests
 * @q:          request queue where request should be inserted
 * @rq:         request to fill
 * @kbuf:       the kernel buffer
 * @len:        length of kernel data
 * @gfp_mask:   memory allocation flags
 *
 * Description:
 *    Data will be mapped directly if possible. Otherwise a bounce
 *    buffer is used. Can be called multiple times to append multiple
 *    buffers.
 */
int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
                    unsigned int len, gfp_t gfp_mask)
{
        int reading = rq_data_dir(rq) == READ;
        unsigned long addr = (unsigned long) kbuf;
        struct bio *bio, *orig_bio;
        int ret;

        if (len > (queue_max_hw_sectors(q) << 9))
                return -EINVAL;
        if (!len || !kbuf)
                return -EINVAL;

        if (!blk_rq_aligned(q, addr, len) || object_is_on_stack(kbuf))
                bio = bio_copy_kern(q, kbuf, len, gfp_mask, reading);
        else
                bio = bio_map_kern(q, kbuf, len, gfp_mask);

        if (IS_ERR(bio))
                return PTR_ERR(bio);

        bio->bi_opf &= ~REQ_OP_MASK;
        bio->bi_opf |= req_op(rq);

        orig_bio = bio;
        ret = blk_rq_append_bio(rq, &bio);
        if (unlikely(ret)) {
                /* request is too big */
                bio_put(orig_bio);
                return ret;
        }

        return 0;
}
EXPORT_SYMBOL(blk_rq_map_kern);
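
/*
 * Example (editorial sketch, not part of the original file): issuing a
 * passthrough command with a kernel buffer.  "reading", "kbuf" and "len"
 * are hypothetical caller-supplied values; error handling is abbreviated.
 *
 *      struct request *rq;
 *      int ret;
 *
 *      rq = blk_get_request(q, reading ? REQ_OP_DRV_IN : REQ_OP_DRV_OUT, 0);
 *      if (IS_ERR(rq))
 *              return PTR_ERR(rq);
 *
 *      ret = blk_rq_map_kern(q, rq, kbuf, len, GFP_KERNEL);
 *      if (ret)
 *              goto out_put;
 *
 *      blk_execute_rq(q, NULL, rq, 0);
 * out_put:
 *      blk_put_request(rq);
 */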