linux/lib/iov_iter.c
   1#include <linux/export.h>
   2#include <linux/bvec.h>
   3#include <linux/uio.h>
   4#include <linux/pagemap.h>
   5#include <linux/highmem.h>
   6#include <linux/slab.h>
   7#include <linux/vmalloc.h>
   8#include <linux/splice.h>
   9#include <net/checksum.h>
  10#include <linux/scatterlist.h>
  11
  12#define PIPE_PARANOIA /* for now */
  13
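/*
 * The iterate_*() helpers below walk an iterator one segment at a time,
 * exposing the current segment in __v.  For iterate_iovec() the STEP
 * expression must return the number of bytes it failed to process, so a
 * partial fault in userspace ends the walk early; for iterate_kvec() and
 * iterate_bvec() STEP is evaluated only for its side effects, since
 * kernel mappings cannot fault.
 */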
  14#define iterate_iovec(i, n, __v, __p, skip, STEP) {     \
  15        size_t left;                                    \
  16        size_t wanted = n;                              \
  17        __p = i->iov;                                   \
  18        __v.iov_len = min(n, __p->iov_len - skip);      \
  19        if (likely(__v.iov_len)) {                      \
  20                __v.iov_base = __p->iov_base + skip;    \
  21                left = (STEP);                          \
  22                __v.iov_len -= left;                    \
  23                skip += __v.iov_len;                    \
  24                n -= __v.iov_len;                       \
  25        } else {                                        \
  26                left = 0;                               \
  27        }                                               \
  28        while (unlikely(!left && n)) {                  \
  29                __p++;                                  \
  30                __v.iov_len = min(n, __p->iov_len);     \
  31                if (unlikely(!__v.iov_len))             \
  32                        continue;                       \
  33                __v.iov_base = __p->iov_base;           \
  34                left = (STEP);                          \
  35                __v.iov_len -= left;                    \
  36                skip = __v.iov_len;                     \
  37                n -= __v.iov_len;                       \
  38        }                                               \
  39        n = wanted - n;                                 \
  40}
  41
  42#define iterate_kvec(i, n, __v, __p, skip, STEP) {      \
  43        size_t wanted = n;                              \
  44        __p = i->kvec;                                  \
  45        __v.iov_len = min(n, __p->iov_len - skip);      \
  46        if (likely(__v.iov_len)) {                      \
  47                __v.iov_base = __p->iov_base + skip;    \
  48                (void)(STEP);                           \
  49                skip += __v.iov_len;                    \
  50                n -= __v.iov_len;                       \
  51        }                                               \
  52        while (unlikely(n)) {                           \
  53                __p++;                                  \
  54                __v.iov_len = min(n, __p->iov_len);     \
  55                if (unlikely(!__v.iov_len))             \
  56                        continue;                       \
  57                __v.iov_base = __p->iov_base;           \
  58                (void)(STEP);                           \
  59                skip = __v.iov_len;                     \
  60                n -= __v.iov_len;                       \
  61        }                                               \
  62        n = wanted;                                     \
  63}
  64
  65#define iterate_bvec(i, n, __v, __bi, skip, STEP) {     \
  66        struct bvec_iter __start;                       \
  67        __start.bi_size = n;                            \
  68        __start.bi_bvec_done = skip;                    \
  69        __start.bi_idx = 0;                             \
  70        for_each_bvec(__v, i->bvec, __bi, __start) {    \
  71                if (!__v.bv_len)                        \
  72                        continue;                       \
  73                (void)(STEP);                           \
  74        }                                               \
  75}
  76
  77#define iterate_all_kinds(i, n, v, I, B, K) {                   \
  78        if (likely(n)) {                                        \
  79                size_t skip = i->iov_offset;                    \
  80                if (unlikely(i->type & ITER_BVEC)) {            \
  81                        struct bio_vec v;                       \
  82                        struct bvec_iter __bi;                  \
  83                        iterate_bvec(i, n, v, __bi, skip, (B))  \
  84                } else if (unlikely(i->type & ITER_KVEC)) {     \
  85                        const struct kvec *kvec;                \
  86                        struct kvec v;                          \
  87                        iterate_kvec(i, n, v, kvec, skip, (K))  \
  88                } else if (unlikely(i->type & ITER_DISCARD)) {  \
  89                } else {                                        \
  90                        const struct iovec *iov;                \
  91                        struct iovec v;                         \
  92                        iterate_iovec(i, n, v, iov, skip, (I))  \
  93                }                                               \
  94        }                                                       \
  95}
  96
  97#define iterate_and_advance(i, n, v, I, B, K) {                 \
  98        if (unlikely(i->count < n))                             \
  99                n = i->count;                                   \
 100        if (i->count) {                                         \
 101                size_t skip = i->iov_offset;                    \
 102                if (unlikely(i->type & ITER_BVEC)) {            \
 103                        const struct bio_vec *bvec = i->bvec;   \
 104                        struct bio_vec v;                       \
 105                        struct bvec_iter __bi;                  \
 106                        iterate_bvec(i, n, v, __bi, skip, (B))  \
 107                        i->bvec = __bvec_iter_bvec(i->bvec, __bi);      \
 108                        i->nr_segs -= i->bvec - bvec;           \
 109                        skip = __bi.bi_bvec_done;               \
 110                } else if (unlikely(i->type & ITER_KVEC)) {     \
 111                        const struct kvec *kvec;                \
 112                        struct kvec v;                          \
 113                        iterate_kvec(i, n, v, kvec, skip, (K))  \
 114                        if (skip == kvec->iov_len) {            \
 115                                kvec++;                         \
 116                                skip = 0;                       \
 117                        }                                       \
 118                        i->nr_segs -= kvec - i->kvec;           \
 119                        i->kvec = kvec;                         \
 120                } else if (unlikely(i->type & ITER_DISCARD)) {  \
 121                        skip += n;                              \
 122                } else {                                        \
 123                        const struct iovec *iov;                \
 124                        struct iovec v;                         \
 125                        iterate_iovec(i, n, v, iov, skip, (I))  \
 126                        if (skip == iov->iov_len) {             \
 127                                iov++;                          \
 128                                skip = 0;                       \
 129                        }                                       \
 130                        i->nr_segs -= iov - i->iov;             \
 131                        i->iov = iov;                           \
 132                }                                               \
 133                i->count -= n;                                  \
 134                i->iov_offset = skip;                           \
 135        }                                                       \
 136}
 137
 138static int copyout(void __user *to, const void *from, size_t n)
 139{
 140        if (access_ok(to, n)) {
 141                kasan_check_read(from, n);
 142                n = raw_copy_to_user(to, from, n);
 143        }
 144        return n;
 145}
 146
 147static int copyin(void *to, const void __user *from, size_t n)
 148{
 149        if (access_ok(from, n)) {
 150                kasan_check_write(to, n);
 151                n = raw_copy_from_user(to, from, n);
 152        }
 153        return n;
 154}
 155
 156static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes,
 157                         struct iov_iter *i)
 158{
 159        size_t skip, copy, left, wanted;
 160        const struct iovec *iov;
 161        char __user *buf;
 162        void *kaddr, *from;
 163
 164        if (unlikely(bytes > i->count))
 165                bytes = i->count;
 166
 167        if (unlikely(!bytes))
 168                return 0;
 169
 170        might_fault();
 171        wanted = bytes;
 172        iov = i->iov;
 173        skip = i->iov_offset;
 174        buf = iov->iov_base + skip;
 175        copy = min(bytes, iov->iov_len - skip);
 176
 177        if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_writeable(buf, copy)) {
 178                kaddr = kmap_atomic(page);
 179                from = kaddr + offset;
 180
 181                /* first chunk, usually the only one */
 182                left = copyout(buf, from, copy);
 183                copy -= left;
 184                skip += copy;
 185                from += copy;
 186                bytes -= copy;
 187
 188                while (unlikely(!left && bytes)) {
 189                        iov++;
 190                        buf = iov->iov_base;
 191                        copy = min(bytes, iov->iov_len);
 192                        left = copyout(buf, from, copy);
 193                        copy -= left;
 194                        skip = copy;
 195                        from += copy;
 196                        bytes -= copy;
 197                }
 198                if (likely(!bytes)) {
 199                        kunmap_atomic(kaddr);
 200                        goto done;
 201                }
 202                offset = from - kaddr;
 203                buf += copy;
 204                kunmap_atomic(kaddr);
 205                copy = min(bytes, iov->iov_len - skip);
 206        }
 207        /* Too bad - revert to non-atomic kmap */
 208
 209        kaddr = kmap(page);
 210        from = kaddr + offset;
 211        left = copyout(buf, from, copy);
 212        copy -= left;
 213        skip += copy;
 214        from += copy;
 215        bytes -= copy;
 216        while (unlikely(!left && bytes)) {
 217                iov++;
 218                buf = iov->iov_base;
 219                copy = min(bytes, iov->iov_len);
 220                left = copyout(buf, from, copy);
 221                copy -= left;
 222                skip = copy;
 223                from += copy;
 224                bytes -= copy;
 225        }
 226        kunmap(page);
 227
 228done:
 229        if (skip == iov->iov_len) {
 230                iov++;
 231                skip = 0;
 232        }
 233        i->count -= wanted - bytes;
 234        i->nr_segs -= iov - i->iov;
 235        i->iov = iov;
 236        i->iov_offset = skip;
 237        return wanted - bytes;
 238}
 239
 240static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t bytes,
 241                         struct iov_iter *i)
 242{
 243        size_t skip, copy, left, wanted;
 244        const struct iovec *iov;
 245        char __user *buf;
 246        void *kaddr, *to;
 247
 248        if (unlikely(bytes > i->count))
 249                bytes = i->count;
 250
 251        if (unlikely(!bytes))
 252                return 0;
 253
 254        might_fault();
 255        wanted = bytes;
 256        iov = i->iov;
 257        skip = i->iov_offset;
 258        buf = iov->iov_base + skip;
 259        copy = min(bytes, iov->iov_len - skip);
 260
 261        if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_readable(buf, copy)) {
 262                kaddr = kmap_atomic(page);
 263                to = kaddr + offset;
 264
 265                /* first chunk, usually the only one */
 266                left = copyin(to, buf, copy);
 267                copy -= left;
 268                skip += copy;
 269                to += copy;
 270                bytes -= copy;
 271
 272                while (unlikely(!left && bytes)) {
 273                        iov++;
 274                        buf = iov->iov_base;
 275                        copy = min(bytes, iov->iov_len);
 276                        left = copyin(to, buf, copy);
 277                        copy -= left;
 278                        skip = copy;
 279                        to += copy;
 280                        bytes -= copy;
 281                }
 282                if (likely(!bytes)) {
 283                        kunmap_atomic(kaddr);
 284                        goto done;
 285                }
 286                offset = to - kaddr;
 287                buf += copy;
 288                kunmap_atomic(kaddr);
 289                copy = min(bytes, iov->iov_len - skip);
 290        }
 291        /* Too bad - revert to non-atomic kmap */
 292
 293        kaddr = kmap(page);
 294        to = kaddr + offset;
 295        left = copyin(to, buf, copy);
 296        copy -= left;
 297        skip += copy;
 298        to += copy;
 299        bytes -= copy;
 300        while (unlikely(!left && bytes)) {
 301                iov++;
 302                buf = iov->iov_base;
 303                copy = min(bytes, iov->iov_len);
 304                left = copyin(to, buf, copy);
 305                copy -= left;
 306                skip = copy;
 307                to += copy;
 308                bytes -= copy;
 309        }
 310        kunmap(page);
 311
 312done:
 313        if (skip == iov->iov_len) {
 314                iov++;
 315                skip = 0;
 316        }
 317        i->count -= wanted - bytes;
 318        i->nr_segs -= iov - i->iov;
 319        i->iov = iov;
 320        i->iov_offset = skip;
 321        return wanted - bytes;
 322}
 323
 324#ifdef PIPE_PARANOIA
 325static bool sanity(const struct iov_iter *i)
 326{
 327        struct pipe_inode_info *pipe = i->pipe;
 328        int idx = i->idx;
 329        int next = pipe->curbuf + pipe->nrbufs;
 330        if (i->iov_offset) {
 331                struct pipe_buffer *p;
 332                if (unlikely(!pipe->nrbufs))
 333                        goto Bad;       // pipe must be non-empty
 334                if (unlikely(idx != ((next - 1) & (pipe->buffers - 1))))
 335                        goto Bad;       // must be at the last buffer...
 336
 337                p = &pipe->bufs[idx];
 338                if (unlikely(p->offset + p->len != i->iov_offset))
 339                        goto Bad;       // ... at the end of segment
 340        } else {
 341                if (idx != (next & (pipe->buffers - 1)))
 342                        goto Bad;       // must be right after the last buffer
 343        }
 344        return true;
 345Bad:
 346        printk(KERN_ERR "idx = %d, offset = %zd\n", i->idx, i->iov_offset);
 347        printk(KERN_ERR "curbuf = %d, nrbufs = %d, buffers = %d\n",
 348                        pipe->curbuf, pipe->nrbufs, pipe->buffers);
 349        for (idx = 0; idx < pipe->buffers; idx++)
 350                printk(KERN_ERR "[%p %p %d %d]\n",
 351                        pipe->bufs[idx].ops,
 352                        pipe->bufs[idx].page,
 353                        pipe->bufs[idx].offset,
 354                        pipe->bufs[idx].len);
 355        WARN_ON(1);
 356        return false;
 357}
 358#else
 359#define sanity(i) true
 360#endif
 361
 362static inline int next_idx(int idx, struct pipe_inode_info *pipe)
 363{
 364        return (idx + 1) & (pipe->buffers - 1);
 365}
 366
 367static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes,
 368                         struct iov_iter *i)
 369{
 370        struct pipe_inode_info *pipe = i->pipe;
 371        struct pipe_buffer *buf;
 372        size_t off;
 373        int idx;
 374
 375        if (unlikely(bytes > i->count))
 376                bytes = i->count;
 377
 378        if (unlikely(!bytes))
 379                return 0;
 380
 381        if (!sanity(i))
 382                return 0;
 383
 384        off = i->iov_offset;
 385        idx = i->idx;
 386        buf = &pipe->bufs[idx];
 387        if (off) {
 388                if (offset == off && buf->page == page) {
 389                        /* merge with the last one */
 390                        buf->len += bytes;
 391                        i->iov_offset += bytes;
 392                        goto out;
 393                }
 394                idx = next_idx(idx, pipe);
 395                buf = &pipe->bufs[idx];
 396        }
 397        if (idx == pipe->curbuf && pipe->nrbufs)
 398                return 0;
 399        pipe->nrbufs++;
 400        buf->ops = &page_cache_pipe_buf_ops;
 401        buf->flags = 0;
 402        get_page(buf->page = page);
 403        buf->offset = offset;
 404        buf->len = bytes;
 405        i->iov_offset = offset + bytes;
 406        i->idx = idx;
 407out:
 408        i->count -= bytes;
 409        return bytes;
 410}
 411
 412/*
 413 * Fault in one or more iovecs of the given iov_iter, to a maximum length of
 414 * bytes.  For each iovec, fault in each page that constitutes the iovec.
 415 *
 416 * Return 0 on success, or non-zero if the memory could not be accessed (i.e.
 417 * because it is an invalid address).
 418 */
 419int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
 420{
 421        size_t skip = i->iov_offset;
 422        const struct iovec *iov;
 423        int err;
 424        struct iovec v;
 425
 426        if (!(i->type & (ITER_BVEC|ITER_KVEC))) {
 427                iterate_iovec(i, bytes, v, iov, skip, ({
 428                        err = fault_in_pages_readable(v.iov_base, v.iov_len);
 429                        if (unlikely(err))
 430                                return err;
 431                0;}))
 432        }
 433        return 0;
 434}
 435EXPORT_SYMBOL(iov_iter_fault_in_readable);
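
/*
 * Illustrative sketch (editor's example, not part of the original file):
 * the usual buffered-write pattern around iov_iter_fault_in_readable().
 * The user pages are faulted in before the pagecache page is locked, and
 * the later atomic copy is allowed to come up short and be retried.  All
 * names below are hypothetical.
 */
static size_t __maybe_unused example_fill_page(struct page *page,
                                               unsigned long offset,
                                               size_t bytes,
                                               struct iov_iter *i)
{
        size_t copied;

retry:
        /* May sleep and take faults; must run before lock_page(). */
        if (unlikely(iov_iter_fault_in_readable(i, bytes)))
                return 0;       /* a real caller would return -EFAULT */

        lock_page(page);
        copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
        unlock_page(page);

        if (unlikely(!copied)) {
                /* The copy faulted even after fault-in; retry with one segment. */
                bytes = min(bytes, iov_iter_single_seg_count(i));
                if (!bytes)
                        return 0;
                goto retry;
        }

        iov_iter_advance(i, copied);
        return copied;
}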
 436
 437void iov_iter_init(struct iov_iter *i, unsigned int direction,
 438                        const struct iovec *iov, unsigned long nr_segs,
 439                        size_t count)
 440{
 441        WARN_ON(direction & ~(READ | WRITE));
 442        direction &= READ | WRITE;
 443
 444        /* It will get better.  Eventually... */
 445        if (uaccess_kernel()) {
 446                i->type = ITER_KVEC | direction;
 447                i->kvec = (struct kvec *)iov;
 448        } else {
 449                i->type = ITER_IOVEC | direction;
 450                i->iov = iov;
 451        }
 452        i->nr_segs = nr_segs;
 453        i->iov_offset = 0;
 454        i->count = count;
 455}
 456EXPORT_SYMBOL(iov_iter_init);
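
/*
 * Illustrative sketch (editor's example, hypothetical name): wrapping a
 * single user buffer in an ITER_IOVEC iterator and copying kernel data
 * into it, roughly what a simple read(2) path does.  READ means the
 * iterator is the destination of the transfer.
 */
static ssize_t __maybe_unused example_copy_to_user_buf(void __user *ubuf,
                                                       size_t len,
                                                       const void *src)
{
        struct iovec iov = { .iov_base = ubuf, .iov_len = len };
        struct iov_iter iter;
        size_t copied;

        iov_iter_init(&iter, READ, &iov, 1, len);
        copied = copy_to_iter(src, len, &iter);
        if (!copied && len)
                return -EFAULT;
        return copied;
}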
 457
 458static inline bool allocated(struct pipe_buffer *buf)
 459{
 460        return buf->ops == &default_pipe_buf_ops;
 461}
 462
 463static inline void data_start(const struct iov_iter *i, int *idxp, size_t *offp)
 464{
 465        size_t off = i->iov_offset;
 466        int idx = i->idx;
 467        if (off && (!allocated(&i->pipe->bufs[idx]) || off == PAGE_SIZE)) {
 468                idx = next_idx(idx, i->pipe);
 469                off = 0;
 470        }
 471        *idxp = idx;
 472        *offp = off;
 473}
 474
 475static size_t push_pipe(struct iov_iter *i, size_t size,
 476                        int *idxp, size_t *offp)
 477{
 478        struct pipe_inode_info *pipe = i->pipe;
 479        size_t off;
 480        int idx;
 481        ssize_t left;
 482
 483        if (unlikely(size > i->count))
 484                size = i->count;
 485        if (unlikely(!size))
 486                return 0;
 487
 488        left = size;
 489        data_start(i, &idx, &off);
 490        *idxp = idx;
 491        *offp = off;
 492        if (off) {
 493                left -= PAGE_SIZE - off;
 494                if (left <= 0) {
 495                        pipe->bufs[idx].len += size;
 496                        return size;
 497                }
 498                pipe->bufs[idx].len = PAGE_SIZE;
 499                idx = next_idx(idx, pipe);
 500        }
 501        while (idx != pipe->curbuf || !pipe->nrbufs) {
 502                struct page *page = alloc_page(GFP_USER);
 503                if (!page)
 504                        break;
 505                pipe->nrbufs++;
 506                pipe->bufs[idx].ops = &default_pipe_buf_ops;
 507                pipe->bufs[idx].flags = 0;
 508                pipe->bufs[idx].page = page;
 509                pipe->bufs[idx].offset = 0;
 510                if (left <= PAGE_SIZE) {
 511                        pipe->bufs[idx].len = left;
 512                        return size;
 513                }
 514                pipe->bufs[idx].len = PAGE_SIZE;
 515                left -= PAGE_SIZE;
 516                idx = next_idx(idx, pipe);
 517        }
 518        return size - left;
 519}
 520
 521static size_t copy_pipe_to_iter(const void *addr, size_t bytes,
 522                                struct iov_iter *i)
 523{
 524        struct pipe_inode_info *pipe = i->pipe;
 525        size_t n, off;
 526        int idx;
 527
 528        if (!sanity(i))
 529                return 0;
 530
 531        bytes = n = push_pipe(i, bytes, &idx, &off);
 532        if (unlikely(!n))
 533                return 0;
 534        for ( ; n; idx = next_idx(idx, pipe), off = 0) {
 535                size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
 536                memcpy_to_page(pipe->bufs[idx].page, off, addr, chunk);
 537                i->idx = idx;
 538                i->iov_offset = off + chunk;
 539                n -= chunk;
 540                addr += chunk;
 541        }
 542        i->count -= bytes;
 543        return bytes;
 544}
 545
 546size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
 547{
 548        const char *from = addr;
 549        if (unlikely(iov_iter_is_pipe(i)))
 550                return copy_pipe_to_iter(addr, bytes, i);
 551        if (iter_is_iovec(i))
 552                might_fault();
 553        iterate_and_advance(i, bytes, v,
 554                copyout(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
 555                memcpy_to_page(v.bv_page, v.bv_offset,
 556                               (from += v.bv_len) - v.bv_len, v.bv_len),
 557                memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len)
 558        )
 559
 560        return bytes;
 561}
 562EXPORT_SYMBOL(_copy_to_iter);
 563
 564#ifdef CONFIG_ARCH_HAS_UACCESS_MCSAFE
 565static int copyout_mcsafe(void __user *to, const void *from, size_t n)
 566{
 567        if (access_ok(to, n)) {
 568                kasan_check_read(from, n);
 569                n = copy_to_user_mcsafe((__force void *) to, from, n);
 570        }
 571        return n;
 572}
 573
 574static unsigned long memcpy_mcsafe_to_page(struct page *page, size_t offset,
 575                const char *from, size_t len)
 576{
 577        unsigned long ret;
 578        char *to;
 579
 580        to = kmap_atomic(page);
 581        ret = memcpy_mcsafe(to + offset, from, len);
 582        kunmap_atomic(to);
 583
 584        return ret;
 585}
 586
 587static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes,
 588                                struct iov_iter *i)
 589{
 590        struct pipe_inode_info *pipe = i->pipe;
 591        size_t n, off, xfer = 0;
 592        int idx;
 593
 594        if (!sanity(i))
 595                return 0;
 596
 597        bytes = n = push_pipe(i, bytes, &idx, &off);
 598        if (unlikely(!n))
 599                return 0;
 600        for ( ; n; idx = next_idx(idx, pipe), off = 0) {
 601                size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
 602                unsigned long rem;
 603
 604                rem = memcpy_mcsafe_to_page(pipe->bufs[idx].page, off, addr,
 605                                chunk);
 606                i->idx = idx;
 607                i->iov_offset = off + chunk - rem;
 608                xfer += chunk - rem;
 609                if (rem)
 610                        break;
 611                n -= chunk;
 612                addr += chunk;
 613        }
 614        i->count -= xfer;
 615        return xfer;
 616}
 617
 618/**
 619 * _copy_to_iter_mcsafe - copy to user with source-read error exception handling
 620 * @addr: source kernel address
 621 * @bytes: total transfer length
 622 * @iter: destination iterator
 623 *
 624 * The pmem driver arranges for filesystem-dax to use this facility via
 625 * dax_copy_to_iter() for protecting read/write to persistent memory.
 626 * Unless / until an architecture can guarantee identical performance
 627 * between _copy_to_iter_mcsafe() and _copy_to_iter() it would be a
 628 * performance regression to switch more users to the mcsafe version.
 629 *
 630 * Otherwise, the main differences between this and the typical _copy_to_iter() are:
 631 *
 632 * * Typical tail/residue handling after a fault retries the copy
 633 *   byte-by-byte until the fault happens again. Re-triggering machine
 634 *   checks is potentially fatal so the implementation uses source
 635 *   alignment and poison alignment assumptions to avoid re-triggering
 636 *   hardware exceptions.
 637 *
 638 * * ITER_KVEC, ITER_PIPE, and ITER_BVEC can return short copies.
 639 *   Compare to copy_to_iter() where only ITER_IOVEC attempts might return
 640 *   a short copy.
 641 *
 642 * See MCSAFE_TEST for self-test.
 643 */
 644size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i)
 645{
 646        const char *from = addr;
 647        unsigned long rem, curr_addr, s_addr = (unsigned long) addr;
 648
 649        if (unlikely(iov_iter_is_pipe(i)))
 650                return copy_pipe_to_iter_mcsafe(addr, bytes, i);
 651        if (iter_is_iovec(i))
 652                might_fault();
 653        iterate_and_advance(i, bytes, v,
 654                copyout_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
 655                ({
 656                rem = memcpy_mcsafe_to_page(v.bv_page, v.bv_offset,
 657                               (from += v.bv_len) - v.bv_len, v.bv_len);
 658                if (rem) {
 659                        curr_addr = (unsigned long) from;
 660                        bytes = curr_addr - s_addr - rem;
 661                        return bytes;
 662                }
 663                }),
 664                ({
 665                rem = memcpy_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len,
 666                                v.iov_len);
 667                if (rem) {
 668                        curr_addr = (unsigned long) from;
 669                        bytes = curr_addr - s_addr - rem;
 670                        return bytes;
 671                }
 672                })
 673        )
 674
 675        return bytes;
 676}
 677EXPORT_SYMBOL_GPL(_copy_to_iter_mcsafe);
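
/*
 * Illustrative sketch (editor's example, hypothetical name): how a
 * pmem-style driver's dax copy hook might sit on top of the mcsafe
 * variant.  Unlike _copy_to_iter(), a short return can happen here for
 * any iterator type when poison is hit in the source.
 */
static size_t __maybe_unused example_dax_copy_to_iter(void *pmem_addr,
                                                      size_t bytes,
                                                      struct iov_iter *i)
{
        /*
         * A value smaller than 'bytes' means the copy stopped at poisoned
         * memory; whatever was copied before that point is still valid.
         */
        return _copy_to_iter_mcsafe(pmem_addr, bytes, i);
}
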
 678#endif /* CONFIG_ARCH_HAS_UACCESS_MCSAFE */
 679
 680size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
 681{
 682        char *to = addr;
 683        if (unlikely(iov_iter_is_pipe(i))) {
 684                WARN_ON(1);
 685                return 0;
 686        }
 687        if (iter_is_iovec(i))
 688                might_fault();
 689        iterate_and_advance(i, bytes, v,
 690                copyin((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
 691                memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
 692                                 v.bv_offset, v.bv_len),
 693                memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
 694        )
 695
 696        return bytes;
 697}
 698EXPORT_SYMBOL(_copy_from_iter);
 699
 700bool _copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i)
 701{
 702        char *to = addr;
 703        if (unlikely(iov_iter_is_pipe(i))) {
 704                WARN_ON(1);
 705                return false;
 706        }
 707        if (unlikely(i->count < bytes))
 708                return false;
 709
 710        if (iter_is_iovec(i))
 711                might_fault();
 712        iterate_all_kinds(i, bytes, v, ({
 713                if (copyin((to += v.iov_len) - v.iov_len,
 714                                      v.iov_base, v.iov_len))
 715                        return false;
 716                0;}),
 717                memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
 718                                 v.bv_offset, v.bv_len),
 719                memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
 720        )
 721
 722        iov_iter_advance(i, bytes);
 723        return true;
 724}
 725EXPORT_SYMBOL(_copy_from_iter_full);
 726
 727size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
 728{
 729        char *to = addr;
 730        if (unlikely(iov_iter_is_pipe(i))) {
 731                WARN_ON(1);
 732                return 0;
 733        }
 734        iterate_and_advance(i, bytes, v,
 735                __copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len,
 736                                         v.iov_base, v.iov_len),
 737                memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
 738                                 v.bv_offset, v.bv_len),
 739                memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
 740        )
 741
 742        return bytes;
 743}
 744EXPORT_SYMBOL(_copy_from_iter_nocache);
 745
 746#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
 747/**
 748 * _copy_from_iter_flushcache - write destination through cpu cache
 749 * @addr: destination kernel address
 750 * @bytes: total transfer length
 751 * @iter: source iterator
 752 *
 753 * The pmem driver arranges for filesystem-dax to use this facility via
 754 * dax_copy_from_iter() for ensuring that writes to persistent memory
 755 * are flushed through the CPU cache. It is differentiated from
 756 * _copy_from_iter_nocache() in that it guarantees all data is flushed
 757 * for all iterator types. _copy_from_iter_nocache() only attempts to
 758 * bypass the cache for the ITER_IOVEC case, and on some archs may use
 759 * instructions that strand dirty-data in the cache.
 760 */
 761size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
 762{
 763        char *to = addr;
 764        if (unlikely(iov_iter_is_pipe(i))) {
 765                WARN_ON(1);
 766                return 0;
 767        }
 768        iterate_and_advance(i, bytes, v,
 769                __copy_from_user_flushcache((to += v.iov_len) - v.iov_len,
 770                                         v.iov_base, v.iov_len),
 771                memcpy_page_flushcache((to += v.bv_len) - v.bv_len, v.bv_page,
 772                                 v.bv_offset, v.bv_len),
 773                memcpy_flushcache((to += v.iov_len) - v.iov_len, v.iov_base,
 774                        v.iov_len)
 775        )
 776
 777        return bytes;
 778}
 779EXPORT_SYMBOL_GPL(_copy_from_iter_flushcache);
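
/*
 * Illustrative sketch (editor's example, hypothetical name): a pmem-style
 * dax write hook built on the flushcache variant, so the data reaches
 * persistence without leaving dirty cachelines behind, for any iterator
 * type.
 */
static size_t __maybe_unused example_dax_copy_from_iter(void *pmem_addr,
                                                        size_t bytes,
                                                        struct iov_iter *i)
{
        return _copy_from_iter_flushcache(pmem_addr, bytes, i);
}
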
 780#endif
 781
 782bool _copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
 783{
 784        char *to = addr;
 785        if (unlikely(iov_iter_is_pipe(i))) {
 786                WARN_ON(1);
 787                return false;
 788        }
 789        if (unlikely(i->count < bytes))
 790                return false;
 791        iterate_all_kinds(i, bytes, v, ({
 792                if (__copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len,
 793                                             v.iov_base, v.iov_len))
 794                        return false;
 795                0;}),
 796                memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
 797                                 v.bv_offset, v.bv_len),
 798                memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
 799        )
 800
 801        iov_iter_advance(i, bytes);
 802        return true;
 803}
 804EXPORT_SYMBOL(_copy_from_iter_full_nocache);
 805
 806static inline bool page_copy_sane(struct page *page, size_t offset, size_t n)
 807{
 808        struct page *head;
 809        size_t v = n + offset;
 810
 811        /*
 812         * The general case needs to access the page order in order
 813         * to compute the page size.
 814         * However, we mostly deal with order-0 pages and thus can
 815         * avoid a possible cache line miss for requests that fit all
 816         * page orders.
 817         */
 818        if (n <= v && v <= PAGE_SIZE)
 819                return true;
 820
 821        head = compound_head(page);
 822        v += (page - head) << PAGE_SHIFT;
 823
 824        if (likely(n <= v && v <= (page_size(head))))
 825                return true;
 826        WARN_ON(1);
 827        return false;
 828}
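
/*
 * Worked example for the fast path above: for an order-0 page, offset 256
 * and n = 512 give v = 768 <= PAGE_SIZE, so the copy is accepted without
 * touching the compound head.  A bogus request such as offset 512 with
 * n = PAGE_SIZE fails the first test, and for a non-compound page the
 * second test (against page_size(head) == PAGE_SIZE) rejects it.
 */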
 829
 830size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
 831                         struct iov_iter *i)
 832{
 833        if (unlikely(!page_copy_sane(page, offset, bytes)))
 834                return 0;
 835        if (i->type & (ITER_BVEC|ITER_KVEC)) {
 836                void *kaddr = kmap_atomic(page);
 837                size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
 838                kunmap_atomic(kaddr);
 839                return wanted;
 840        } else if (unlikely(iov_iter_is_discard(i)))
 841                return bytes;
 842        else if (likely(!iov_iter_is_pipe(i)))
 843                return copy_page_to_iter_iovec(page, offset, bytes, i);
 844        else
 845                return copy_page_to_iter_pipe(page, offset, bytes, i);
 846}
 847EXPORT_SYMBOL(copy_page_to_iter);
 848
 849size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
 850                         struct iov_iter *i)
 851{
 852        if (unlikely(!page_copy_sane(page, offset, bytes)))
 853                return 0;
 854        if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
 855                WARN_ON(1);
 856                return 0;
 857        }
 858        if (i->type & (ITER_BVEC|ITER_KVEC)) {
 859                void *kaddr = kmap_atomic(page);
 860                size_t wanted = _copy_from_iter(kaddr + offset, bytes, i);
 861                kunmap_atomic(kaddr);
 862                return wanted;
 863        } else
 864                return copy_page_from_iter_iovec(page, offset, bytes, i);
 865}
 866EXPORT_SYMBOL(copy_page_from_iter);
 867
 868static size_t pipe_zero(size_t bytes, struct iov_iter *i)
 869{
 870        struct pipe_inode_info *pipe = i->pipe;
 871        size_t n, off;
 872        int idx;
 873
 874        if (!sanity(i))
 875                return 0;
 876
 877        bytes = n = push_pipe(i, bytes, &idx, &off);
 878        if (unlikely(!n))
 879                return 0;
 880
 881        for ( ; n; idx = next_idx(idx, pipe), off = 0) {
 882                size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
 883                memzero_page(pipe->bufs[idx].page, off, chunk);
 884                i->idx = idx;
 885                i->iov_offset = off + chunk;
 886                n -= chunk;
 887        }
 888        i->count -= bytes;
 889        return bytes;
 890}
 891
 892size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
 893{
 894        if (unlikely(iov_iter_is_pipe(i)))
 895                return pipe_zero(bytes, i);
 896        iterate_and_advance(i, bytes, v,
 897                clear_user(v.iov_base, v.iov_len),
 898                memzero_page(v.bv_page, v.bv_offset, v.bv_len),
 899                memset(v.iov_base, 0, v.iov_len)
 900        )
 901
 902        return bytes;
 903}
 904EXPORT_SYMBOL(iov_iter_zero);
 905
 906size_t iov_iter_copy_from_user_atomic(struct page *page,
 907                struct iov_iter *i, unsigned long offset, size_t bytes)
 908{
 909        char *kaddr = kmap_atomic(page), *p = kaddr + offset;
 910        if (unlikely(!page_copy_sane(page, offset, bytes))) {
 911                kunmap_atomic(kaddr);
 912                return 0;
 913        }
 914        if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
 915                kunmap_atomic(kaddr);
 916                WARN_ON(1);
 917                return 0;
 918        }
 919        iterate_all_kinds(i, bytes, v,
 920                copyin((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
 921                memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page,
 922                                 v.bv_offset, v.bv_len),
 923                memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
 924        )
 925        kunmap_atomic(kaddr);
 926        return bytes;
 927}
 928EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
 929
 930static inline void pipe_truncate(struct iov_iter *i)
 931{
 932        struct pipe_inode_info *pipe = i->pipe;
 933        if (pipe->nrbufs) {
 934                size_t off = i->iov_offset;
 935                int idx = i->idx;
 936                int nrbufs = (idx - pipe->curbuf) & (pipe->buffers - 1);
 937                if (off) {
 938                        pipe->bufs[idx].len = off - pipe->bufs[idx].offset;
 939                        idx = next_idx(idx, pipe);
 940                        nrbufs++;
 941                }
 942                while (pipe->nrbufs > nrbufs) {
 943                        pipe_buf_release(pipe, &pipe->bufs[idx]);
 944                        idx = next_idx(idx, pipe);
 945                        pipe->nrbufs--;
 946                }
 947        }
 948}
 949
 950static void pipe_advance(struct iov_iter *i, size_t size)
 951{
 952        struct pipe_inode_info *pipe = i->pipe;
 953        if (unlikely(i->count < size))
 954                size = i->count;
 955        if (size) {
 956                struct pipe_buffer *buf;
 957                size_t off = i->iov_offset, left = size;
 958                int idx = i->idx;
 959                if (off) /* make it relative to the beginning of buffer */
 960                        left += off - pipe->bufs[idx].offset;
 961                while (1) {
 962                        buf = &pipe->bufs[idx];
 963                        if (left <= buf->len)
 964                                break;
 965                        left -= buf->len;
 966                        idx = next_idx(idx, pipe);
 967                }
 968                i->idx = idx;
 969                i->iov_offset = buf->offset + left;
 970        }
 971        i->count -= size;
 972        /* ... and discard everything past that point */
 973        pipe_truncate(i);
 974}
 975
 976void iov_iter_advance(struct iov_iter *i, size_t size)
 977{
 978        if (unlikely(iov_iter_is_pipe(i))) {
 979                pipe_advance(i, size);
 980                return;
 981        }
 982        if (unlikely(iov_iter_is_discard(i))) {
 983                i->count -= size;
 984                return;
 985        }
 986        iterate_and_advance(i, size, v, 0, 0, 0)
 987}
 988EXPORT_SYMBOL(iov_iter_advance);
 989
 990void iov_iter_revert(struct iov_iter *i, size_t unroll)
 991{
 992        if (!unroll)
 993                return;
 994        if (WARN_ON(unroll > MAX_RW_COUNT))
 995                return;
 996        i->count += unroll;
 997        if (unlikely(iov_iter_is_pipe(i))) {
 998                struct pipe_inode_info *pipe = i->pipe;
 999                int idx = i->idx;
1000                size_t off = i->iov_offset;
1001                while (1) {
1002                        size_t n = off - pipe->bufs[idx].offset;
1003                        if (unroll < n) {
1004                                off -= unroll;
1005                                break;
1006                        }
1007                        unroll -= n;
1008                        if (!unroll && idx == i->start_idx) {
1009                                off = 0;
1010                                break;
1011                        }
1012                        if (!idx--)
1013                                idx = pipe->buffers - 1;
1014                        off = pipe->bufs[idx].offset + pipe->bufs[idx].len;
1015                }
1016                i->iov_offset = off;
1017                i->idx = idx;
1018                pipe_truncate(i);
1019                return;
1020        }
1021        if (unlikely(iov_iter_is_discard(i)))
1022                return;
1023        if (unroll <= i->iov_offset) {
1024                i->iov_offset -= unroll;
1025                return;
1026        }
1027        unroll -= i->iov_offset;
1028        if (iov_iter_is_bvec(i)) {
1029                const struct bio_vec *bvec = i->bvec;
1030                while (1) {
1031                        size_t n = (--bvec)->bv_len;
1032                        i->nr_segs++;
1033                        if (unroll <= n) {
1034                                i->bvec = bvec;
1035                                i->iov_offset = n - unroll;
1036                                return;
1037                        }
1038                        unroll -= n;
1039                }
1040        } else { /* same logic for iovec and kvec */
1041                const struct iovec *iov = i->iov;
1042                while (1) {
1043                        size_t n = (--iov)->iov_len;
1044                        i->nr_segs++;
1045                        if (unroll <= n) {
1046                                i->iov = iov;
1047                                i->iov_offset = n - unroll;
1048                                return;
1049                        }
1050                        unroll -= n;
1051                }
1052        }
1053}
1054EXPORT_SYMBOL(iov_iter_revert);
1055
1056/*
1057 * Return the count of just the current iov_iter segment.
1058 */
1059size_t iov_iter_single_seg_count(const struct iov_iter *i)
1060{
1061        if (unlikely(iov_iter_is_pipe(i)))
1062                return i->count;        // it is a silly place, anyway
1063        if (i->nr_segs == 1)
1064                return i->count;
1065        if (unlikely(iov_iter_is_discard(i)))
1066                return i->count;
1067        else if (iov_iter_is_bvec(i))
1068                return min(i->count, i->bvec->bv_len - i->iov_offset);
1069        else
1070                return min(i->count, i->iov->iov_len - i->iov_offset);
1071}
1072EXPORT_SYMBOL(iov_iter_single_seg_count);
1073
1074void iov_iter_kvec(struct iov_iter *i, unsigned int direction,
1075                        const struct kvec *kvec, unsigned long nr_segs,
1076                        size_t count)
1077{
1078        WARN_ON(direction & ~(READ | WRITE));
1079        i->type = ITER_KVEC | (direction & (READ | WRITE));
1080        i->kvec = kvec;
1081        i->nr_segs = nr_segs;
1082        i->iov_offset = 0;
1083        i->count = count;
1084}
1085EXPORT_SYMBOL(iov_iter_kvec);
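
/*
 * Illustrative sketch (editor's example, hypothetical name): wrapping a
 * kernel buffer in an ITER_KVEC iterator, e.g. to feed an iterator-based
 * interface from an in-kernel caller that has no user addresses.
 */
static size_t __maybe_unused example_fill_kernel_buf(void *kbuf, size_t len,
                                                     const void *src)
{
        struct kvec kv = { .iov_base = kbuf, .iov_len = len };
        struct iov_iter iter;

        /* READ: the kvec is the destination of the transfer. */
        iov_iter_kvec(&iter, READ, &kv, 1, len);
        return copy_to_iter(src, len, &iter);
}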
1086
1087void iov_iter_bvec(struct iov_iter *i, unsigned int direction,
1088                        const struct bio_vec *bvec, unsigned long nr_segs,
1089                        size_t count)
1090{
1091        WARN_ON(direction & ~(READ | WRITE));
1092        i->type = ITER_BVEC | (direction & (READ | WRITE));
1093        i->bvec = bvec;
1094        i->nr_segs = nr_segs;
1095        i->iov_offset = 0;
1096        i->count = count;
1097}
1098EXPORT_SYMBOL(iov_iter_bvec);
1099
1100void iov_iter_pipe(struct iov_iter *i, unsigned int direction,
1101                        struct pipe_inode_info *pipe,
1102                        size_t count)
1103{
1104        BUG_ON(direction != READ);
1105        WARN_ON(pipe->nrbufs == pipe->buffers);
1106        i->type = ITER_PIPE | READ;
1107        i->pipe = pipe;
1108        i->idx = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
1109        i->iov_offset = 0;
1110        i->count = count;
1111        i->start_idx = i->idx;
1112}
1113EXPORT_SYMBOL(iov_iter_pipe);
1114
1115/**
1116 * iov_iter_discard - Initialise an I/O iterator that discards data
1117 * @i: The iterator to initialise.
1118 * @direction: The direction of the transfer.
1119 * @count: The size of the I/O buffer in bytes.
1120 *
1121 * Set up an I/O iterator that just discards everything that's written to it.
1122 * It's only available as a READ iterator.
1123 */
1124void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count)
1125{
1126        BUG_ON(direction != READ);
1127        i->type = ITER_DISCARD | READ;
1128        i->count = count;
1129        i->iov_offset = 0;
1130}
1131EXPORT_SYMBOL(iov_iter_discard);
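
/*
 * Illustrative sketch (editor's example, hypothetical name): using a
 * discard iterator to drain bytes from a producer that only knows how to
 * copy into an iterator.  Everything written is dropped, but the iterator
 * still accounts for how much was consumed.
 */
static size_t __maybe_unused example_skip_bytes(const void *src, size_t len)
{
        struct iov_iter sink;

        iov_iter_discard(&sink, READ, len);
        return copy_to_iter(src, len, &sink);   /* returns len, data dropped */
}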
1132
1133unsigned long iov_iter_alignment(const struct iov_iter *i)
1134{
1135        unsigned long res = 0;
1136        size_t size = i->count;
1137
1138        if (unlikely(iov_iter_is_pipe(i))) {
1139                if (size && i->iov_offset && allocated(&i->pipe->bufs[i->idx]))
1140                        return size | i->iov_offset;
1141                return size;
1142        }
1143        iterate_all_kinds(i, size, v,
1144                (res |= (unsigned long)v.iov_base | v.iov_len, 0),
1145                res |= v.bv_offset | v.bv_len,
1146                res |= (unsigned long)v.iov_base | v.iov_len
1147        )
1148        return res;
1149}
1150EXPORT_SYMBOL(iov_iter_alignment);
1151
1152unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
1153{
1154        unsigned long res = 0;
1155        size_t size = i->count;
1156
1157        if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
1158                WARN_ON(1);
1159                return ~0U;
1160        }
1161
1162        iterate_all_kinds(i, size, v,
1163                (res |= (!res ? 0 : (unsigned long)v.iov_base) |
1164                        (size != v.iov_len ? size : 0), 0),
1165                (res |= (!res ? 0 : (unsigned long)v.bv_offset) |
1166                        (size != v.bv_len ? size : 0)),
1167                (res |= (!res ? 0 : (unsigned long)v.iov_base) |
1168                        (size != v.iov_len ? size : 0))
1169                );
1170        return res;
1171}
1172EXPORT_SYMBOL(iov_iter_gap_alignment);
1173
1174static inline ssize_t __pipe_get_pages(struct iov_iter *i,
1175                                size_t maxsize,
1176                                struct page **pages,
1177                                int idx,
1178                                size_t *start)
1179{
1180        struct pipe_inode_info *pipe = i->pipe;
1181        ssize_t n = push_pipe(i, maxsize, &idx, start);
1182        if (!n)
1183                return -EFAULT;
1184
1185        maxsize = n;
1186        n += *start;
1187        while (n > 0) {
1188                get_page(*pages++ = pipe->bufs[idx].page);
1189                idx = next_idx(idx, pipe);
1190                n -= PAGE_SIZE;
1191        }
1192
1193        return maxsize;
1194}
1195
1196static ssize_t pipe_get_pages(struct iov_iter *i,
1197                   struct page **pages, size_t maxsize, unsigned maxpages,
1198                   size_t *start)
1199{
1200        unsigned npages;
1201        size_t capacity;
1202        int idx;
1203
1204        if (!maxsize)
1205                return 0;
1206
1207        if (!sanity(i))
1208                return -EFAULT;
1209
1210        data_start(i, &idx, start);
1211        /* some of this one + all after this one */
1212        npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1;
1213        capacity = min(npages, maxpages) * PAGE_SIZE - *start;
1214
1215        return __pipe_get_pages(i, min(maxsize, capacity), pages, idx, start);
1216}
1217
1218ssize_t iov_iter_get_pages(struct iov_iter *i,
1219                   struct page **pages, size_t maxsize, unsigned maxpages,
1220                   size_t *start)
1221{
1222        if (maxsize > i->count)
1223                maxsize = i->count;
1224
1225        if (unlikely(iov_iter_is_pipe(i)))
1226                return pipe_get_pages(i, pages, maxsize, maxpages, start);
1227        if (unlikely(iov_iter_is_discard(i)))
1228                return -EFAULT;
1229
1230        iterate_all_kinds(i, maxsize, v, ({
1231                unsigned long addr = (unsigned long)v.iov_base;
1232                size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
1233                int n;
1234                int res;
1235
1236                if (len > maxpages * PAGE_SIZE)
1237                        len = maxpages * PAGE_SIZE;
1238                addr &= ~(PAGE_SIZE - 1);
1239                n = DIV_ROUND_UP(len, PAGE_SIZE);
1240                res = get_user_pages_fast(addr, n,
1241                                iov_iter_rw(i) != WRITE ?  FOLL_WRITE : 0,
1242                                pages);
1243                if (unlikely(res < 0))
1244                        return res;
1245                return (res == n ? len : res * PAGE_SIZE) - *start;
1246        0;}),({
1247                /* can't be more than PAGE_SIZE */
1248                *start = v.bv_offset;
1249                get_page(*pages = v.bv_page);
1250                return v.bv_len;
1251        }),({
1252                return -EFAULT;
1253        })
1254        )
1255        return 0;
1256}
1257EXPORT_SYMBOL(iov_iter_get_pages);
1258
1259static struct page **get_pages_array(size_t n)
1260{
1261        return kvmalloc_array(n, sizeof(struct page *), GFP_KERNEL);
1262}
1263
1264static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
1265                   struct page ***pages, size_t maxsize,
1266                   size_t *start)
1267{
1268        struct page **p;
1269        ssize_t n;
1270        int idx;
1271        int npages;
1272
1273        if (!maxsize)
1274                return 0;
1275
1276        if (!sanity(i))
1277                return -EFAULT;
1278
1279        data_start(i, &idx, start);
1280        /* some of this one + all after this one */
1281        npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1;
1282        n = npages * PAGE_SIZE - *start;
1283        if (maxsize > n)
1284                maxsize = n;
1285        else
1286                npages = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE);
1287        p = get_pages_array(npages);
1288        if (!p)
1289                return -ENOMEM;
1290        n = __pipe_get_pages(i, maxsize, p, idx, start);
1291        if (n > 0)
1292                *pages = p;
1293        else
1294                kvfree(p);
1295        return n;
1296}
1297
1298ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
1299                   struct page ***pages, size_t maxsize,
1300                   size_t *start)
1301{
1302        struct page **p;
1303
1304        if (maxsize > i->count)
1305                maxsize = i->count;
1306
1307        if (unlikely(iov_iter_is_pipe(i)))
1308                return pipe_get_pages_alloc(i, pages, maxsize, start);
1309        if (unlikely(iov_iter_is_discard(i)))
1310                return -EFAULT;
1311
1312        iterate_all_kinds(i, maxsize, v, ({
1313                unsigned long addr = (unsigned long)v.iov_base;
1314                size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
1315                int n;
1316                int res;
1317
1318                addr &= ~(PAGE_SIZE - 1);
1319                n = DIV_ROUND_UP(len, PAGE_SIZE);
1320                p = get_pages_array(n);
1321                if (!p)
1322                        return -ENOMEM;
1323                res = get_user_pages_fast(addr, n,
1324                                iov_iter_rw(i) != WRITE ?  FOLL_WRITE : 0, p);
1325                if (unlikely(res < 0)) {
1326                        kvfree(p);
1327                        return res;
1328                }
1329                *pages = p;
1330                return (res == n ? len : res * PAGE_SIZE) - *start;
1331        0;}),({
1332                /* can't be more than PAGE_SIZE */
1333                *start = v.bv_offset;
1334                *pages = p = get_pages_array(1);
1335                if (!p)
1336                        return -ENOMEM;
1337                get_page(*p = v.bv_page);
1338                return v.bv_len;
1339        }),({
1340                return -EFAULT;
1341        })
1342        )
1343        return 0;
1344}
1345EXPORT_SYMBOL(iov_iter_get_pages_alloc);
1346
1347size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
1348                               struct iov_iter *i)
1349{
1350        char *to = addr;
1351        __wsum sum, next;
1352        size_t off = 0;
1353        sum = *csum;
1354        if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
1355                WARN_ON(1);
1356                return 0;
1357        }
1358        iterate_and_advance(i, bytes, v, ({
1359                int err = 0;
1360                next = csum_and_copy_from_user(v.iov_base,
1361                                               (to += v.iov_len) - v.iov_len,
1362                                               v.iov_len, 0, &err);
1363                if (!err) {
1364                        sum = csum_block_add(sum, next, off);
1365                        off += v.iov_len;
1366                }
1367                err ? v.iov_len : 0;
1368        }), ({
1369                char *p = kmap_atomic(v.bv_page);
1370                next = csum_partial_copy_nocheck(p + v.bv_offset,
1371                                                 (to += v.bv_len) - v.bv_len,
1372                                                 v.bv_len, 0);
1373                kunmap_atomic(p);
1374                sum = csum_block_add(sum, next, off);
1375                off += v.bv_len;
1376        }),({
1377                next = csum_partial_copy_nocheck(v.iov_base,
1378                                                 (to += v.iov_len) - v.iov_len,
1379                                                 v.iov_len, 0);
1380                sum = csum_block_add(sum, next, off);
1381                off += v.iov_len;
1382        })
1383        )
1384        *csum = sum;
1385        return bytes;
1386}
1387EXPORT_SYMBOL(csum_and_copy_from_iter);
1388
1389bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum,
1390                               struct iov_iter *i)
1391{
1392        char *to = addr;
1393        __wsum sum, next;
1394        size_t off = 0;
1395        sum = *csum;
1396        if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
1397                WARN_ON(1);
1398                return false;
1399        }
1400        if (unlikely(i->count < bytes))
1401                return false;
1402        iterate_all_kinds(i, bytes, v, ({
1403                int err = 0;
1404                next = csum_and_copy_from_user(v.iov_base,
1405                                               (to += v.iov_len) - v.iov_len,
1406                                               v.iov_len, 0, &err);
1407                if (err)
1408                        return false;
1409                sum = csum_block_add(sum, next, off);
1410                off += v.iov_len;
1411                0;
1412        }), ({
1413                char *p = kmap_atomic(v.bv_page);
1414                next = csum_partial_copy_nocheck(p + v.bv_offset,
1415                                                 (to += v.bv_len) - v.bv_len,
1416                                                 v.bv_len, 0);
1417                kunmap_atomic(p);
1418                sum = csum_block_add(sum, next, off);
1419                off += v.bv_len;
1420        }),({
1421                next = csum_partial_copy_nocheck(v.iov_base,
1422                                                 (to += v.iov_len) - v.iov_len,
1423                                                 v.iov_len, 0);
1424                sum = csum_block_add(sum, next, off);
1425                off += v.iov_len;
1426        })
1427        )
1428        *csum = sum;
1429        iov_iter_advance(i, bytes);
1430        return true;
1431}
1432EXPORT_SYMBOL(csum_and_copy_from_iter_full);
1433
1434size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate,
1435                             struct iov_iter *i)
1436{
1437        struct csum_state *csstate = _csstate;
1438        const char *from = addr;
1439        __wsum sum, next;
1440        size_t off;
1441
1442        if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
1443                WARN_ON(1);     /* for now */
1444                return 0;
1445        }
1446
1447        sum = csstate->csum;
1448        off = csstate->off;
1449        iterate_and_advance(i, bytes, v, ({
1450                int err = 0;
1451                next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len,
1452                                             v.iov_base,
1453                                             v.iov_len, 0, &err);
1454                if (!err) {
1455                        sum = csum_block_add(sum, next, off);
1456                        off += v.iov_len;
1457                }
1458                err ? v.iov_len : 0;
1459        }), ({
1460                char *p = kmap_atomic(v.bv_page);
1461                next = csum_partial_copy_nocheck((from += v.bv_len) - v.bv_len,
1462                                                 p + v.bv_offset,
1463                                                 v.bv_len, 0);
1464                kunmap_atomic(p);
1465                sum = csum_block_add(sum, next, off);
1466                off += v.bv_len;
1467        }),({
1468                next = csum_partial_copy_nocheck((from += v.iov_len) - v.iov_len,
1469                                                 v.iov_base,
1470                                                 v.iov_len, 0);
1471                sum = csum_block_add(sum, next, off);
1472                off += v.iov_len;
1473        })
1474        )
1475        csstate->csum = sum;
1476        csstate->off = off;
1477        return bytes;
1478}
1479EXPORT_SYMBOL(csum_and_copy_to_iter);
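
/*
 * Illustrative sketch (editorial addition): struct csum_state carries the
 * running checksum and byte offset across calls, so a message can be pushed
 * to the iterator in several chunks.  send_two_chunks() is hypothetical;
 * the struct and its members are those used by the function above.
 */
static int send_two_chunks(const void *a, size_t alen,
			   const void *b, size_t blen,
			   struct iov_iter *to, __wsum *out)
{
	struct csum_state csstate = { .csum = 0, .off = 0 };

	if (csum_and_copy_to_iter(a, alen, &csstate, to) != alen)
		return -EFAULT;
	if (csum_and_copy_to_iter(b, blen, &csstate, to) != blen)
		return -EFAULT;

	*out = csstate.csum;	/* checksum over both chunks, in order */
	return 0;
}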
1480
1481size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp,
1482                struct iov_iter *i)
1483{
1484#ifdef CONFIG_CRYPTO
1485        struct ahash_request *hash = hashp;
1486        struct scatterlist sg;
1487        size_t copied;
1488
1489        copied = copy_to_iter(addr, bytes, i);
1490        sg_init_one(&sg, addr, copied);
1491        ahash_request_set_crypt(hash, &sg, NULL, copied);
1492        crypto_ahash_update(hash);
1493        return copied;
1494#else
1495        return 0;
1496#endif
1497}
1498EXPORT_SYMBOL(hash_and_copy_to_iter);
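
/*
 * Illustrative sketch (editorial addition, under stated assumptions): hashing
 * data as it is copied out to the iterator.  Assumes a "sha256" ahash that is
 * allocated with the ASYNC mask so it completes synchronously; real callers
 * must also cope with -EINPROGRESS and with CONFIG_CRYPTO=n (where the helper
 * above returns 0).  hashed_copy_out() is hypothetical.
 */
static ssize_t hashed_copy_out(const void *data, size_t len,
			       struct iov_iter *to, u8 *digest)
{
	struct crypto_ahash *tfm;
	struct ahash_request *req;
	size_t copied = 0;
	int err;

	tfm = crypto_alloc_ahash("sha256", 0, CRYPTO_ALG_ASYNC);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	req = ahash_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		crypto_free_ahash(tfm);
		return -ENOMEM;
	}
	ahash_request_set_callback(req, 0, NULL, NULL);

	err = crypto_ahash_init(req);
	if (!err) {
		copied = hash_and_copy_to_iter(data, len, req, to);
		/* digest must hold crypto_ahash_digestsize(tfm) bytes */
		ahash_request_set_crypt(req, NULL, digest, 0);
		err = crypto_ahash_final(req);
	}

	ahash_request_free(req);
	crypto_free_ahash(tfm);
	return err ? err : copied;
}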
1499
1500int iov_iter_npages(const struct iov_iter *i, int maxpages)
1501{
1502        size_t size = i->count;
1503        int npages = 0;
1504
1505        if (!size)
1506                return 0;
1507        if (unlikely(iov_iter_is_discard(i)))
1508                return 0;
1509
1510        if (unlikely(iov_iter_is_pipe(i))) {
1511                struct pipe_inode_info *pipe = i->pipe;
1512                size_t off;
1513                int idx;
1514
1515                if (!sanity(i))
1516                        return 0;
1517
1518                data_start(i, &idx, &off);
1519                /* some of this one + all after this one */
1520                npages = ((pipe->curbuf - idx - 1) & (pipe->buffers - 1)) + 1;
1521                if (npages >= maxpages)
1522                        return maxpages;
1523        } else iterate_all_kinds(i, size, v, ({
1524                unsigned long p = (unsigned long)v.iov_base;
1525                npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
1526                        - p / PAGE_SIZE;
1527                if (npages >= maxpages)
1528                        return maxpages;
1529        0;}),({
1530                npages++;
1531                if (npages >= maxpages)
1532                        return maxpages;
1533        }),({
1534                unsigned long p = (unsigned long)v.iov_base;
1535                npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
1536                        - p / PAGE_SIZE;
1537                if (npages >= maxpages)
1538                        return maxpages;
1539        })
1540        )
1541        return npages;
1542}
1543EXPORT_SYMBOL(iov_iter_npages);
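
/*
 * Illustrative sketch (editorial addition): sizing a page-pointer array
 * before pinning pages for direct I/O.  BIO_MAX_PAGES is used only as a
 * familiar cap; dio_prep() and the kcalloc'd array are hypothetical.
 */
static struct page **dio_prep(struct iov_iter *iter, int *nr)
{
	int npages = iov_iter_npages(iter, BIO_MAX_PAGES);

	*nr = npages;
	/* npages bounds the distinct pages touched by the remaining count */
	return kcalloc(npages, sizeof(struct page *), GFP_KERNEL);
}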
1544
1545const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
1546{
1547        *new = *old;
1548        if (unlikely(iov_iter_is_pipe(new))) {
1549                WARN_ON(1);
1550                return NULL;
1551        }
1552        if (unlikely(iov_iter_is_discard(new)))
1553                return NULL;
1554        if (iov_iter_is_bvec(new))
1555                return new->bvec = kmemdup(new->bvec,
1556                                    new->nr_segs * sizeof(struct bio_vec),
1557                                    flags);
1558        else
1559                /* iovec and kvec have identical layout */
1560                return new->iov = kmemdup(new->iov,
1561                                   new->nr_segs * sizeof(struct iovec),
1562                                   flags);
1563}
1564EXPORT_SYMBOL(dup_iter);
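
/*
 * Illustrative sketch (editorial addition): an asynchronous submission path
 * duplicates the iterator because the iovec/bvec array the original points
 * at may live on the submitter's stack.  struct async_req and queue_async()
 * are hypothetical.
 */
struct async_req {
	struct iov_iter iter;
	const void *segs;	/* returned by dup_iter(), freed with kfree() */
};

static int queue_async(struct async_req *req, struct iov_iter *src)
{
	req->segs = dup_iter(&req->iter, src, GFP_KERNEL);
	if (!req->segs)
		return -ENOMEM;	/* (a discard iterator also returns NULL) */
	/* ... on completion: kfree(req->segs); */
	return 0;
}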
1565
1566/**
1567 * import_iovec() - Copy an array of &struct iovec from userspace
1568 *     into the kernel, check that it is valid, and initialize a new
1569 *     &struct iov_iter iterator to access it.
1570 *
1571 * @type: One of %READ or %WRITE.
1572 * @uvector: Pointer to the userspace array.
1573 * @nr_segs: Number of elements in userspace array.
1574 * @fast_segs: Number of elements in @iov.
1575 * @iov: (input and output parameter) Pointer to pointer to (usually small
1576 *     on-stack) kernel array.
1577 * @i: Pointer to iterator that will be initialized on success.
1578 *
1579 * If the array pointed to by *@iov is large enough to hold all @nr_segs,
1580 * then this function places %NULL in *@iov on return. Otherwise, a new
1581 * array will be allocated and the result placed in *@iov. This means that
1582 * the caller may call kfree() on *@iov regardless of whether the small
1583 * on-stack array was used or not (and regardless of whether this function
1584 * returns an error or not).
1585 *
1586 * Return: Negative error code on error, bytes imported on success
1587 */
1588ssize_t import_iovec(int type, const struct iovec __user * uvector,
1589                 unsigned nr_segs, unsigned fast_segs,
1590                 struct iovec **iov, struct iov_iter *i)
1591{
1592        ssize_t n;
1593        struct iovec *p;
1594        n = rw_copy_check_uvector(type, uvector, nr_segs, fast_segs,
1595                                  *iov, &p);
1596        if (n < 0) {
1597                if (p != *iov)
1598                        kfree(p);
1599                *iov = NULL;
1600                return n;
1601        }
1602        iov_iter_init(i, type, p, nr_segs, n);
1603        *iov = p == *iov ? NULL : p;
1604        return n;
1605}
1606EXPORT_SYMBOL(import_iovec);
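
/*
 * Illustrative sketch (editorial addition): the calling pattern described in
 * the kerneldoc above -- start with a small on-stack array, let
 * import_iovec() fall back to a heap allocation when the user passed more
 * than UIO_FASTIOV segments, and unconditionally kfree() afterwards.
 * do_example_readv() is hypothetical.
 */
static ssize_t do_example_readv(const struct iovec __user *uvec,
				unsigned int nr_segs)
{
	struct iovec iovstack[UIO_FASTIOV];
	struct iovec *iov = iovstack;
	struct iov_iter iter;
	ssize_t ret;

	ret = import_iovec(READ, uvec, nr_segs, ARRAY_SIZE(iovstack),
			   &iov, &iter);
	if (ret < 0)
		return ret;	/* *iov was set to NULL, nothing to free */

	/* ret is the total byte count; &iter walks the validated segments */
	/* ... perform the actual I/O against &iter here ... */

	kfree(iov);		/* safe whether or not the stack array was used */
	return ret;
}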
1607
1608#ifdef CONFIG_COMPAT
1609#include <linux/compat.h>
1610
1611ssize_t compat_import_iovec(int type,
1612                const struct compat_iovec __user * uvector,
1613                unsigned nr_segs, unsigned fast_segs,
1614                struct iovec **iov, struct iov_iter *i)
1615{
1616        ssize_t n;
1617        struct iovec *p;
1618        n = compat_rw_copy_check_uvector(type, uvector, nr_segs, fast_segs,
1619                                  *iov, &p);
1620        if (n < 0) {
1621                if (p != *iov)
1622                        kfree(p);
1623                *iov = NULL;
1624                return n;
1625        }
1626        iov_iter_init(i, type, p, nr_segs, n);
1627        *iov = p == *iov ? NULL : p;
1628        return n;
1629}
1630#endif
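
#ifdef CONFIG_COMPAT
/*
 * Illustrative sketch (editorial addition): syscall glue reachable from
 * 32-bit tasks commonly picks the importer at run time.  import_any_iovec()
 * is hypothetical and shows one common dispatch pattern, not code from this
 * file.
 */
static ssize_t import_any_iovec(int type, const void __user *uvec,
				unsigned int nr_segs, unsigned int fast_segs,
				struct iovec **iov, struct iov_iter *i)
{
	if (in_compat_syscall())
		return compat_import_iovec(type, uvec, nr_segs,
					   fast_segs, iov, i);
	return import_iovec(type, uvec, nr_segs, fast_segs, iov, i);
}
#endif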
1631
1632int import_single_range(int rw, void __user *buf, size_t len,
1633                 struct iovec *iov, struct iov_iter *i)
1634{
1635        if (len > MAX_RW_COUNT)
1636                len = MAX_RW_COUNT;
1637        if (unlikely(!access_ok(buf, len)))
1638                return -EFAULT;
1639
1640        iov->iov_base = buf;
1641        iov->iov_len = len;
1642        iov_iter_init(i, rw, iov, 1, len);
1643        return 0;
1644}
1645EXPORT_SYMBOL(import_single_range);
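
/*
 * Illustrative sketch (editorial addition): read(2)-style entry points with
 * a single user buffer wrap it in a one-element iovec via
 * import_single_range().  do_example_read() is hypothetical.
 */
static ssize_t do_example_read(void __user *buf, size_t len)
{
	struct iovec iov;
	struct iov_iter iter;
	int ret;

	ret = import_single_range(READ, buf, len, &iov, &iter);
	if (ret)
		return ret;

	/* iov_iter_count(&iter) is len clamped to MAX_RW_COUNT */
	/* ... perform the actual I/O against &iter here ... */
	return iov_iter_count(&iter);
}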
1646
1647int iov_iter_for_each_range(struct iov_iter *i, size_t bytes,
1648                            int (*f)(struct kvec *vec, void *context),
1649                            void *context)
1650{
1651        struct kvec w;
1652        int err = -EINVAL;
1653        if (!bytes)
1654                return 0;
1655
1656        iterate_all_kinds(i, bytes, v, -EINVAL, ({
1657                w.iov_base = kmap(v.bv_page) + v.bv_offset;
1658                w.iov_len = v.bv_len;
1659                err = f(&w, context);
1660                kunmap(v.bv_page);
1661                err;}), ({
1662                w = v;
1663                err = f(&w, context);})
1664        )
1665        return err;
1666}
1667EXPORT_SYMBOL(iov_iter_for_each_range);
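
/*
 * Illustrative sketch (editorial addition): the callback receives a
 * kernel-addressable kvec for each contiguous segment (bvec pages are
 * kmap'ed around the call).  crc_one_range()/crc_iter() and the use of
 * crc32_le() from <linux/crc32.h> are hypothetical example choices.
 */
static int crc_one_range(struct kvec *vec, void *context)
{
	u32 *crc = context;

	*crc = crc32_le(*crc, vec->iov_base, vec->iov_len);
	return 0;	/* the walk returns the last callback's return value */
}

static int crc_iter(struct iov_iter *i, size_t bytes, u32 *crc)
{
	/* only kvec/bvec-backed iterators work here; ITER_IOVEC yields -EINVAL */
	return iov_iter_for_each_range(i, bytes, crc_one_range, crc);
}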
1668