linux/lib/iov_iter.c
   1#include <linux/export.h>
   2#include <linux/bvec.h>
   3#include <linux/uio.h>
   4#include <linux/pagemap.h>
   5#include <linux/slab.h>
   6#include <linux/vmalloc.h>
   7#include <linux/splice.h>
   8#include <net/checksum.h>
   9
  10#define PIPE_PARANOIA /* for now */
  11
  12#define iterate_iovec(i, n, __v, __p, skip, STEP) {     \
  13        size_t left;                                    \
  14        size_t wanted = n;                              \
  15        __p = i->iov;                                   \
  16        __v.iov_len = min(n, __p->iov_len - skip);      \
  17        if (likely(__v.iov_len)) {                      \
  18                __v.iov_base = __p->iov_base + skip;    \
  19                left = (STEP);                          \
  20                __v.iov_len -= left;                    \
  21                skip += __v.iov_len;                    \
  22                n -= __v.iov_len;                       \
  23        } else {                                        \
  24                left = 0;                               \
  25        }                                               \
  26        while (unlikely(!left && n)) {                  \
  27                __p++;                                  \
  28                __v.iov_len = min(n, __p->iov_len);     \
  29                if (unlikely(!__v.iov_len))             \
  30                        continue;                       \
  31                __v.iov_base = __p->iov_base;           \
  32                left = (STEP);                          \
  33                __v.iov_len -= left;                    \
  34                skip = __v.iov_len;                     \
  35                n -= __v.iov_len;                       \
  36        }                                               \
  37        n = wanted - n;                                 \
  38}
  39
  40#define iterate_kvec(i, n, __v, __p, skip, STEP) {      \
  41        size_t wanted = n;                              \
  42        __p = i->kvec;                                  \
  43        __v.iov_len = min(n, __p->iov_len - skip);      \
  44        if (likely(__v.iov_len)) {                      \
  45                __v.iov_base = __p->iov_base + skip;    \
  46                (void)(STEP);                           \
  47                skip += __v.iov_len;                    \
  48                n -= __v.iov_len;                       \
  49        }                                               \
  50        while (unlikely(n)) {                           \
  51                __p++;                                  \
  52                __v.iov_len = min(n, __p->iov_len);     \
  53                if (unlikely(!__v.iov_len))             \
  54                        continue;                       \
  55                __v.iov_base = __p->iov_base;           \
  56                (void)(STEP);                           \
  57                skip = __v.iov_len;                     \
  58                n -= __v.iov_len;                       \
  59        }                                               \
  60        n = wanted;                                     \
  61}
  62
  63#define iterate_bvec(i, n, __v, __bi, skip, STEP) {     \
  64        struct bvec_iter __start;                       \
  65        __start.bi_size = n;                            \
  66        __start.bi_bvec_done = skip;                    \
  67        __start.bi_idx = 0;                             \
  68        for_each_bvec(__v, i->bvec, __bi, __start) {    \
  69                if (!__v.bv_len)                        \
  70                        continue;                       \
  71                (void)(STEP);                           \
  72        }                                               \
  73}
  74
  75#define iterate_all_kinds(i, n, v, I, B, K) {                   \
  76        if (likely(n)) {                                        \
  77                size_t skip = i->iov_offset;                    \
  78                if (unlikely(i->type & ITER_BVEC)) {            \
  79                        struct bio_vec v;                       \
  80                        struct bvec_iter __bi;                  \
  81                        iterate_bvec(i, n, v, __bi, skip, (B))  \
  82                } else if (unlikely(i->type & ITER_KVEC)) {     \
  83                        const struct kvec *kvec;                \
  84                        struct kvec v;                          \
  85                        iterate_kvec(i, n, v, kvec, skip, (K))  \
  86                } else {                                        \
  87                        const struct iovec *iov;                \
  88                        struct iovec v;                         \
  89                        iterate_iovec(i, n, v, iov, skip, (I))  \
  90                }                                               \
  91        }                                                       \
  92}
  93
  94#define iterate_and_advance(i, n, v, I, B, K) {                 \
  95        if (unlikely(i->count < n))                             \
  96                n = i->count;                                   \
  97        if (i->count) {                                         \
  98                size_t skip = i->iov_offset;                    \
  99                if (unlikely(i->type & ITER_BVEC)) {            \
 100                        const struct bio_vec *bvec = i->bvec;   \
 101                        struct bio_vec v;                       \
 102                        struct bvec_iter __bi;                  \
 103                        iterate_bvec(i, n, v, __bi, skip, (B))  \
 104                        i->bvec = __bvec_iter_bvec(i->bvec, __bi);      \
 105                        i->nr_segs -= i->bvec - bvec;           \
 106                        skip = __bi.bi_bvec_done;               \
 107                } else if (unlikely(i->type & ITER_KVEC)) {     \
 108                        const struct kvec *kvec;                \
 109                        struct kvec v;                          \
 110                        iterate_kvec(i, n, v, kvec, skip, (K))  \
 111                        if (skip == kvec->iov_len) {            \
 112                                kvec++;                         \
 113                                skip = 0;                       \
 114                        }                                       \
 115                        i->nr_segs -= kvec - i->kvec;           \
 116                        i->kvec = kvec;                         \
 117                } else {                                        \
 118                        const struct iovec *iov;                \
 119                        struct iovec v;                         \
 120                        iterate_iovec(i, n, v, iov, skip, (I))  \
 121                        if (skip == iov->iov_len) {             \
 122                                iov++;                          \
 123                                skip = 0;                       \
 124                        }                                       \
 125                        i->nr_segs -= iov - i->iov;             \
 126                        i->iov = iov;                           \
 127                }                                               \
 128                i->count -= n;                                  \
 129                i->iov_offset = skip;                           \
 130        }                                                       \
 131}
 132
 133static int copyout(void __user *to, const void *from, size_t n)
 134{
 135        if (access_ok(VERIFY_WRITE, to, n)) {
 136                kasan_check_read(from, n);
 137                n = raw_copy_to_user(to, from, n);
 138        }
 139        return n;
 140}
 141
 142static int copyin(void *to, const void __user *from, size_t n)
 143{
 144        if (access_ok(VERIFY_READ, from, n)) {
 145                kasan_check_write(to, n);
 146                n = raw_copy_from_user(to, from, n);
 147        }
 148        return n;
 149}
 150
 151static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes,
 152                         struct iov_iter *i)
 153{
 154        size_t skip, copy, left, wanted;
 155        const struct iovec *iov;
 156        char __user *buf;
 157        void *kaddr, *from;
 158
 159        if (unlikely(bytes > i->count))
 160                bytes = i->count;
 161
 162        if (unlikely(!bytes))
 163                return 0;
 164
 165        might_fault();
 166        wanted = bytes;
 167        iov = i->iov;
 168        skip = i->iov_offset;
 169        buf = iov->iov_base + skip;
 170        copy = min(bytes, iov->iov_len - skip);
 171
 172        if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_writeable(buf, copy)) {
 173                kaddr = kmap_atomic(page);
 174                from = kaddr + offset;
 175
 176                /* first chunk, usually the only one */
 177                left = copyout(buf, from, copy);
 178                copy -= left;
 179                skip += copy;
 180                from += copy;
 181                bytes -= copy;
 182
 183                while (unlikely(!left && bytes)) {
 184                        iov++;
 185                        buf = iov->iov_base;
 186                        copy = min(bytes, iov->iov_len);
 187                        left = copyout(buf, from, copy);
 188                        copy -= left;
 189                        skip = copy;
 190                        from += copy;
 191                        bytes -= copy;
 192                }
 193                if (likely(!bytes)) {
 194                        kunmap_atomic(kaddr);
 195                        goto done;
 196                }
 197                offset = from - kaddr;
 198                buf += copy;
 199                kunmap_atomic(kaddr);
 200                copy = min(bytes, iov->iov_len - skip);
 201        }
 202        /* Too bad - revert to non-atomic kmap */
 203
 204        kaddr = kmap(page);
 205        from = kaddr + offset;
 206        left = copyout(buf, from, copy);
 207        copy -= left;
 208        skip += copy;
 209        from += copy;
 210        bytes -= copy;
 211        while (unlikely(!left && bytes)) {
 212                iov++;
 213                buf = iov->iov_base;
 214                copy = min(bytes, iov->iov_len);
 215                left = copyout(buf, from, copy);
 216                copy -= left;
 217                skip = copy;
 218                from += copy;
 219                bytes -= copy;
 220        }
 221        kunmap(page);
 222
 223done:
 224        if (skip == iov->iov_len) {
 225                iov++;
 226                skip = 0;
 227        }
 228        i->count -= wanted - bytes;
 229        i->nr_segs -= iov - i->iov;
 230        i->iov = iov;
 231        i->iov_offset = skip;
 232        return wanted - bytes;
 233}
 234
 235static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t bytes,
 236                         struct iov_iter *i)
 237{
 238        size_t skip, copy, left, wanted;
 239        const struct iovec *iov;
 240        char __user *buf;
 241        void *kaddr, *to;
 242
 243        if (unlikely(bytes > i->count))
 244                bytes = i->count;
 245
 246        if (unlikely(!bytes))
 247                return 0;
 248
 249        might_fault();
 250        wanted = bytes;
 251        iov = i->iov;
 252        skip = i->iov_offset;
 253        buf = iov->iov_base + skip;
 254        copy = min(bytes, iov->iov_len - skip);
 255
 256        if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_readable(buf, copy)) {
 257                kaddr = kmap_atomic(page);
 258                to = kaddr + offset;
 259
 260                /* first chunk, usually the only one */
 261                left = copyin(to, buf, copy);
 262                copy -= left;
 263                skip += copy;
 264                to += copy;
 265                bytes -= copy;
 266
 267                while (unlikely(!left && bytes)) {
 268                        iov++;
 269                        buf = iov->iov_base;
 270                        copy = min(bytes, iov->iov_len);
 271                        left = copyin(to, buf, copy);
 272                        copy -= left;
 273                        skip = copy;
 274                        to += copy;
 275                        bytes -= copy;
 276                }
 277                if (likely(!bytes)) {
 278                        kunmap_atomic(kaddr);
 279                        goto done;
 280                }
 281                offset = to - kaddr;
 282                buf += copy;
 283                kunmap_atomic(kaddr);
 284                copy = min(bytes, iov->iov_len - skip);
 285        }
 286        /* Too bad - revert to non-atomic kmap */
 287
 288        kaddr = kmap(page);
 289        to = kaddr + offset;
 290        left = copyin(to, buf, copy);
 291        copy -= left;
 292        skip += copy;
 293        to += copy;
 294        bytes -= copy;
 295        while (unlikely(!left && bytes)) {
 296                iov++;
 297                buf = iov->iov_base;
 298                copy = min(bytes, iov->iov_len);
 299                left = copyin(to, buf, copy);
 300                copy -= left;
 301                skip = copy;
 302                to += copy;
 303                bytes -= copy;
 304        }
 305        kunmap(page);
 306
 307done:
 308        if (skip == iov->iov_len) {
 309                iov++;
 310                skip = 0;
 311        }
 312        i->count -= wanted - bytes;
 313        i->nr_segs -= iov - i->iov;
 314        i->iov = iov;
 315        i->iov_offset = skip;
 316        return wanted - bytes;
 317}
 318
 319#ifdef PIPE_PARANOIA
 320static bool sanity(const struct iov_iter *i)
 321{
 322        struct pipe_inode_info *pipe = i->pipe;
 323        int idx = i->idx;
 324        int next = pipe->curbuf + pipe->nrbufs;
 325        if (i->iov_offset) {
 326                struct pipe_buffer *p;
 327                if (unlikely(!pipe->nrbufs))
 328                        goto Bad;       // pipe must be non-empty
 329                if (unlikely(idx != ((next - 1) & (pipe->buffers - 1))))
 330                        goto Bad;       // must be at the last buffer...
 331
 332                p = &pipe->bufs[idx];
 333                if (unlikely(p->offset + p->len != i->iov_offset))
 334                        goto Bad;       // ... at the end of segment
 335        } else {
 336                if (idx != (next & (pipe->buffers - 1)))
 337                        goto Bad;       // must be right after the last buffer
 338        }
 339        return true;
 340Bad:
 341        printk(KERN_ERR "idx = %d, offset = %zd\n", i->idx, i->iov_offset);
 342        printk(KERN_ERR "curbuf = %d, nrbufs = %d, buffers = %d\n",
 343                        pipe->curbuf, pipe->nrbufs, pipe->buffers);
 344        for (idx = 0; idx < pipe->buffers; idx++)
 345                printk(KERN_ERR "[%p %p %d %d]\n",
 346                        pipe->bufs[idx].ops,
 347                        pipe->bufs[idx].page,
 348                        pipe->bufs[idx].offset,
 349                        pipe->bufs[idx].len);
 350        WARN_ON(1);
 351        return false;
 352}
 353#else
 354#define sanity(i) true
 355#endif
 356
 357static inline int next_idx(int idx, struct pipe_inode_info *pipe)
 358{
 359        return (idx + 1) & (pipe->buffers - 1);
 360}
 361
 362static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes,
 363                         struct iov_iter *i)
 364{
 365        struct pipe_inode_info *pipe = i->pipe;
 366        struct pipe_buffer *buf;
 367        size_t off;
 368        int idx;
 369
 370        if (unlikely(bytes > i->count))
 371                bytes = i->count;
 372
 373        if (unlikely(!bytes))
 374                return 0;
 375
 376        if (!sanity(i))
 377                return 0;
 378
 379        off = i->iov_offset;
 380        idx = i->idx;
 381        buf = &pipe->bufs[idx];
 382        if (off) {
 383                if (offset == off && buf->page == page) {
 384                        /* merge with the last one */
 385                        buf->len += bytes;
 386                        i->iov_offset += bytes;
 387                        goto out;
 388                }
 389                idx = next_idx(idx, pipe);
 390                buf = &pipe->bufs[idx];
 391        }
 392        if (idx == pipe->curbuf && pipe->nrbufs)
 393                return 0;
 394        pipe->nrbufs++;
 395        buf->ops = &page_cache_pipe_buf_ops;
 396        get_page(buf->page = page);
 397        buf->offset = offset;
 398        buf->len = bytes;
 399        i->iov_offset = offset + bytes;
 400        i->idx = idx;
 401out:
 402        i->count -= bytes;
 403        return bytes;
 404}
 405
 406/*
 407 * Fault in one or more iovecs of the given iov_iter, to a maximum length of
 408 * bytes.  For each iovec, fault in each page that constitutes the iovec.
 409 *
 410 * Return 0 on success, or non-zero if the memory could not be accessed (i.e.
 411 * because it is an invalid address).
 412 */
 413int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
 414{
 415        size_t skip = i->iov_offset;
 416        const struct iovec *iov;
 417        int err;
 418        struct iovec v;
 419
 420        if (!(i->type & (ITER_BVEC|ITER_KVEC))) {
 421                iterate_iovec(i, bytes, v, iov, skip, ({
 422                        err = fault_in_pages_readable(v.iov_base, v.iov_len);
 423                        if (unlikely(err))
  424                                return err;
 425                0;}))
 426        }
 427        return 0;
 428}
 429EXPORT_SYMBOL(iov_iter_fault_in_readable);
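
/*
 * Illustrative sketch, not part of this file: the usual caller pattern
 * for the helper documented above is to fault the source pages in up
 * front and then attempt a non-faulting copy with page faults disabled,
 * treating a short copy as "drop locks and retry".  The function name
 * and its page/offset arguments are hypothetical.
 */
static int __maybe_unused example_prefault_then_copy(struct page *page,
                unsigned long offset, size_t bytes, struct iov_iter *i)
{
        size_t copied;

        if (iov_iter_fault_in_readable(i, bytes))
                return -EFAULT;         /* source is not accessible at all */

        pagefault_disable();
        copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
        pagefault_enable();

        return copied == bytes ? 0 : -EAGAIN;   /* short copy: caller retries */
}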
 430
 431void iov_iter_init(struct iov_iter *i, int direction,
 432                        const struct iovec *iov, unsigned long nr_segs,
 433                        size_t count)
 434{
 435        /* It will get better.  Eventually... */
 436        if (uaccess_kernel()) {
 437                direction |= ITER_KVEC;
 438                i->type = direction;
 439                i->kvec = (struct kvec *)iov;
 440        } else {
 441                i->type = direction;
 442                i->iov = iov;
 443        }
 444        i->nr_segs = nr_segs;
 445        i->iov_offset = 0;
 446        i->count = count;
 447}
 448EXPORT_SYMBOL(iov_iter_init);
 449
 450static void memcpy_from_page(char *to, struct page *page, size_t offset, size_t len)
 451{
 452        char *from = kmap_atomic(page);
 453        memcpy(to, from + offset, len);
 454        kunmap_atomic(from);
 455}
 456
 457static void memcpy_to_page(struct page *page, size_t offset, const char *from, size_t len)
 458{
 459        char *to = kmap_atomic(page);
 460        memcpy(to + offset, from, len);
 461        kunmap_atomic(to);
 462}
 463
 464static void memzero_page(struct page *page, size_t offset, size_t len)
 465{
 466        char *addr = kmap_atomic(page);
 467        memset(addr + offset, 0, len);
 468        kunmap_atomic(addr);
 469}
 470
 471static inline bool allocated(struct pipe_buffer *buf)
 472{
 473        return buf->ops == &default_pipe_buf_ops;
 474}
 475
 476static inline void data_start(const struct iov_iter *i, int *idxp, size_t *offp)
 477{
 478        size_t off = i->iov_offset;
 479        int idx = i->idx;
 480        if (off && (!allocated(&i->pipe->bufs[idx]) || off == PAGE_SIZE)) {
 481                idx = next_idx(idx, i->pipe);
 482                off = 0;
 483        }
 484        *idxp = idx;
 485        *offp = off;
 486}
 487
 488static size_t push_pipe(struct iov_iter *i, size_t size,
 489                        int *idxp, size_t *offp)
 490{
 491        struct pipe_inode_info *pipe = i->pipe;
 492        size_t off;
 493        int idx;
 494        ssize_t left;
 495
 496        if (unlikely(size > i->count))
 497                size = i->count;
 498        if (unlikely(!size))
 499                return 0;
 500
 501        left = size;
 502        data_start(i, &idx, &off);
 503        *idxp = idx;
 504        *offp = off;
 505        if (off) {
 506                left -= PAGE_SIZE - off;
 507                if (left <= 0) {
 508                        pipe->bufs[idx].len += size;
 509                        return size;
 510                }
 511                pipe->bufs[idx].len = PAGE_SIZE;
 512                idx = next_idx(idx, pipe);
 513        }
 514        while (idx != pipe->curbuf || !pipe->nrbufs) {
 515                struct page *page = alloc_page(GFP_USER);
 516                if (!page)
 517                        break;
 518                pipe->nrbufs++;
 519                pipe->bufs[idx].ops = &default_pipe_buf_ops;
 520                pipe->bufs[idx].page = page;
 521                pipe->bufs[idx].offset = 0;
 522                if (left <= PAGE_SIZE) {
 523                        pipe->bufs[idx].len = left;
 524                        return size;
 525                }
 526                pipe->bufs[idx].len = PAGE_SIZE;
 527                left -= PAGE_SIZE;
 528                idx = next_idx(idx, pipe);
 529        }
 530        return size - left;
 531}
 532
 533static size_t copy_pipe_to_iter(const void *addr, size_t bytes,
 534                                struct iov_iter *i)
 535{
 536        struct pipe_inode_info *pipe = i->pipe;
 537        size_t n, off;
 538        int idx;
 539
 540        if (!sanity(i))
 541                return 0;
 542
 543        bytes = n = push_pipe(i, bytes, &idx, &off);
 544        if (unlikely(!n))
 545                return 0;
 546        for ( ; n; idx = next_idx(idx, pipe), off = 0) {
 547                size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
 548                memcpy_to_page(pipe->bufs[idx].page, off, addr, chunk);
 549                i->idx = idx;
 550                i->iov_offset = off + chunk;
 551                n -= chunk;
 552                addr += chunk;
 553        }
 554        i->count -= bytes;
 555        return bytes;
 556}
 557
 558size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
 559{
 560        const char *from = addr;
 561        if (unlikely(i->type & ITER_PIPE))
 562                return copy_pipe_to_iter(addr, bytes, i);
 563        if (iter_is_iovec(i))
 564                might_fault();
 565        iterate_and_advance(i, bytes, v,
 566                copyout(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
 567                memcpy_to_page(v.bv_page, v.bv_offset,
 568                               (from += v.bv_len) - v.bv_len, v.bv_len),
 569                memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len)
 570        )
 571
 572        return bytes;
 573}
 574EXPORT_SYMBOL(_copy_to_iter);
 575
 576#ifdef CONFIG_ARCH_HAS_UACCESS_MCSAFE
 577static int copyout_mcsafe(void __user *to, const void *from, size_t n)
 578{
 579        if (access_ok(VERIFY_WRITE, to, n)) {
 580                kasan_check_read(from, n);
 581                n = copy_to_user_mcsafe((__force void *) to, from, n);
 582        }
 583        return n;
 584}
 585
 586static unsigned long memcpy_mcsafe_to_page(struct page *page, size_t offset,
 587                const char *from, size_t len)
 588{
 589        unsigned long ret;
 590        char *to;
 591
 592        to = kmap_atomic(page);
 593        ret = memcpy_mcsafe(to + offset, from, len);
 594        kunmap_atomic(to);
 595
 596        return ret;
 597}
 598
 599static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes,
 600                                struct iov_iter *i)
 601{
 602        struct pipe_inode_info *pipe = i->pipe;
 603        size_t n, off, xfer = 0;
 604        int idx;
 605
 606        if (!sanity(i))
 607                return 0;
 608
 609        bytes = n = push_pipe(i, bytes, &idx, &off);
 610        if (unlikely(!n))
 611                return 0;
 612        for ( ; n; idx = next_idx(idx, pipe), off = 0) {
 613                size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
 614                unsigned long rem;
 615
 616                rem = memcpy_mcsafe_to_page(pipe->bufs[idx].page, off, addr,
 617                                chunk);
 618                i->idx = idx;
 619                i->iov_offset = off + chunk - rem;
 620                xfer += chunk - rem;
 621                if (rem)
 622                        break;
 623                n -= chunk;
 624                addr += chunk;
 625        }
 626        i->count -= xfer;
 627        return xfer;
 628}
 629
 630/**
 631 * _copy_to_iter_mcsafe - copy to user with source-read error exception handling
 632 * @addr: source kernel address
 633 * @bytes: total transfer length
  634 * @i: destination iterator
 635 *
 636 * The pmem driver arranges for filesystem-dax to use this facility via
 637 * dax_copy_to_iter() for protecting read/write to persistent memory.
 638 * Unless / until an architecture can guarantee identical performance
 639 * between _copy_to_iter_mcsafe() and _copy_to_iter() it would be a
 640 * performance regression to switch more users to the mcsafe version.
 641 *
  642 * Otherwise, the main differences between this and a typical _copy_to_iter() are:
 643 *
 644 * * Typical tail/residue handling after a fault retries the copy
 645 *   byte-by-byte until the fault happens again. Re-triggering machine
 646 *   checks is potentially fatal so the implementation uses source
 647 *   alignment and poison alignment assumptions to avoid re-triggering
 648 *   hardware exceptions.
 649 *
 650 * * ITER_KVEC, ITER_PIPE, and ITER_BVEC can return short copies.
 651 *   Compare to copy_to_iter() where only ITER_IOVEC attempts might return
 652 *   a short copy.
 653 *
 654 * See MCSAFE_TEST for self-test.
 655 */
 656size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i)
 657{
 658        const char *from = addr;
 659        unsigned long rem, curr_addr, s_addr = (unsigned long) addr;
 660
 661        if (unlikely(i->type & ITER_PIPE))
 662                return copy_pipe_to_iter_mcsafe(addr, bytes, i);
 663        if (iter_is_iovec(i))
 664                might_fault();
 665        iterate_and_advance(i, bytes, v,
 666                copyout_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
 667                ({
 668                rem = memcpy_mcsafe_to_page(v.bv_page, v.bv_offset,
 669                               (from += v.bv_len) - v.bv_len, v.bv_len);
 670                if (rem) {
 671                        curr_addr = (unsigned long) from;
 672                        bytes = curr_addr - s_addr - rem;
 673                        return bytes;
 674                }
 675                }),
 676                ({
 677                rem = memcpy_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len,
 678                                v.iov_len);
 679                if (rem) {
 680                        curr_addr = (unsigned long) from;
 681                        bytes = curr_addr - s_addr - rem;
 682                        return bytes;
 683                }
 684                })
 685        )
 686
 687        return bytes;
 688}
 689EXPORT_SYMBOL_GPL(_copy_to_iter_mcsafe);
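
/*
 * Illustrative sketch only (the helper name is hypothetical): unlike
 * _copy_to_iter(), a short return from the mcsafe variant can mean the
 * *source* could not be read (e.g. poisoned persistent memory), so a
 * caller normally reports the partial transfer instead of retrying the
 * remainder byte by byte.
 */
static ssize_t __maybe_unused example_mcsafe_read(const void *src, size_t len,
                struct iov_iter *i)
{
        size_t copied = _copy_to_iter_mcsafe(src, len, i);

        if (!copied && len)
                return -EIO;            /* no progress at all */
        return copied;                  /* possibly shorter than len */
}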
 690#endif /* CONFIG_ARCH_HAS_UACCESS_MCSAFE */
 691
 692size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
 693{
 694        char *to = addr;
 695        if (unlikely(i->type & ITER_PIPE)) {
 696                WARN_ON(1);
 697                return 0;
 698        }
 699        if (iter_is_iovec(i))
 700                might_fault();
 701        iterate_and_advance(i, bytes, v,
 702                copyin((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
 703                memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
 704                                 v.bv_offset, v.bv_len),
 705                memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
 706        )
 707
 708        return bytes;
 709}
 710EXPORT_SYMBOL(_copy_from_iter);
 711
 712bool _copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i)
 713{
 714        char *to = addr;
 715        if (unlikely(i->type & ITER_PIPE)) {
 716                WARN_ON(1);
 717                return false;
 718        }
 719        if (unlikely(i->count < bytes))
 720                return false;
 721
 722        if (iter_is_iovec(i))
 723                might_fault();
 724        iterate_all_kinds(i, bytes, v, ({
 725                if (copyin((to += v.iov_len) - v.iov_len,
 726                                      v.iov_base, v.iov_len))
 727                        return false;
 728                0;}),
 729                memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
 730                                 v.bv_offset, v.bv_len),
 731                memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
 732        )
 733
 734        iov_iter_advance(i, bytes);
 735        return true;
 736}
 737EXPORT_SYMBOL(_copy_from_iter_full);
 738
 739size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
 740{
 741        char *to = addr;
 742        if (unlikely(i->type & ITER_PIPE)) {
 743                WARN_ON(1);
 744                return 0;
 745        }
 746        iterate_and_advance(i, bytes, v,
 747                __copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len,
 748                                         v.iov_base, v.iov_len),
 749                memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
 750                                 v.bv_offset, v.bv_len),
 751                memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
 752        )
 753
 754        return bytes;
 755}
 756EXPORT_SYMBOL(_copy_from_iter_nocache);
 757
 758#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
 759/**
 760 * _copy_from_iter_flushcache - write destination through cpu cache
 761 * @addr: destination kernel address
 762 * @bytes: total transfer length
  763 * @i: source iterator
 764 *
 765 * The pmem driver arranges for filesystem-dax to use this facility via
 766 * dax_copy_from_iter() for ensuring that writes to persistent memory
 767 * are flushed through the CPU cache. It is differentiated from
  768 * _copy_from_iter_nocache() in that it guarantees all data is flushed
  769 * for all iterator types. _copy_from_iter_nocache() only attempts to
  770 * bypass the cache for the ITER_IOVEC case, and on some archs may use
  771 * instructions that strand dirty data in the cache.
 772 */
 773size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
 774{
 775        char *to = addr;
 776        if (unlikely(i->type & ITER_PIPE)) {
 777                WARN_ON(1);
 778                return 0;
 779        }
 780        iterate_and_advance(i, bytes, v,
 781                __copy_from_user_flushcache((to += v.iov_len) - v.iov_len,
 782                                         v.iov_base, v.iov_len),
 783                memcpy_page_flushcache((to += v.bv_len) - v.bv_len, v.bv_page,
 784                                 v.bv_offset, v.bv_len),
 785                memcpy_flushcache((to += v.iov_len) - v.iov_len, v.iov_base,
 786                        v.iov_len)
 787        )
 788
 789        return bytes;
 790}
 791EXPORT_SYMBOL_GPL(_copy_from_iter_flushcache);
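
/*
 * Illustrative sketch only: a pmem-style dax ->copy_from_iter() method
 * (the helper name and pmem_addr argument are hypothetical) hands its
 * already-mapped destination straight to _copy_from_iter_flushcache(),
 * so the stores are pushed past the CPU cache rather than left to a
 * separate flush pass afterwards.
 */
static size_t __maybe_unused example_pmem_copy_from_iter(void *pmem_addr,
                size_t bytes, struct iov_iter *i)
{
        return _copy_from_iter_flushcache(pmem_addr, bytes, i);
}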
 792#endif
 793
 794bool _copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
 795{
 796        char *to = addr;
 797        if (unlikely(i->type & ITER_PIPE)) {
 798                WARN_ON(1);
 799                return false;
 800        }
 801        if (unlikely(i->count < bytes))
 802                return false;
 803        iterate_all_kinds(i, bytes, v, ({
 804                if (__copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len,
 805                                             v.iov_base, v.iov_len))
 806                        return false;
 807                0;}),
 808                memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
 809                                 v.bv_offset, v.bv_len),
 810                memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
 811        )
 812
 813        iov_iter_advance(i, bytes);
 814        return true;
 815}
 816EXPORT_SYMBOL(_copy_from_iter_full_nocache);
 817
 818static inline bool page_copy_sane(struct page *page, size_t offset, size_t n)
 819{
 820        struct page *head = compound_head(page);
 821        size_t v = n + offset + page_address(page) - page_address(head);
 822
 823        if (likely(n <= v && v <= (PAGE_SIZE << compound_order(head))))
 824                return true;
 825        WARN_ON(1);
 826        return false;
 827}
 828
 829size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
 830                         struct iov_iter *i)
 831{
 832        if (unlikely(!page_copy_sane(page, offset, bytes)))
 833                return 0;
 834        if (i->type & (ITER_BVEC|ITER_KVEC)) {
 835                void *kaddr = kmap_atomic(page);
 836                size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
 837                kunmap_atomic(kaddr);
 838                return wanted;
 839        } else if (likely(!(i->type & ITER_PIPE)))
 840                return copy_page_to_iter_iovec(page, offset, bytes, i);
 841        else
 842                return copy_page_to_iter_pipe(page, offset, bytes, i);
 843}
 844EXPORT_SYMBOL(copy_page_to_iter);
 845
 846size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
 847                         struct iov_iter *i)
 848{
 849        if (unlikely(!page_copy_sane(page, offset, bytes)))
 850                return 0;
 851        if (unlikely(i->type & ITER_PIPE)) {
 852                WARN_ON(1);
 853                return 0;
 854        }
 855        if (i->type & (ITER_BVEC|ITER_KVEC)) {
 856                void *kaddr = kmap_atomic(page);
 857                size_t wanted = _copy_from_iter(kaddr + offset, bytes, i);
 858                kunmap_atomic(kaddr);
 859                return wanted;
 860        } else
 861                return copy_page_from_iter_iovec(page, offset, bytes, i);
 862}
 863EXPORT_SYMBOL(copy_page_from_iter);
 864
 865static size_t pipe_zero(size_t bytes, struct iov_iter *i)
 866{
 867        struct pipe_inode_info *pipe = i->pipe;
 868        size_t n, off;
 869        int idx;
 870
 871        if (!sanity(i))
 872                return 0;
 873
 874        bytes = n = push_pipe(i, bytes, &idx, &off);
 875        if (unlikely(!n))
 876                return 0;
 877
 878        for ( ; n; idx = next_idx(idx, pipe), off = 0) {
 879                size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
 880                memzero_page(pipe->bufs[idx].page, off, chunk);
 881                i->idx = idx;
 882                i->iov_offset = off + chunk;
 883                n -= chunk;
 884        }
 885        i->count -= bytes;
 886        return bytes;
 887}
 888
 889size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
 890{
 891        if (unlikely(i->type & ITER_PIPE))
 892                return pipe_zero(bytes, i);
 893        iterate_and_advance(i, bytes, v,
 894                clear_user(v.iov_base, v.iov_len),
 895                memzero_page(v.bv_page, v.bv_offset, v.bv_len),
 896                memset(v.iov_base, 0, v.iov_len)
 897        )
 898
 899        return bytes;
 900}
 901EXPORT_SYMBOL(iov_iter_zero);
 902
 903size_t iov_iter_copy_from_user_atomic(struct page *page,
 904                struct iov_iter *i, unsigned long offset, size_t bytes)
 905{
 906        char *kaddr = kmap_atomic(page), *p = kaddr + offset;
 907        if (unlikely(!page_copy_sane(page, offset, bytes))) {
 908                kunmap_atomic(kaddr);
 909                return 0;
 910        }
 911        if (unlikely(i->type & ITER_PIPE)) {
 912                kunmap_atomic(kaddr);
 913                WARN_ON(1);
 914                return 0;
 915        }
 916        iterate_all_kinds(i, bytes, v,
 917                copyin((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
 918                memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page,
 919                                 v.bv_offset, v.bv_len),
 920                memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
 921        )
 922        kunmap_atomic(kaddr);
 923        return bytes;
 924}
 925EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
 926
 927static inline void pipe_truncate(struct iov_iter *i)
 928{
 929        struct pipe_inode_info *pipe = i->pipe;
 930        if (pipe->nrbufs) {
 931                size_t off = i->iov_offset;
 932                int idx = i->idx;
 933                int nrbufs = (idx - pipe->curbuf) & (pipe->buffers - 1);
 934                if (off) {
 935                        pipe->bufs[idx].len = off - pipe->bufs[idx].offset;
 936                        idx = next_idx(idx, pipe);
 937                        nrbufs++;
 938                }
 939                while (pipe->nrbufs > nrbufs) {
 940                        pipe_buf_release(pipe, &pipe->bufs[idx]);
 941                        idx = next_idx(idx, pipe);
 942                        pipe->nrbufs--;
 943                }
 944        }
 945}
 946
 947static void pipe_advance(struct iov_iter *i, size_t size)
 948{
 949        struct pipe_inode_info *pipe = i->pipe;
 950        if (unlikely(i->count < size))
 951                size = i->count;
 952        if (size) {
 953                struct pipe_buffer *buf;
 954                size_t off = i->iov_offset, left = size;
 955                int idx = i->idx;
 956                if (off) /* make it relative to the beginning of buffer */
 957                        left += off - pipe->bufs[idx].offset;
 958                while (1) {
 959                        buf = &pipe->bufs[idx];
 960                        if (left <= buf->len)
 961                                break;
 962                        left -= buf->len;
 963                        idx = next_idx(idx, pipe);
 964                }
 965                i->idx = idx;
 966                i->iov_offset = buf->offset + left;
 967        }
 968        i->count -= size;
 969        /* ... and discard everything past that point */
 970        pipe_truncate(i);
 971}
 972
 973void iov_iter_advance(struct iov_iter *i, size_t size)
 974{
 975        if (unlikely(i->type & ITER_PIPE)) {
 976                pipe_advance(i, size);
 977                return;
 978        }
 979        iterate_and_advance(i, size, v, 0, 0, 0)
 980}
 981EXPORT_SYMBOL(iov_iter_advance);
 982
 983void iov_iter_revert(struct iov_iter *i, size_t unroll)
 984{
 985        if (!unroll)
 986                return;
 987        if (WARN_ON(unroll > MAX_RW_COUNT))
 988                return;
 989        i->count += unroll;
 990        if (unlikely(i->type & ITER_PIPE)) {
 991                struct pipe_inode_info *pipe = i->pipe;
 992                int idx = i->idx;
 993                size_t off = i->iov_offset;
 994                while (1) {
 995                        size_t n = off - pipe->bufs[idx].offset;
 996                        if (unroll < n) {
 997                                off -= unroll;
 998                                break;
 999                        }
1000                        unroll -= n;
1001                        if (!unroll && idx == i->start_idx) {
1002                                off = 0;
1003                                break;
1004                        }
1005                        if (!idx--)
1006                                idx = pipe->buffers - 1;
1007                        off = pipe->bufs[idx].offset + pipe->bufs[idx].len;
1008                }
1009                i->iov_offset = off;
1010                i->idx = idx;
1011                pipe_truncate(i);
1012                return;
1013        }
1014        if (unroll <= i->iov_offset) {
1015                i->iov_offset -= unroll;
1016                return;
1017        }
1018        unroll -= i->iov_offset;
1019        if (i->type & ITER_BVEC) {
1020                const struct bio_vec *bvec = i->bvec;
1021                while (1) {
1022                        size_t n = (--bvec)->bv_len;
1023                        i->nr_segs++;
1024                        if (unroll <= n) {
1025                                i->bvec = bvec;
1026                                i->iov_offset = n - unroll;
1027                                return;
1028                        }
1029                        unroll -= n;
1030                }
1031        } else { /* same logic for iovec and kvec */
1032                const struct iovec *iov = i->iov;
1033                while (1) {
1034                        size_t n = (--iov)->iov_len;
1035                        i->nr_segs++;
1036                        if (unroll <= n) {
1037                                i->iov = iov;
1038                                i->iov_offset = n - unroll;
1039                                return;
1040                        }
1041                        unroll -= n;
1042                }
1043        }
1044}
1045EXPORT_SYMBOL(iov_iter_revert);
1046
1047/*
1048 * Return the count of just the current iov_iter segment.
1049 */
1050size_t iov_iter_single_seg_count(const struct iov_iter *i)
1051{
1052        if (unlikely(i->type & ITER_PIPE))
1053                return i->count;        // it is a silly place, anyway
1054        if (i->nr_segs == 1)
1055                return i->count;
1056        else if (i->type & ITER_BVEC)
1057                return min(i->count, i->bvec->bv_len - i->iov_offset);
1058        else
1059                return min(i->count, i->iov->iov_len - i->iov_offset);
1060}
1061EXPORT_SYMBOL(iov_iter_single_seg_count);
1062
1063void iov_iter_kvec(struct iov_iter *i, int direction,
1064                        const struct kvec *kvec, unsigned long nr_segs,
1065                        size_t count)
1066{
1067        BUG_ON(!(direction & ITER_KVEC));
1068        i->type = direction;
1069        i->kvec = kvec;
1070        i->nr_segs = nr_segs;
1071        i->iov_offset = 0;
1072        i->count = count;
1073}
1074EXPORT_SYMBOL(iov_iter_kvec);
1075
1076void iov_iter_bvec(struct iov_iter *i, int direction,
1077                        const struct bio_vec *bvec, unsigned long nr_segs,
1078                        size_t count)
1079{
1080        BUG_ON(!(direction & ITER_BVEC));
1081        i->type = direction;
1082        i->bvec = bvec;
1083        i->nr_segs = nr_segs;
1084        i->iov_offset = 0;
1085        i->count = count;
1086}
1087EXPORT_SYMBOL(iov_iter_bvec);
1088
1089void iov_iter_pipe(struct iov_iter *i, int direction,
1090                        struct pipe_inode_info *pipe,
1091                        size_t count)
1092{
1093        BUG_ON(direction != ITER_PIPE);
1094        WARN_ON(pipe->nrbufs == pipe->buffers);
1095        i->type = direction;
1096        i->pipe = pipe;
1097        i->idx = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
1098        i->iov_offset = 0;
1099        i->count = count;
1100        i->start_idx = i->idx;
1101}
1102EXPORT_SYMBOL(iov_iter_pipe);
1103
1104unsigned long iov_iter_alignment(const struct iov_iter *i)
1105{
1106        unsigned long res = 0;
1107        size_t size = i->count;
1108
1109        if (unlikely(i->type & ITER_PIPE)) {
1110                if (size && i->iov_offset && allocated(&i->pipe->bufs[i->idx]))
1111                        return size | i->iov_offset;
1112                return size;
1113        }
1114        iterate_all_kinds(i, size, v,
1115                (res |= (unsigned long)v.iov_base | v.iov_len, 0),
1116                res |= v.bv_offset | v.bv_len,
1117                res |= (unsigned long)v.iov_base | v.iov_len
1118        )
1119        return res;
1120}
1121EXPORT_SYMBOL(iov_iter_alignment);
1122
1123unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
1124{
1125        unsigned long res = 0;
1126        size_t size = i->count;
1127
1128        if (unlikely(i->type & ITER_PIPE)) {
1129                WARN_ON(1);
1130                return ~0U;
1131        }
1132
1133        iterate_all_kinds(i, size, v,
1134                (res |= (!res ? 0 : (unsigned long)v.iov_base) |
1135                        (size != v.iov_len ? size : 0), 0),
1136                (res |= (!res ? 0 : (unsigned long)v.bv_offset) |
1137                        (size != v.bv_len ? size : 0)),
1138                (res |= (!res ? 0 : (unsigned long)v.iov_base) |
1139                        (size != v.iov_len ? size : 0))
1140                );
1141        return res;
1142}
1143EXPORT_SYMBOL(iov_iter_gap_alignment);
1144
1145static inline ssize_t __pipe_get_pages(struct iov_iter *i,
1146                                size_t maxsize,
1147                                struct page **pages,
1148                                int idx,
1149                                size_t *start)
1150{
1151        struct pipe_inode_info *pipe = i->pipe;
1152        ssize_t n = push_pipe(i, maxsize, &idx, start);
1153        if (!n)
1154                return -EFAULT;
1155
1156        maxsize = n;
1157        n += *start;
1158        while (n > 0) {
1159                get_page(*pages++ = pipe->bufs[idx].page);
1160                idx = next_idx(idx, pipe);
1161                n -= PAGE_SIZE;
1162        }
1163
1164        return maxsize;
1165}
1166
1167static ssize_t pipe_get_pages(struct iov_iter *i,
1168                   struct page **pages, size_t maxsize, unsigned maxpages,
1169                   size_t *start)
1170{
1171        unsigned npages;
1172        size_t capacity;
1173        int idx;
1174
1175        if (!maxsize)
1176                return 0;
1177
1178        if (!sanity(i))
1179                return -EFAULT;
1180
1181        data_start(i, &idx, start);
1182        /* some of this one + all after this one */
1183        npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1;
 1184        capacity = min(npages, maxpages) * PAGE_SIZE - *start;
1185
1186        return __pipe_get_pages(i, min(maxsize, capacity), pages, idx, start);
1187}
1188
1189ssize_t iov_iter_get_pages(struct iov_iter *i,
1190                   struct page **pages, size_t maxsize, unsigned maxpages,
1191                   size_t *start)
1192{
1193        if (maxsize > i->count)
1194                maxsize = i->count;
1195
1196        if (unlikely(i->type & ITER_PIPE))
1197                return pipe_get_pages(i, pages, maxsize, maxpages, start);
1198        iterate_all_kinds(i, maxsize, v, ({
1199                unsigned long addr = (unsigned long)v.iov_base;
1200                size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
1201                int n;
1202                int res;
1203
1204                if (len > maxpages * PAGE_SIZE)
1205                        len = maxpages * PAGE_SIZE;
1206                addr &= ~(PAGE_SIZE - 1);
1207                n = DIV_ROUND_UP(len, PAGE_SIZE);
1208                res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, pages);
1209                if (unlikely(res < 0))
1210                        return res;
1211                return (res == n ? len : res * PAGE_SIZE) - *start;
1212        0;}),({
1213                /* can't be more than PAGE_SIZE */
1214                *start = v.bv_offset;
1215                get_page(*pages = v.bv_page);
1216                return v.bv_len;
1217        }),({
1218                return -EFAULT;
1219        })
1220        )
1221        return 0;
1222}
1223EXPORT_SYMBOL(iov_iter_get_pages);
1224
1225static struct page **get_pages_array(size_t n)
1226{
1227        return kvmalloc_array(n, sizeof(struct page *), GFP_KERNEL);
1228}
1229
1230static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
1231                   struct page ***pages, size_t maxsize,
1232                   size_t *start)
1233{
1234        struct page **p;
1235        ssize_t n;
1236        int idx;
1237        int npages;
1238
1239        if (!maxsize)
1240                return 0;
1241
1242        if (!sanity(i))
1243                return -EFAULT;
1244
1245        data_start(i, &idx, start);
1246        /* some of this one + all after this one */
1247        npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1;
1248        n = npages * PAGE_SIZE - *start;
1249        if (maxsize > n)
1250                maxsize = n;
1251        else
1252                npages = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE);
1253        p = get_pages_array(npages);
1254        if (!p)
1255                return -ENOMEM;
1256        n = __pipe_get_pages(i, maxsize, p, idx, start);
1257        if (n > 0)
1258                *pages = p;
1259        else
1260                kvfree(p);
1261        return n;
1262}
1263
1264ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
1265                   struct page ***pages, size_t maxsize,
1266                   size_t *start)
1267{
1268        struct page **p;
1269
1270        if (maxsize > i->count)
1271                maxsize = i->count;
1272
1273        if (unlikely(i->type & ITER_PIPE))
1274                return pipe_get_pages_alloc(i, pages, maxsize, start);
1275        iterate_all_kinds(i, maxsize, v, ({
1276                unsigned long addr = (unsigned long)v.iov_base;
1277                size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
1278                int n;
1279                int res;
1280
1281                addr &= ~(PAGE_SIZE - 1);
1282                n = DIV_ROUND_UP(len, PAGE_SIZE);
1283                p = get_pages_array(n);
1284                if (!p)
1285                        return -ENOMEM;
1286                res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, p);
1287                if (unlikely(res < 0)) {
1288                        kvfree(p);
1289                        return res;
1290                }
1291                *pages = p;
1292                return (res == n ? len : res * PAGE_SIZE) - *start;
1293        0;}),({
1294                /* can't be more than PAGE_SIZE */
1295                *start = v.bv_offset;
1296                *pages = p = get_pages_array(1);
1297                if (!p)
1298                        return -ENOMEM;
1299                get_page(*p = v.bv_page);
1300                return v.bv_len;
1301        }),({
1302                return -EFAULT;
1303        })
1304        )
1305        return 0;
1306}
1307EXPORT_SYMBOL(iov_iter_get_pages_alloc);
1308
1309size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
1310                               struct iov_iter *i)
1311{
1312        char *to = addr;
1313        __wsum sum, next;
1314        size_t off = 0;
1315        sum = *csum;
1316        if (unlikely(i->type & ITER_PIPE)) {
1317                WARN_ON(1);
1318                return 0;
1319        }
1320        iterate_and_advance(i, bytes, v, ({
1321                int err = 0;
1322                next = csum_and_copy_from_user(v.iov_base,
1323                                               (to += v.iov_len) - v.iov_len,
1324                                               v.iov_len, 0, &err);
1325                if (!err) {
1326                        sum = csum_block_add(sum, next, off);
1327                        off += v.iov_len;
1328                }
1329                err ? v.iov_len : 0;
1330        }), ({
1331                char *p = kmap_atomic(v.bv_page);
1332                next = csum_partial_copy_nocheck(p + v.bv_offset,
1333                                                 (to += v.bv_len) - v.bv_len,
1334                                                 v.bv_len, 0);
1335                kunmap_atomic(p);
1336                sum = csum_block_add(sum, next, off);
1337                off += v.bv_len;
1338        }),({
1339                next = csum_partial_copy_nocheck(v.iov_base,
1340                                                 (to += v.iov_len) - v.iov_len,
1341                                                 v.iov_len, 0);
1342                sum = csum_block_add(sum, next, off);
1343                off += v.iov_len;
1344        })
1345        )
1346        *csum = sum;
1347        return bytes;
1348}
1349EXPORT_SYMBOL(csum_and_copy_from_iter);
1350
1351bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum,
1352                               struct iov_iter *i)
1353{
1354        char *to = addr;
1355        __wsum sum, next;
1356        size_t off = 0;
1357        sum = *csum;
1358        if (unlikely(i->type & ITER_PIPE)) {
1359                WARN_ON(1);
1360                return false;
1361        }
1362        if (unlikely(i->count < bytes))
1363                return false;
1364        iterate_all_kinds(i, bytes, v, ({
1365                int err = 0;
1366                next = csum_and_copy_from_user(v.iov_base,
1367                                               (to += v.iov_len) - v.iov_len,
1368                                               v.iov_len, 0, &err);
1369                if (err)
1370                        return false;
1371                sum = csum_block_add(sum, next, off);
1372                off += v.iov_len;
1373                0;
1374        }), ({
1375                char *p = kmap_atomic(v.bv_page);
1376                next = csum_partial_copy_nocheck(p + v.bv_offset,
1377                                                 (to += v.bv_len) - v.bv_len,
1378                                                 v.bv_len, 0);
1379                kunmap_atomic(p);
1380                sum = csum_block_add(sum, next, off);
1381                off += v.bv_len;
1382        }),({
1383                next = csum_partial_copy_nocheck(v.iov_base,
1384                                                 (to += v.iov_len) - v.iov_len,
1385                                                 v.iov_len, 0);
1386                sum = csum_block_add(sum, next, off);
1387                off += v.iov_len;
1388        })
1389        )
1390        *csum = sum;
1391        iov_iter_advance(i, bytes);
1392        return true;
1393}
1394EXPORT_SYMBOL(csum_and_copy_from_iter_full);
1395
1396size_t csum_and_copy_to_iter(const void *addr, size_t bytes, __wsum *csum,
1397                             struct iov_iter *i)
1398{
1399        const char *from = addr;
1400        __wsum sum, next;
1401        size_t off = 0;
1402        sum = *csum;
1403        if (unlikely(i->type & ITER_PIPE)) {
1404                WARN_ON(1);     /* for now */
1405                return 0;
1406        }
1407        iterate_and_advance(i, bytes, v, ({
1408                int err = 0;
1409                next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len,
1410                                             v.iov_base,
1411                                             v.iov_len, 0, &err);
1412                if (!err) {
1413                        sum = csum_block_add(sum, next, off);
1414                        off += v.iov_len;
1415                }
1416                err ? v.iov_len : 0;
1417        }), ({
1418                char *p = kmap_atomic(v.bv_page);
1419                next = csum_partial_copy_nocheck((from += v.bv_len) - v.bv_len,
1420                                                 p + v.bv_offset,
1421                                                 v.bv_len, 0);
1422                kunmap_atomic(p);
1423                sum = csum_block_add(sum, next, off);
1424                off += v.bv_len;
1425        }),({
1426                next = csum_partial_copy_nocheck((from += v.iov_len) - v.iov_len,
1427                                                 v.iov_base,
1428                                                 v.iov_len, 0);
1429                sum = csum_block_add(sum, next, off);
1430                off += v.iov_len;
1431        })
1432        )
1433        *csum = sum;
1434        return bytes;
1435}
1436EXPORT_SYMBOL(csum_and_copy_to_iter);
1437
1438int iov_iter_npages(const struct iov_iter *i, int maxpages)
1439{
1440        size_t size = i->count;
1441        int npages = 0;
1442
1443        if (!size)
1444                return 0;
1445
1446        if (unlikely(i->type & ITER_PIPE)) {
1447                struct pipe_inode_info *pipe = i->pipe;
1448                size_t off;
1449                int idx;
1450
1451                if (!sanity(i))
1452                        return 0;
1453
1454                data_start(i, &idx, &off);
1455                /* some of this one + all after this one */
1456                npages = ((pipe->curbuf - idx - 1) & (pipe->buffers - 1)) + 1;
1457                if (npages >= maxpages)
1458                        return maxpages;
1459        } else iterate_all_kinds(i, size, v, ({
1460                unsigned long p = (unsigned long)v.iov_base;
1461                npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
1462                        - p / PAGE_SIZE;
1463                if (npages >= maxpages)
1464                        return maxpages;
1465        0;}),({
1466                npages++;
1467                if (npages >= maxpages)
1468                        return maxpages;
1469        }),({
1470                unsigned long p = (unsigned long)v.iov_base;
1471                npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
1472                        - p / PAGE_SIZE;
1473                if (npages >= maxpages)
1474                        return maxpages;
1475        })
1476        )
1477        return npages;
1478}
1479EXPORT_SYMBOL(iov_iter_npages);
1480
1481const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
1482{
1483        *new = *old;
1484        if (unlikely(new->type & ITER_PIPE)) {
1485                WARN_ON(1);
1486                return NULL;
1487        }
1488        if (new->type & ITER_BVEC)
1489                return new->bvec = kmemdup(new->bvec,
1490                                    new->nr_segs * sizeof(struct bio_vec),
1491                                    flags);
1492        else
1493                /* iovec and kvec have identical layout */
1494                return new->iov = kmemdup(new->iov,
1495                                   new->nr_segs * sizeof(struct iovec),
1496                                   flags);
1497}
1498EXPORT_SYMBOL(dup_iter);
1499
1500/**
1501 * import_iovec() - Copy an array of &struct iovec from userspace
1502 *     into the kernel, check that it is valid, and initialize a new
1503 *     &struct iov_iter iterator to access it.
1504 *
1505 * @type: One of %READ or %WRITE.
1506 * @uvector: Pointer to the userspace array.
1507 * @nr_segs: Number of elements in userspace array.
1508 * @fast_segs: Number of elements in @iov.
1509 * @iov: (input and output parameter) Pointer to pointer to (usually small
1510 *     on-stack) kernel array.
1511 * @i: Pointer to iterator that will be initialized on success.
1512 *
1513 * If the array pointed to by *@iov is large enough to hold all @nr_segs,
1514 * then this function places %NULL in *@iov on return. Otherwise, a new
1515 * array will be allocated and the result placed in *@iov. This means that
1516 * the caller may call kfree() on *@iov regardless of whether the small
1517 * on-stack array was used or not (and regardless of whether this function
1518 * returns an error or not).
1519 *
1520 * Return: 0 on success or negative error code on error.
1521 */
1522int import_iovec(int type, const struct iovec __user * uvector,
1523                 unsigned nr_segs, unsigned fast_segs,
1524                 struct iovec **iov, struct iov_iter *i)
1525{
1526        ssize_t n;
1527        struct iovec *p;
1528        n = rw_copy_check_uvector(type, uvector, nr_segs, fast_segs,
1529                                  *iov, &p);
1530        if (n < 0) {
1531                if (p != *iov)
1532                        kfree(p);
1533                *iov = NULL;
1534                return n;
1535        }
1536        iov_iter_init(i, type, p, nr_segs, n);
1537        *iov = p == *iov ? NULL : p;
1538        return 0;
1539}
1540EXPORT_SYMBOL(import_iovec);
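
/*
 * Illustrative sketch of the contract documented above (the helper is
 * hypothetical): keep a small on-stack array for the common case and
 * kfree() *iov unconditionally afterwards, which is safe whether or not
 * import_iovec() had to allocate a larger array.
 */
static ssize_t __maybe_unused example_import_and_use(int type,
                const struct iovec __user *uvec, unsigned int nr_segs)
{
        struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
        struct iov_iter iter;
        ssize_t ret;

        ret = import_iovec(type, uvec, nr_segs, ARRAY_SIZE(iovstack),
                           &iov, &iter);
        if (ret < 0)
                return ret;

        /* ... hand &iter to the actual I/O here ... */
        ret = iov_iter_count(&iter);

        kfree(iov);     /* NULL when the on-stack array was used */
        return ret;
}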
1541
1542#ifdef CONFIG_COMPAT
1543#include <linux/compat.h>
1544
1545int compat_import_iovec(int type, const struct compat_iovec __user * uvector,
1546                 unsigned nr_segs, unsigned fast_segs,
1547                 struct iovec **iov, struct iov_iter *i)
1548{
1549        ssize_t n;
1550        struct iovec *p;
1551        n = compat_rw_copy_check_uvector(type, uvector, nr_segs, fast_segs,
1552                                  *iov, &p);
1553        if (n < 0) {
1554                if (p != *iov)
1555                        kfree(p);
1556                *iov = NULL;
1557                return n;
1558        }
1559        iov_iter_init(i, type, p, nr_segs, n);
1560        *iov = p == *iov ? NULL : p;
1561        return 0;
1562}
1563#endif
1564
1565int import_single_range(int rw, void __user *buf, size_t len,
1566                 struct iovec *iov, struct iov_iter *i)
1567{
1568        if (len > MAX_RW_COUNT)
1569                len = MAX_RW_COUNT;
1570        if (unlikely(!access_ok(!rw, buf, len)))
1571                return -EFAULT;
1572
1573        iov->iov_base = buf;
1574        iov->iov_len = len;
1575        iov_iter_init(i, rw, iov, 1, len);
1576        return 0;
1577}
1578EXPORT_SYMBOL(import_single_range);
1579
1580int iov_iter_for_each_range(struct iov_iter *i, size_t bytes,
1581                            int (*f)(struct kvec *vec, void *context),
1582                            void *context)
1583{
1584        struct kvec w;
1585        int err = -EINVAL;
1586        if (!bytes)
1587                return 0;
1588
1589        iterate_all_kinds(i, bytes, v, -EINVAL, ({
1590                w.iov_base = kmap(v.bv_page) + v.bv_offset;
1591                w.iov_len = v.bv_len;
1592                err = f(&w, context);
1593                kunmap(v.bv_page);
1594                err;}), ({
1595                w = v;
1596                err = f(&w, context);})
1597        )
1598        return err;
1599}
1600EXPORT_SYMBOL(iov_iter_for_each_range);
1601