linux/lib/iov_iter.c
// SPDX-License-Identifier: GPL-2.0-only
#include <crypto/hash.h>
#include <linux/export.h>
#include <linux/bvec.h>
#include <linux/uio.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/splice.h>
#include <net/checksum.h>
#include <linux/scatterlist.h>
#include <linux/instrumented.h>

#define PIPE_PARANOIA /* for now */

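/*
 * iterate_iovec() walks the iovec array one segment at a time and runs
 * STEP on each non-empty chunk described by __v.  STEP evaluates to the
 * number of bytes it failed to process (0 on full success); a short STEP
 * ends the walk early.  On exit, n holds the number of bytes actually
 * processed and skip the offset into the final segment.
 */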
#define iterate_iovec(i, n, __v, __p, skip, STEP) {     \
        size_t left;                                    \
        size_t wanted = n;                              \
        __p = i->iov;                                   \
        __v.iov_len = min(n, __p->iov_len - skip);      \
        if (likely(__v.iov_len)) {                      \
                __v.iov_base = __p->iov_base + skip;    \
                left = (STEP);                          \
                __v.iov_len -= left;                    \
                skip += __v.iov_len;                    \
                n -= __v.iov_len;                       \
        } else {                                        \
                left = 0;                               \
        }                                               \
        while (unlikely(!left && n)) {                  \
                __p++;                                  \
                __v.iov_len = min(n, __p->iov_len);     \
                if (unlikely(!__v.iov_len))             \
                        continue;                       \
                __v.iov_base = __p->iov_base;           \
                left = (STEP);                          \
                __v.iov_len -= left;                    \
                skip = __v.iov_len;                     \
                n -= __v.iov_len;                       \
        }                                               \
        n = wanted - n;                                 \
}

#define iterate_kvec(i, n, __v, __p, skip, STEP) {      \
        size_t wanted = n;                              \
        __p = i->kvec;                                  \
        __v.iov_len = min(n, __p->iov_len - skip);      \
        if (likely(__v.iov_len)) {                      \
                __v.iov_base = __p->iov_base + skip;    \
                (void)(STEP);                           \
                skip += __v.iov_len;                    \
                n -= __v.iov_len;                       \
        }                                               \
        while (unlikely(n)) {                           \
                __p++;                                  \
                __v.iov_len = min(n, __p->iov_len);     \
                if (unlikely(!__v.iov_len))             \
                        continue;                       \
                __v.iov_base = __p->iov_base;           \
                (void)(STEP);                           \
                skip = __v.iov_len;                     \
                n -= __v.iov_len;                       \
        }                                               \
        n = wanted;                                     \
}

#define iterate_bvec(i, n, __v, __bi, skip, STEP) {     \
        struct bvec_iter __start;                       \
        __start.bi_size = n;                            \
        __start.bi_bvec_done = skip;                    \
        __start.bi_idx = 0;                             \
        for_each_bvec(__v, i->bvec, __bi, __start) {    \
                if (!__v.bv_len)                        \
                        continue;                       \
                (void)(STEP);                           \
        }                                               \
}

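/*
 * iterate_all_kinds() dispatches on the iterator type and runs the
 * matching step body: I for user-space iovecs, B for bio_vecs, K for
 * kernel kvecs; ITER_DISCARD is a no-op.  Unlike iterate_and_advance()
 * below, it leaves the iterator's position untouched.
 */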
#define iterate_all_kinds(i, n, v, I, B, K) {                   \
        if (likely(n)) {                                        \
                size_t skip = i->iov_offset;                    \
                if (unlikely(i->type & ITER_BVEC)) {            \
                        struct bio_vec v;                       \
                        struct bvec_iter __bi;                  \
                        iterate_bvec(i, n, v, __bi, skip, (B))  \
                } else if (unlikely(i->type & ITER_KVEC)) {     \
                        const struct kvec *kvec;                \
                        struct kvec v;                          \
                        iterate_kvec(i, n, v, kvec, skip, (K))  \
                } else if (unlikely(i->type & ITER_DISCARD)) {  \
                } else {                                        \
                        const struct iovec *iov;                \
                        struct iovec v;                         \
                        iterate_iovec(i, n, v, iov, skip, (I))  \
                }                                               \
        }                                                       \
}

#define iterate_and_advance(i, n, v, I, B, K) {                 \
        if (unlikely(i->count < n))                             \
                n = i->count;                                   \
        if (i->count) {                                         \
                size_t skip = i->iov_offset;                    \
                if (unlikely(i->type & ITER_BVEC)) {            \
                        const struct bio_vec *bvec = i->bvec;   \
                        struct bio_vec v;                       \
                        struct bvec_iter __bi;                  \
                        iterate_bvec(i, n, v, __bi, skip, (B))  \
                        i->bvec = __bvec_iter_bvec(i->bvec, __bi);      \
                        i->nr_segs -= i->bvec - bvec;           \
                        skip = __bi.bi_bvec_done;               \
                } else if (unlikely(i->type & ITER_KVEC)) {     \
                        const struct kvec *kvec;                \
                        struct kvec v;                          \
                        iterate_kvec(i, n, v, kvec, skip, (K))  \
                        if (skip == kvec->iov_len) {            \
                                kvec++;                         \
                                skip = 0;                       \
                        }                                       \
                        i->nr_segs -= kvec - i->kvec;           \
                        i->kvec = kvec;                         \
                } else if (unlikely(i->type & ITER_DISCARD)) {  \
                        skip += n;                              \
                } else {                                        \
                        const struct iovec *iov;                \
                        struct iovec v;                         \
                        iterate_iovec(i, n, v, iov, skip, (I))  \
                        if (skip == iov->iov_len) {             \
                                iov++;                          \
                                skip = 0;                       \
                        }                                       \
                        i->nr_segs -= iov - i->iov;             \
                        i->iov = iov;                           \
                }                                               \
                i->count -= n;                                  \
                i->iov_offset = skip;                           \
        }                                                       \
}

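/*
 * Like the raw_copy_*_user() primitives they wrap, copyout() and copyin()
 * return the number of bytes that could NOT be transferred, so 0 means
 * complete success.
 */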
static int copyout(void __user *to, const void *from, size_t n)
{
        if (access_ok(to, n)) {
                instrument_copy_to_user(to, from, n);
                n = raw_copy_to_user(to, from, n);
        }
        return n;
}

static int copyin(void *to, const void __user *from, size_t n)
{
        if (access_ok(from, n)) {
                instrument_copy_from_user(to, from, n);
                n = raw_copy_from_user(to, from, n);
        }
        return n;
}

static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes,
                         struct iov_iter *i)
{
        size_t skip, copy, left, wanted;
        const struct iovec *iov;
        char __user *buf;
        void *kaddr, *from;

        if (unlikely(bytes > i->count))
                bytes = i->count;

        if (unlikely(!bytes))
                return 0;

        might_fault();
        wanted = bytes;
        iov = i->iov;
        skip = i->iov_offset;
        buf = iov->iov_base + skip;
        copy = min(bytes, iov->iov_len - skip);

        if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_writeable(buf, copy)) {
                kaddr = kmap_atomic(page);
                from = kaddr + offset;

                /* first chunk, usually the only one */
                left = copyout(buf, from, copy);
                copy -= left;
                skip += copy;
                from += copy;
                bytes -= copy;

                while (unlikely(!left && bytes)) {
                        iov++;
                        buf = iov->iov_base;
                        copy = min(bytes, iov->iov_len);
                        left = copyout(buf, from, copy);
                        copy -= left;
                        skip = copy;
                        from += copy;
                        bytes -= copy;
                }
                if (likely(!bytes)) {
                        kunmap_atomic(kaddr);
                        goto done;
                }
                offset = from - kaddr;
                buf += copy;
                kunmap_atomic(kaddr);
                copy = min(bytes, iov->iov_len - skip);
        }
        /* Too bad - revert to non-atomic kmap */

        kaddr = kmap(page);
        from = kaddr + offset;
        left = copyout(buf, from, copy);
        copy -= left;
        skip += copy;
        from += copy;
        bytes -= copy;
        while (unlikely(!left && bytes)) {
                iov++;
                buf = iov->iov_base;
                copy = min(bytes, iov->iov_len);
                left = copyout(buf, from, copy);
                copy -= left;
                skip = copy;
                from += copy;
                bytes -= copy;
        }
        kunmap(page);

done:
        if (skip == iov->iov_len) {
                iov++;
                skip = 0;
        }
        i->count -= wanted - bytes;
        i->nr_segs -= iov - i->iov;
        i->iov = iov;
        i->iov_offset = skip;
        return wanted - bytes;
}

static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t bytes,
                         struct iov_iter *i)
{
        size_t skip, copy, left, wanted;
        const struct iovec *iov;
        char __user *buf;
        void *kaddr, *to;

        if (unlikely(bytes > i->count))
                bytes = i->count;

        if (unlikely(!bytes))
                return 0;

        might_fault();
        wanted = bytes;
        iov = i->iov;
        skip = i->iov_offset;
        buf = iov->iov_base + skip;
        copy = min(bytes, iov->iov_len - skip);

        if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_readable(buf, copy)) {
                kaddr = kmap_atomic(page);
                to = kaddr + offset;

                /* first chunk, usually the only one */
                left = copyin(to, buf, copy);
                copy -= left;
                skip += copy;
                to += copy;
                bytes -= copy;

                while (unlikely(!left && bytes)) {
                        iov++;
                        buf = iov->iov_base;
                        copy = min(bytes, iov->iov_len);
                        left = copyin(to, buf, copy);
                        copy -= left;
                        skip = copy;
                        to += copy;
                        bytes -= copy;
                }
                if (likely(!bytes)) {
                        kunmap_atomic(kaddr);
                        goto done;
                }
                offset = to - kaddr;
                buf += copy;
                kunmap_atomic(kaddr);
                copy = min(bytes, iov->iov_len - skip);
        }
        /* Too bad - revert to non-atomic kmap */

        kaddr = kmap(page);
        to = kaddr + offset;
        left = copyin(to, buf, copy);
        copy -= left;
        skip += copy;
        to += copy;
        bytes -= copy;
        while (unlikely(!left && bytes)) {
                iov++;
                buf = iov->iov_base;
                copy = min(bytes, iov->iov_len);
                left = copyin(to, buf, copy);
                copy -= left;
                skip = copy;
                to += copy;
                bytes -= copy;
        }
        kunmap(page);

done:
        if (skip == iov->iov_len) {
                iov++;
                skip = 0;
        }
        i->count -= wanted - bytes;
        i->nr_segs -= iov - i->iov;
        i->iov = iov;
        i->iov_offset = skip;
        return wanted - bytes;
}

#ifdef PIPE_PARANOIA
static bool sanity(const struct iov_iter *i)
{
        struct pipe_inode_info *pipe = i->pipe;
        unsigned int p_head = pipe->head;
        unsigned int p_tail = pipe->tail;
        unsigned int p_mask = pipe->ring_size - 1;
        unsigned int p_occupancy = pipe_occupancy(p_head, p_tail);
        unsigned int i_head = i->head;
        unsigned int idx;

        if (i->iov_offset) {
                struct pipe_buffer *p;
                if (unlikely(p_occupancy == 0))
                        goto Bad;       // pipe must be non-empty
                if (unlikely(i_head != p_head - 1))
                        goto Bad;       // must be at the last buffer...

                p = &pipe->bufs[i_head & p_mask];
                if (unlikely(p->offset + p->len != i->iov_offset))
                        goto Bad;       // ... at the end of segment
        } else {
                if (i_head != p_head)
                        goto Bad;       // must be right after the last buffer
        }
        return true;
Bad:
        printk(KERN_ERR "idx = %d, offset = %zd\n", i_head, i->iov_offset);
        printk(KERN_ERR "head = %d, tail = %d, buffers = %d\n",
                        p_head, p_tail, pipe->ring_size);
        for (idx = 0; idx < pipe->ring_size; idx++)
                printk(KERN_ERR "[%p %p %d %d]\n",
                        pipe->bufs[idx].ops,
                        pipe->bufs[idx].page,
                        pipe->bufs[idx].offset,
                        pipe->bufs[idx].len);
        WARN_ON(1);
        return false;
}
#else
#define sanity(i) true
#endif

static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes,
                         struct iov_iter *i)
{
        struct pipe_inode_info *pipe = i->pipe;
        struct pipe_buffer *buf;
        unsigned int p_tail = pipe->tail;
        unsigned int p_mask = pipe->ring_size - 1;
        unsigned int i_head = i->head;
        size_t off;

        if (unlikely(bytes > i->count))
                bytes = i->count;

        if (unlikely(!bytes))
                return 0;

        if (!sanity(i))
                return 0;

        off = i->iov_offset;
        buf = &pipe->bufs[i_head & p_mask];
        if (off) {
                if (offset == off && buf->page == page) {
                        /* merge with the last one */
                        buf->len += bytes;
                        i->iov_offset += bytes;
                        goto out;
                }
                i_head++;
                buf = &pipe->bufs[i_head & p_mask];
        }
        if (pipe_full(i_head, p_tail, pipe->max_usage))
                return 0;

        buf->ops = &page_cache_pipe_buf_ops;
        get_page(page);
        buf->page = page;
        buf->offset = offset;
        buf->len = bytes;

        pipe->head = i_head + 1;
        i->iov_offset = offset + bytes;
        i->head = i_head;
out:
        i->count -= bytes;
        return bytes;
}

/*
 * Fault in one or more iovecs of the given iov_iter, to a maximum length of
 * bytes.  For each iovec, fault in each page that constitutes the iovec.
 *
 * Return 0 on success, or non-zero if the memory could not be accessed (i.e.
 * because it is an invalid address).
 */
int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
{
        size_t skip = i->iov_offset;
        const struct iovec *iov;
        int err;
        struct iovec v;

        if (!(i->type & (ITER_BVEC|ITER_KVEC))) {
                iterate_iovec(i, bytes, v, iov, skip, ({
                        err = fault_in_pages_readable(v.iov_base, v.iov_len);
                        if (unlikely(err))
                                return err;
                        0;
                }))
        }
        return 0;
}
EXPORT_SYMBOL(iov_iter_fault_in_readable);
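
/*
 * Example (a sketch, not lifted from an in-tree caller): buffered-write
 * paths in the style of generic_perform_write() pre-fault the source
 * before copying with page faults disabled, and retry on a short copy:
 *
 *	if (iov_iter_fault_in_readable(i, bytes))
 *		return -EFAULT;
 *	...
 *	copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
 */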

void iov_iter_init(struct iov_iter *i, unsigned int direction,
                        const struct iovec *iov, unsigned long nr_segs,
                        size_t count)
{
        WARN_ON(direction & ~(READ | WRITE));
        direction &= READ | WRITE;

        /* It will get better.  Eventually... */
        if (uaccess_kernel()) {
                i->type = ITER_KVEC | direction;
                i->kvec = (struct kvec *)iov;
        } else {
                i->type = ITER_IOVEC | direction;
                i->iov = iov;
        }
        i->nr_segs = nr_segs;
        i->iov_offset = 0;
        i->count = count;
}
EXPORT_SYMBOL(iov_iter_init);
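
/*
 * Example (sketch): wrapping a caller-supplied iovec array for a
 * read-style transfer, i.e. data flowing out of the kernel:
 *
 *	struct iovec iov[2] = { ... };
 *	struct iov_iter iter;
 *
 *	iov_iter_init(&iter, READ, iov, 2, total_len);
 *	copied = copy_to_iter(kernel_buf, len, &iter);
 */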

static void memcpy_from_page(char *to, struct page *page, size_t offset, size_t len)
{
        char *from = kmap_atomic(page);
        memcpy(to, from + offset, len);
        kunmap_atomic(from);
}

static void memcpy_to_page(struct page *page, size_t offset, const char *from, size_t len)
{
        char *to = kmap_atomic(page);
        memcpy(to + offset, from, len);
        kunmap_atomic(to);
}

static void memzero_page(struct page *page, size_t offset, size_t len)
{
        char *addr = kmap_atomic(page);
        memset(addr + offset, 0, len);
        kunmap_atomic(addr);
}

static inline bool allocated(struct pipe_buffer *buf)
{
        return buf->ops == &default_pipe_buf_ops;
}

static inline void data_start(const struct iov_iter *i,
                              unsigned int *iter_headp, size_t *offp)
{
        unsigned int p_mask = i->pipe->ring_size - 1;
        unsigned int iter_head = i->head;
        size_t off = i->iov_offset;

        if (off && (!allocated(&i->pipe->bufs[iter_head & p_mask]) ||
                    off == PAGE_SIZE)) {
                iter_head++;
                off = 0;
        }
        *iter_headp = iter_head;
        *offp = off;
}

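/*
 * Grow the pipe to make room for up to 'size' bytes at the iterator's
 * current position, allocating fresh pages as needed.  Returns how much
 * space was actually secured (short if the pipe fills up or an
 * allocation fails) and reports the starting head index and page offset
 * via iter_headp/offp.
 */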
static size_t push_pipe(struct iov_iter *i, size_t size,
                        int *iter_headp, size_t *offp)
{
        struct pipe_inode_info *pipe = i->pipe;
        unsigned int p_tail = pipe->tail;
        unsigned int p_mask = pipe->ring_size - 1;
        unsigned int iter_head;
        size_t off;
        ssize_t left;

        if (unlikely(size > i->count))
                size = i->count;
        if (unlikely(!size))
                return 0;

        left = size;
        data_start(i, &iter_head, &off);
        *iter_headp = iter_head;
        *offp = off;
        if (off) {
                left -= PAGE_SIZE - off;
                if (left <= 0) {
                        pipe->bufs[iter_head & p_mask].len += size;
                        return size;
                }
                pipe->bufs[iter_head & p_mask].len = PAGE_SIZE;
                iter_head++;
        }
        while (!pipe_full(iter_head, p_tail, pipe->max_usage)) {
                struct pipe_buffer *buf = &pipe->bufs[iter_head & p_mask];
                struct page *page = alloc_page(GFP_USER);
                if (!page)
                        break;

                buf->ops = &default_pipe_buf_ops;
                buf->page = page;
                buf->offset = 0;
                buf->len = min_t(ssize_t, left, PAGE_SIZE);
                left -= buf->len;
                iter_head++;
                pipe->head = iter_head;

                if (left == 0)
                        return size;
        }
        return size - left;
}

static size_t copy_pipe_to_iter(const void *addr, size_t bytes,
                                struct iov_iter *i)
{
        struct pipe_inode_info *pipe = i->pipe;
        unsigned int p_mask = pipe->ring_size - 1;
        unsigned int i_head;
        size_t n, off;

        if (!sanity(i))
                return 0;

        bytes = n = push_pipe(i, bytes, &i_head, &off);
        if (unlikely(!n))
                return 0;
        do {
                size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
                memcpy_to_page(pipe->bufs[i_head & p_mask].page, off, addr, chunk);
                i->head = i_head;
                i->iov_offset = off + chunk;
                n -= chunk;
                addr += chunk;
                off = 0;
                i_head++;
        } while (n);
        i->count -= bytes;
        return bytes;
}

static __wsum csum_and_memcpy(void *to, const void *from, size_t len,
                              __wsum sum, size_t off)
{
        __wsum next = csum_partial_copy_nocheck(from, to, len, 0);
        return csum_block_add(sum, next, off);
}

static size_t csum_and_copy_to_pipe_iter(const void *addr, size_t bytes,
                                __wsum *csum, struct iov_iter *i)
{
        struct pipe_inode_info *pipe = i->pipe;
        unsigned int p_mask = pipe->ring_size - 1;
        unsigned int i_head;
        size_t n, r;
        size_t off = 0;
        __wsum sum = *csum;

        if (!sanity(i))
                return 0;

        bytes = n = push_pipe(i, bytes, &i_head, &r);
        if (unlikely(!n))
                return 0;
        do {
                size_t chunk = min_t(size_t, n, PAGE_SIZE - r);
                char *p = kmap_atomic(pipe->bufs[i_head & p_mask].page);
                sum = csum_and_memcpy(p + r, addr, chunk, sum, off);
                kunmap_atomic(p);
                i->head = i_head;
                i->iov_offset = r + chunk;
                n -= chunk;
                off += chunk;
                addr += chunk;
                r = 0;
                i_head++;
        } while (n);
        i->count -= bytes;
        *csum = sum;
        return bytes;
}

size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
{
        const char *from = addr;
        if (unlikely(iov_iter_is_pipe(i)))
                return copy_pipe_to_iter(addr, bytes, i);
        if (iter_is_iovec(i))
                might_fault();
        iterate_and_advance(i, bytes, v,
                copyout(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
                memcpy_to_page(v.bv_page, v.bv_offset,
                               (from += v.bv_len) - v.bv_len, v.bv_len),
                memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len)
        )

        return bytes;
}
EXPORT_SYMBOL(_copy_to_iter);
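
/*
 * Example (sketch): a read path handing a kernel buffer to whatever the
 * iterator describes (user iovec, bvec, kvec or pipe):
 *
 *	size_t copied = copy_to_iter(kbuf, len, iter);
 *	if (copied != len)
 *		return -EFAULT;		// short copy: a segment faulted
 */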

#ifdef CONFIG_ARCH_HAS_UACCESS_MCSAFE
static int copyout_mcsafe(void __user *to, const void *from, size_t n)
{
        if (access_ok(to, n)) {
                instrument_copy_to_user(to, from, n);
                n = copy_to_user_mcsafe((__force void *) to, from, n);
        }
        return n;
}

static unsigned long memcpy_mcsafe_to_page(struct page *page, size_t offset,
                const char *from, size_t len)
{
        unsigned long ret;
        char *to;

        to = kmap_atomic(page);
        ret = memcpy_mcsafe(to + offset, from, len);
        kunmap_atomic(to);

        return ret;
}

static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes,
                                struct iov_iter *i)
{
        struct pipe_inode_info *pipe = i->pipe;
        unsigned int p_mask = pipe->ring_size - 1;
        unsigned int i_head;
        size_t n, off, xfer = 0;

        if (!sanity(i))
                return 0;

        bytes = n = push_pipe(i, bytes, &i_head, &off);
        if (unlikely(!n))
                return 0;
        do {
                size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
                unsigned long rem;

                rem = memcpy_mcsafe_to_page(pipe->bufs[i_head & p_mask].page,
                                            off, addr, chunk);
                i->head = i_head;
                i->iov_offset = off + chunk - rem;
                xfer += chunk - rem;
                if (rem)
                        break;
                n -= chunk;
                addr += chunk;
                off = 0;
                i_head++;
        } while (n);
        i->count -= xfer;
        return xfer;
}

/**
 * _copy_to_iter_mcsafe - copy to user with source-read error exception handling
 * @addr: source kernel address
 * @bytes: total transfer length
 * @iter: destination iterator
 *
 * The pmem driver arranges for filesystem-dax to use this facility via
 * dax_copy_to_iter() for protecting read/write to persistent memory.
 * Unless / until an architecture can guarantee identical performance
 * between _copy_to_iter_mcsafe() and _copy_to_iter() it would be a
 * performance regression to switch more users to the mcsafe version.
 *
 * The main differences from the typical _copy_to_iter() are:
 *
 * * Typical tail/residue handling after a fault retries the copy
 *   byte-by-byte until the fault happens again. Re-triggering machine
 *   checks is potentially fatal so the implementation uses source
 *   alignment and poison alignment assumptions to avoid re-triggering
 *   hardware exceptions.
 *
 * * ITER_KVEC, ITER_PIPE, and ITER_BVEC can return short copies.
 *   Compare to copy_to_iter() where only ITER_IOVEC attempts might return
 *   a short copy.
 *
 * See MCSAFE_TEST for self-test.
 */
size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i)
{
        const char *from = addr;
        unsigned long rem, curr_addr, s_addr = (unsigned long) addr;

        if (unlikely(iov_iter_is_pipe(i)))
                return copy_pipe_to_iter_mcsafe(addr, bytes, i);
        if (iter_is_iovec(i))
                might_fault();
        iterate_and_advance(i, bytes, v,
                copyout_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
                ({
                rem = memcpy_mcsafe_to_page(v.bv_page, v.bv_offset,
                               (from += v.bv_len) - v.bv_len, v.bv_len);
                if (rem) {
                        curr_addr = (unsigned long) from;
                        bytes = curr_addr - s_addr - rem;
                        return bytes;
                }
                }),
                ({
                rem = memcpy_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len,
                                v.iov_len);
                if (rem) {
                        curr_addr = (unsigned long) from;
                        bytes = curr_addr - s_addr - rem;
                        return bytes;
                }
                })
        )

        return bytes;
}
EXPORT_SYMBOL_GPL(_copy_to_iter_mcsafe);
#endif /* CONFIG_ARCH_HAS_UACCESS_MCSAFE */

size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
{
        char *to = addr;
        if (unlikely(iov_iter_is_pipe(i))) {
                WARN_ON(1);
                return 0;
        }
        if (iter_is_iovec(i))
                might_fault();
        iterate_and_advance(i, bytes, v,
                copyin((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
                memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
                                 v.bv_offset, v.bv_len),
                memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
        )

        return bytes;
}
EXPORT_SYMBOL(_copy_from_iter);

bool _copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i)
{
        char *to = addr;
        if (unlikely(iov_iter_is_pipe(i))) {
                WARN_ON(1);
                return false;
        }
        if (unlikely(i->count < bytes))
                return false;

        if (iter_is_iovec(i))
                might_fault();
        iterate_all_kinds(i, bytes, v, ({
                if (copyin((to += v.iov_len) - v.iov_len,
                                      v.iov_base, v.iov_len))
                        return false;
                0;}),
                memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
                                 v.bv_offset, v.bv_len),
                memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
        )

        iov_iter_advance(i, bytes);
        return true;
}
EXPORT_SYMBOL(_copy_from_iter_full);

size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
{
        char *to = addr;
        if (unlikely(iov_iter_is_pipe(i))) {
                WARN_ON(1);
                return 0;
        }
        iterate_and_advance(i, bytes, v,
                __copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len,
                                         v.iov_base, v.iov_len),
                memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
                                 v.bv_offset, v.bv_len),
                memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
        )

        return bytes;
}
EXPORT_SYMBOL(_copy_from_iter_nocache);

#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
/**
 * _copy_from_iter_flushcache - write destination through cpu cache
 * @addr: destination kernel address
 * @bytes: total transfer length
 * @iter: source iterator
 *
 * The pmem driver arranges for filesystem-dax to use this facility via
 * dax_copy_from_iter() to ensure that writes to persistent memory are
 * flushed through the CPU cache.  It differs from
 * _copy_from_iter_nocache() in that it guarantees all data is flushed
 * for all iterator types.  _copy_from_iter_nocache() only attempts to
 * bypass the cache for the ITER_IOVEC case, and on some archs may use
 * instructions that strand dirty-data in the cache.
 */
size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
{
        char *to = addr;
        if (unlikely(iov_iter_is_pipe(i))) {
                WARN_ON(1);
                return 0;
        }
        iterate_and_advance(i, bytes, v,
                __copy_from_user_flushcache((to += v.iov_len) - v.iov_len,
                                         v.iov_base, v.iov_len),
                memcpy_page_flushcache((to += v.bv_len) - v.bv_len, v.bv_page,
                                 v.bv_offset, v.bv_len),
                memcpy_flushcache((to += v.iov_len) - v.iov_len, v.iov_base,
                        v.iov_len)
        )

        return bytes;
}
EXPORT_SYMBOL_GPL(_copy_from_iter_flushcache);
#endif

bool _copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
{
        char *to = addr;
        if (unlikely(iov_iter_is_pipe(i))) {
                WARN_ON(1);
                return false;
        }
        if (unlikely(i->count < bytes))
                return false;
        iterate_all_kinds(i, bytes, v, ({
                if (__copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len,
                                             v.iov_base, v.iov_len))
                        return false;
                0;}),
                memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
                                 v.bv_offset, v.bv_len),
                memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
        )

        iov_iter_advance(i, bytes);
        return true;
}
EXPORT_SYMBOL(_copy_from_iter_full_nocache);

static inline bool page_copy_sane(struct page *page, size_t offset, size_t n)
{
        struct page *head;
        size_t v = n + offset;

        /*
         * The general case needs the page order to compute the page
         * size.  However, we mostly deal with order-0 pages and thus
         * can avoid a possible cache line miss for requests that fit
         * all page orders.
         */
        if (n <= v && v <= PAGE_SIZE)
                return true;

        head = compound_head(page);
        v += (page - head) << PAGE_SHIFT;

        if (likely(n <= v && v <= (page_size(head))))
                return true;
        WARN_ON(1);
        return false;
}

size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
                         struct iov_iter *i)
{
        if (unlikely(!page_copy_sane(page, offset, bytes)))
                return 0;
        if (i->type & (ITER_BVEC|ITER_KVEC)) {
                void *kaddr = kmap_atomic(page);
                size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
                kunmap_atomic(kaddr);
                return wanted;
        } else if (unlikely(iov_iter_is_discard(i))) {
                /* consume the bytes, as every other iterator type does */
                if (unlikely(i->count < bytes))
                        bytes = i->count;
                i->count -= bytes;
                return bytes;
        } else if (likely(!iov_iter_is_pipe(i)))
                return copy_page_to_iter_iovec(page, offset, bytes, i);
        else
                return copy_page_to_iter_pipe(page, offset, bytes, i);
}
EXPORT_SYMBOL(copy_page_to_iter);

size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
                         struct iov_iter *i)
{
        if (unlikely(!page_copy_sane(page, offset, bytes)))
                return 0;
        if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
                WARN_ON(1);
                return 0;
        }
        if (i->type & (ITER_BVEC|ITER_KVEC)) {
                void *kaddr = kmap_atomic(page);
                size_t wanted = _copy_from_iter(kaddr + offset, bytes, i);
                kunmap_atomic(kaddr);
                return wanted;
        } else
                return copy_page_from_iter_iovec(page, offset, bytes, i);
}
EXPORT_SYMBOL(copy_page_from_iter);

static size_t pipe_zero(size_t bytes, struct iov_iter *i)
{
        struct pipe_inode_info *pipe = i->pipe;
        unsigned int p_mask = pipe->ring_size - 1;
        unsigned int i_head;
        size_t n, off;

        if (!sanity(i))
                return 0;

        bytes = n = push_pipe(i, bytes, &i_head, &off);
        if (unlikely(!n))
                return 0;

        do {
                size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
                memzero_page(pipe->bufs[i_head & p_mask].page, off, chunk);
                i->head = i_head;
                i->iov_offset = off + chunk;
                n -= chunk;
                off = 0;
                i_head++;
        } while (n);
        i->count -= bytes;
        return bytes;
}

size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
{
        if (unlikely(iov_iter_is_pipe(i)))
                return pipe_zero(bytes, i);
        iterate_and_advance(i, bytes, v,
                clear_user(v.iov_base, v.iov_len),
                memzero_page(v.bv_page, v.bv_offset, v.bv_len),
                memset(v.iov_base, 0, v.iov_len)
        )

        return bytes;
}
EXPORT_SYMBOL(iov_iter_zero);

size_t iov_iter_copy_from_user_atomic(struct page *page,
                struct iov_iter *i, unsigned long offset, size_t bytes)
{
        char *kaddr = kmap_atomic(page), *p = kaddr + offset;
        if (unlikely(!page_copy_sane(page, offset, bytes))) {
                kunmap_atomic(kaddr);
                return 0;
        }
        if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
                kunmap_atomic(kaddr);
                WARN_ON(1);
                return 0;
        }
        iterate_all_kinds(i, bytes, v,
                copyin((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
                memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page,
                                 v.bv_offset, v.bv_len),
                memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
        )
        kunmap_atomic(kaddr);
        return bytes;
}
EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);

static inline void pipe_truncate(struct iov_iter *i)
{
        struct pipe_inode_info *pipe = i->pipe;
        unsigned int p_tail = pipe->tail;
        unsigned int p_head = pipe->head;
        unsigned int p_mask = pipe->ring_size - 1;

        if (!pipe_empty(p_head, p_tail)) {
                struct pipe_buffer *buf;
                unsigned int i_head = i->head;
                size_t off = i->iov_offset;

                if (off) {
                        buf = &pipe->bufs[i_head & p_mask];
                        buf->len = off - buf->offset;
                        i_head++;
                }
                while (p_head != i_head) {
                        p_head--;
                        pipe_buf_release(pipe, &pipe->bufs[p_head & p_mask]);
                }

                pipe->head = p_head;
        }
}

static void pipe_advance(struct iov_iter *i, size_t size)
{
        struct pipe_inode_info *pipe = i->pipe;
        if (unlikely(i->count < size))
                size = i->count;
        if (size) {
                struct pipe_buffer *buf;
                unsigned int p_mask = pipe->ring_size - 1;
                unsigned int i_head = i->head;
                size_t off = i->iov_offset, left = size;

                if (off) /* make it relative to the beginning of buffer */
                        left += off - pipe->bufs[i_head & p_mask].offset;
                while (1) {
                        buf = &pipe->bufs[i_head & p_mask];
                        if (left <= buf->len)
                                break;
                        left -= buf->len;
                        i_head++;
                }
                i->head = i_head;
                i->iov_offset = buf->offset + left;
        }
        i->count -= size;
        /* ... and discard everything past that point */
        pipe_truncate(i);
}

void iov_iter_advance(struct iov_iter *i, size_t size)
{
        if (unlikely(iov_iter_is_pipe(i))) {
                pipe_advance(i, size);
                return;
        }
        if (unlikely(iov_iter_is_discard(i))) {
                i->count -= size;
                return;
        }
        iterate_and_advance(i, size, v, 0, 0, 0)
}
EXPORT_SYMBOL(iov_iter_advance);
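
/*
 * Example (sketch; do_transfer() is a hypothetical stand-in): a caller
 * that consumed data speculatively can wind the iterator back if the
 * operation ultimately fails:
 *
 *	size_t before = iov_iter_count(i);
 *	ret = do_transfer(i);			// advances i as it copies
 *	if (ret < 0)
 *		iov_iter_revert(i, before - iov_iter_count(i));
 */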

void iov_iter_revert(struct iov_iter *i, size_t unroll)
{
        if (!unroll)
                return;
        if (WARN_ON(unroll > MAX_RW_COUNT))
                return;
        i->count += unroll;
        if (unlikely(iov_iter_is_pipe(i))) {
                struct pipe_inode_info *pipe = i->pipe;
                unsigned int p_mask = pipe->ring_size - 1;
                unsigned int i_head = i->head;
                size_t off = i->iov_offset;
                while (1) {
                        struct pipe_buffer *b = &pipe->bufs[i_head & p_mask];
                        size_t n = off - b->offset;
                        if (unroll < n) {
                                off -= unroll;
                                break;
                        }
                        unroll -= n;
                        if (!unroll && i_head == i->start_head) {
                                off = 0;
                                break;
                        }
                        i_head--;
                        b = &pipe->bufs[i_head & p_mask];
                        off = b->offset + b->len;
                }
                i->iov_offset = off;
                i->head = i_head;
                pipe_truncate(i);
                return;
        }
        if (unlikely(iov_iter_is_discard(i)))
                return;
        if (unroll <= i->iov_offset) {
                i->iov_offset -= unroll;
                return;
        }
        unroll -= i->iov_offset;
        if (iov_iter_is_bvec(i)) {
                const struct bio_vec *bvec = i->bvec;
                while (1) {
                        size_t n = (--bvec)->bv_len;
                        i->nr_segs++;
                        if (unroll <= n) {
                                i->bvec = bvec;
                                i->iov_offset = n - unroll;
                                return;
                        }
                        unroll -= n;
                }
        } else { /* same logics for iovec and kvec */
                const struct iovec *iov = i->iov;
                while (1) {
                        size_t n = (--iov)->iov_len;
                        i->nr_segs++;
                        if (unroll <= n) {
                                i->iov = iov;
                                i->iov_offset = n - unroll;
                                return;
                        }
                        unroll -= n;
                }
        }
}
EXPORT_SYMBOL(iov_iter_revert);

/*
 * Return the count of just the current iov_iter segment.
 */
size_t iov_iter_single_seg_count(const struct iov_iter *i)
{
        if (unlikely(iov_iter_is_pipe(i)))
                return i->count;        // it is a silly place, anyway
        if (i->nr_segs == 1)
                return i->count;
        if (unlikely(iov_iter_is_discard(i)))
                return i->count;
        else if (iov_iter_is_bvec(i))
                return min(i->count, i->bvec->bv_len - i->iov_offset);
        else
                return min(i->count, i->iov->iov_len - i->iov_offset);
}
EXPORT_SYMBOL(iov_iter_single_seg_count);

void iov_iter_kvec(struct iov_iter *i, unsigned int direction,
                        const struct kvec *kvec, unsigned long nr_segs,
                        size_t count)
{
        WARN_ON(direction & ~(READ | WRITE));
        i->type = ITER_KVEC | (direction & (READ | WRITE));
        i->kvec = kvec;
        i->nr_segs = nr_segs;
        i->iov_offset = 0;
        i->count = count;
}
EXPORT_SYMBOL(iov_iter_kvec);

void iov_iter_bvec(struct iov_iter *i, unsigned int direction,
                        const struct bio_vec *bvec, unsigned long nr_segs,
                        size_t count)
{
        WARN_ON(direction & ~(READ | WRITE));
        i->type = ITER_BVEC | (direction & (READ | WRITE));
        i->bvec = bvec;
        i->nr_segs = nr_segs;
        i->iov_offset = 0;
        i->count = count;
}
EXPORT_SYMBOL(iov_iter_bvec);
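
/*
 * Example (sketch): wrapping a single page in a bvec-backed iterator,
 * e.g. to read into a page cache page with a kernel_read()-style helper:
 *
 *	struct bio_vec bvec = {
 *		.bv_page	= page,
 *		.bv_len		= PAGE_SIZE,
 *		.bv_offset	= 0,
 *	};
 *	struct iov_iter iter;
 *
 *	iov_iter_bvec(&iter, READ, &bvec, 1, PAGE_SIZE);
 */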

void iov_iter_pipe(struct iov_iter *i, unsigned int direction,
                        struct pipe_inode_info *pipe,
                        size_t count)
{
        BUG_ON(direction != READ);
        WARN_ON(pipe_full(pipe->head, pipe->tail, pipe->ring_size));
        i->type = ITER_PIPE | READ;
        i->pipe = pipe;
        i->head = pipe->head;
        i->iov_offset = 0;
        i->count = count;
        i->start_head = i->head;
}
EXPORT_SYMBOL(iov_iter_pipe);

/**
 * iov_iter_discard - Initialise an I/O iterator that discards data
 * @i: The iterator to initialise.
 * @direction: The direction of the transfer.
 * @count: The size of the I/O buffer in bytes.
 *
 * Set up an I/O iterator that just discards everything that's written to it.
 * It's only available as a READ iterator.
 */
void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count)
{
        BUG_ON(direction != READ);
        i->type = ITER_DISCARD | READ;
        i->count = count;
        i->iov_offset = 0;
}
EXPORT_SYMBOL(iov_iter_discard);
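
/*
 * Example (sketch): draining 'len' unwanted bytes from a datagram, the
 * way networking code skips payload it will never deliver:
 *
 *	struct iov_iter iter;
 *
 *	iov_iter_discard(&iter, READ, len);
 *	err = skb_copy_datagram_iter(skb, offset, &iter, len);	// data is dropped
 */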

unsigned long iov_iter_alignment(const struct iov_iter *i)
{
        unsigned long res = 0;
        size_t size = i->count;

        if (unlikely(iov_iter_is_pipe(i))) {
                unsigned int p_mask = i->pipe->ring_size - 1;

                if (size && i->iov_offset && allocated(&i->pipe->bufs[i->head & p_mask]))
                        return size | i->iov_offset;
                return size;
        }
        iterate_all_kinds(i, size, v,
                (res |= (unsigned long)v.iov_base | v.iov_len, 0),
                res |= v.bv_offset | v.bv_len,
                res |= (unsigned long)v.iov_base | v.iov_len
        )
        return res;
}
EXPORT_SYMBOL(iov_iter_alignment);
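
/*
 * The result above ORs together every segment's base address and length,
 * so its low bits expose the worst-case misalignment.  Example (sketch):
 * a direct-I/O path rejecting buffers not aligned to the logical block
 * size:
 *
 *	if (iov_iter_alignment(iter) & (bdev_logical_block_size(bdev) - 1))
 *		return -EINVAL;
 */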

unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
{
        unsigned long res = 0;
        size_t size = i->count;

        if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
                WARN_ON(1);
                return ~0U;
        }

        iterate_all_kinds(i, size, v,
                (res |= (!res ? 0 : (unsigned long)v.iov_base) |
                        (size != v.iov_len ? size : 0), 0),
                (res |= (!res ? 0 : (unsigned long)v.bv_offset) |
                        (size != v.bv_len ? size : 0)),
                (res |= (!res ? 0 : (unsigned long)v.iov_base) |
                        (size != v.iov_len ? size : 0))
                );
        return res;
}
EXPORT_SYMBOL(iov_iter_gap_alignment);

static inline ssize_t __pipe_get_pages(struct iov_iter *i,
                                size_t maxsize,
                                struct page **pages,
                                int iter_head,
                                size_t *start)
{
        struct pipe_inode_info *pipe = i->pipe;
        unsigned int p_mask = pipe->ring_size - 1;
        ssize_t n = push_pipe(i, maxsize, &iter_head, start);
        if (!n)
                return -EFAULT;

        maxsize = n;
        n += *start;
        while (n > 0) {
                get_page(*pages++ = pipe->bufs[iter_head & p_mask].page);
                iter_head++;
                n -= PAGE_SIZE;
        }

        return maxsize;
}

static ssize_t pipe_get_pages(struct iov_iter *i,
                   struct page **pages, size_t maxsize, unsigned maxpages,
                   size_t *start)
{
        unsigned int iter_head, npages;
        size_t capacity;

        if (!maxsize)
                return 0;

        if (!sanity(i))
                return -EFAULT;

        data_start(i, &iter_head, start);
        /* Amount of free space: some of this one + all after this one */
        npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe);
        capacity = min(npages, maxpages) * PAGE_SIZE - *start;

        return __pipe_get_pages(i, min(maxsize, capacity), pages, iter_head, start);
}

ssize_t iov_iter_get_pages(struct iov_iter *i,
                   struct page **pages, size_t maxsize, unsigned maxpages,
                   size_t *start)
{
        if (maxsize > i->count)
                maxsize = i->count;

        if (unlikely(iov_iter_is_pipe(i)))
                return pipe_get_pages(i, pages, maxsize, maxpages, start);
        if (unlikely(iov_iter_is_discard(i)))
                return -EFAULT;

        iterate_all_kinds(i, maxsize, v, ({
                unsigned long addr = (unsigned long)v.iov_base;
                size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
                int n;
                int res;

                if (len > maxpages * PAGE_SIZE)
                        len = maxpages * PAGE_SIZE;
                addr &= ~(PAGE_SIZE - 1);
                n = DIV_ROUND_UP(len, PAGE_SIZE);
                res = get_user_pages_fast(addr, n,
                                iov_iter_rw(i) != WRITE ? FOLL_WRITE : 0,
                                pages);
                if (unlikely(res < 0))
                        return res;
                return (res == n ? len : res * PAGE_SIZE) - *start;
        0;}),({
                /* can't be more than PAGE_SIZE */
                *start = v.bv_offset;
                get_page(*pages = v.bv_page);
                return v.bv_len;
        }),({
                return -EFAULT;
        })
        )
        return 0;
}
EXPORT_SYMBOL(iov_iter_get_pages);
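
/*
 * Example (sketch): pinning the first batch of pages behind an iterator,
 * as direct-I/O implementations do:
 *
 *	struct page *pages[16];
 *	size_t offset;
 *	ssize_t got;
 *
 *	got = iov_iter_get_pages(i, pages, maxsize, 16, &offset);
 *	if (got < 0)
 *		return got;
 *	// 'got' bytes start 'offset' bytes into pages[0]; drop each page
 *	// reference with put_page() when done
 */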
1352
1353static struct page **get_pages_array(size_t n)
1354{
1355        return kvmalloc_array(n, sizeof(struct page *), GFP_KERNEL);
1356}
1357
1358static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
1359                   struct page ***pages, size_t maxsize,
1360                   size_t *start)
1361{
1362        struct page **p;
1363        unsigned int iter_head, npages;
1364        ssize_t n;
1365
1366        if (!maxsize)
1367                return 0;
1368
1369        if (!sanity(i))
1370                return -EFAULT;
1371
1372        data_start(i, &iter_head, start);
1373        /* Amount of free space: some of this one + all after this one */
1374        npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe);
1375        n = npages * PAGE_SIZE - *start;
1376        if (maxsize > n)
1377                maxsize = n;
1378        else
1379                npages = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE);
1380        p = get_pages_array(npages);
1381        if (!p)
1382                return -ENOMEM;
1383        n = __pipe_get_pages(i, maxsize, p, iter_head, start);
1384        if (n > 0)
1385                *pages = p;
1386        else
1387                kvfree(p);
1388        return n;
1389}
1390
1391ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
1392                   struct page ***pages, size_t maxsize,
1393                   size_t *start)
1394{
1395        struct page **p;
1396
1397        if (maxsize > i->count)
1398                maxsize = i->count;
1399
1400        if (unlikely(iov_iter_is_pipe(i)))
1401                return pipe_get_pages_alloc(i, pages, maxsize, start);
1402        if (unlikely(iov_iter_is_discard(i)))
1403                return -EFAULT;
1404
1405        iterate_all_kinds(i, maxsize, v, ({
1406                unsigned long addr = (unsigned long)v.iov_base;
1407                size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
1408                int n;
1409                int res;
1410
1411                addr &= ~(PAGE_SIZE - 1);
1412                n = DIV_ROUND_UP(len, PAGE_SIZE);
1413                p = get_pages_array(n);
1414                if (!p)
1415                        return -ENOMEM;
1416                res = get_user_pages_fast(addr, n,
1417                                iov_iter_rw(i) != WRITE ?  FOLL_WRITE : 0, p);
1418                if (unlikely(res < 0)) {
1419                        kvfree(p);
1420                        return res;
1421                }
1422                *pages = p;
1423                return (res == n ? len : res * PAGE_SIZE) - *start;
1424        0;}),({
1425                /* can't be more than PAGE_SIZE */
1426                *start = v.bv_offset;
1427                *pages = p = get_pages_array(1);
1428                if (!p)
1429                        return -ENOMEM;
1430                get_page(*p = v.bv_page);
1431                return v.bv_len;
1432        }),({
1433                return -EFAULT;
1434        })
1435        )
1436        return 0;
1437}
1438EXPORT_SYMBOL(iov_iter_get_pages_alloc);
1439
1440size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
1441                               struct iov_iter *i)
1442{
1443        char *to = addr;
1444        __wsum sum, next;
1445        size_t off = 0;
1446        sum = *csum;
1447        if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
1448                WARN_ON(1);
1449                return 0;
1450        }
1451        iterate_and_advance(i, bytes, v, ({
1452                int err = 0;
1453                next = csum_and_copy_from_user(v.iov_base,
1454                                               (to += v.iov_len) - v.iov_len,
1455                                               v.iov_len, 0, &err);
1456                if (!err) {
1457                        sum = csum_block_add(sum, next, off);
1458                        off += v.iov_len;
1459                }
1460                err ? v.iov_len : 0;
1461        }), ({
1462                char *p = kmap_atomic(v.bv_page);
1463                sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
1464                                      p + v.bv_offset, v.bv_len,
1465                                      sum, off);
1466                kunmap_atomic(p);
1467                off += v.bv_len;
1468        }),({
1469                sum = csum_and_memcpy((to += v.iov_len) - v.iov_len,
1470                                      v.iov_base, v.iov_len,
1471                                      sum, off);
1472                off += v.iov_len;
1473        })
1474        )
1475        *csum = sum;
1476        return bytes;
1477}
1478EXPORT_SYMBOL(csum_and_copy_from_iter);
1479
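/*
 * Example (editor's illustration): pulling user data out of a send-path
 * iterator into a kernel buffer while folding it into an Internet
 * checksum, in the spirit of the networking getfrag callbacks.  The
 * helper name and signature below are hypothetical.
 */
static int copy_in_and_csum(void *to, size_t len, struct iov_iter *from,
			    __wsum *running)
{
	/* a short return means a user segment faulted part-way */
	if (csum_and_copy_from_iter(to, len, running, from) != len)
		return -EFAULT;
	return 0;
}
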
1480bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum,
1481                               struct iov_iter *i)
1482{
1483        char *to = addr;
1484        __wsum sum, next;
1485        size_t off = 0;
1486        sum = *csum;
1487        if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
1488                WARN_ON(1);
1489                return false;
1490        }
1491        if (unlikely(i->count < bytes))
1492                return false;
1493        iterate_all_kinds(i, bytes, v, ({
1494                int err = 0;
1495                next = csum_and_copy_from_user(v.iov_base,
1496                                               (to += v.iov_len) - v.iov_len,
1497                                               v.iov_len, 0, &err);
1498                if (err)
1499                        return false;
1500                sum = csum_block_add(sum, next, off);
1501                off += v.iov_len;
1502                0; /* step result: nothing left uncopied */
1503        }), ({
1504                char *p = kmap_atomic(v.bv_page);
1505                sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
1506                                      p + v.bv_offset, v.bv_len,
1507                                      sum, off);
1508                kunmap_atomic(p);
1509                off += v.bv_len;
1510        }),({
1511                sum = csum_and_memcpy((to += v.iov_len) - v.iov_len,
1512                                      v.iov_base, v.iov_len,
1513                                      sum, off);
1514                off += v.iov_len;
1515        })
1516        )
1517        *csum = sum;
1518        iov_iter_advance(i, bytes);
1519        return true;
1520}
1521EXPORT_SYMBOL(csum_and_copy_from_iter_full);
1522
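/*
 * Example (editor's illustration): the _full variant is all-or-nothing.
 * On failure nothing has been consumed from the iterator (it only
 * advances after a fully successful copy, see above), so the caller can
 * bail out without rewinding.  Sketch of a hypothetical caller:
 */
static int copy_in_exact(void *to, size_t len, struct iov_iter *from,
			 __wsum *running)
{
	if (!csum_and_copy_from_iter_full(to, len, running, from))
		return -EFAULT;	/* @from is untouched on this path */
	return 0;
}
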
1523size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *csump,
1524                             struct iov_iter *i)
1525{
1526        const char *from = addr;
1527        __wsum *csum = csump;
1528        __wsum sum, next;
1529        size_t off = 0;
1530
1531        if (unlikely(iov_iter_is_pipe(i)))
1532                return csum_and_copy_to_pipe_iter(addr, bytes, csum, i);
1533
1534        sum = *csum;
1535        if (unlikely(iov_iter_is_discard(i))) {
1536                WARN_ON(1);     /* for now */
1537                return 0;
1538        }
1539        iterate_and_advance(i, bytes, v, ({
1540                int err = 0;
1541                next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len,
1542                                             v.iov_base,
1543                                             v.iov_len, 0, &err);
1544                if (!err) {
1545                        sum = csum_block_add(sum, next, off);
1546                        off += v.iov_len;
1547                }
1548                err ? v.iov_len : 0; /* as above: report uncopied bytes to stop the walk */
1549        }), ({
1550                char *p = kmap_atomic(v.bv_page);
1551                sum = csum_and_memcpy(p + v.bv_offset,
1552                                      (from += v.bv_len) - v.bv_len,
1553                                      v.bv_len, sum, off);
1554                kunmap_atomic(p);
1555                off += v.bv_len;
1556        }),({
1557                sum = csum_and_memcpy(v.iov_base,
1558                                     (from += v.iov_len) - v.iov_len,
1559                                     v.iov_len, sum, off);
1560                off += v.iov_len;
1561        })
1562        )
1563        *csum = sum;
1564        return bytes;
1565}
1566EXPORT_SYMBOL(csum_and_copy_to_iter);
1567
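/*
 * Example (editor's illustration): the receive-side mirror image - copy
 * kernel data out to the iterator and checksum it on the way, as the
 * datagram receive code does when it verifies a packet checksum while
 * copying to userspace.  The helper name is hypothetical.
 */
static int copy_out_and_csum(const void *from, size_t len,
			     struct iov_iter *to, __wsum *running)
{
	if (csum_and_copy_to_iter(from, len, running, to) != len)
		return -EFAULT;	/* short copy into a user segment */
	return 0;
}
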
1568size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp,
1569                struct iov_iter *i)
1570{
1571#ifdef CONFIG_CRYPTO_HASH
1572        struct ahash_request *hash = hashp;
1573        struct scatterlist sg;
1574        size_t copied;
1575
1576        copied = copy_to_iter(addr, bytes, i);
1577        sg_init_one(&sg, addr, copied);
1578        ahash_request_set_crypt(hash, &sg, NULL, copied);
1579        crypto_ahash_update(hash);
1580        return copied;
1581#else
1582        return 0;
1583#endif
1584}
1585EXPORT_SYMBOL(hash_and_copy_to_iter);
1586
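/*
 * Example (editor's illustration): hash_and_copy_to_iter() expects a
 * fully set-up ahash request.  A minimal sketch assuming the "sha256"
 * algorithm is available; the helper name is hypothetical and error
 * paths are abbreviated.
 */
static int copy_out_and_hash(const void *buf, size_t len, struct iov_iter *to)
{
	struct crypto_ahash *tfm;
	struct ahash_request *req;
	int err = 0;

	tfm = crypto_alloc_ahash("sha256", 0, CRYPTO_ALG_ASYNC);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);
	req = ahash_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		crypto_free_ahash(tfm);
		return -ENOMEM;
	}
	ahash_request_set_callback(req, 0, NULL, NULL);
	crypto_ahash_init(req);

	if (hash_and_copy_to_iter(buf, len, req, to) != len)
		err = -EFAULT;
	/* ... set a result buffer and crypto_ahash_final(req) here ... */

	ahash_request_free(req);
	crypto_free_ahash(tfm);
	return err;
}
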
1587int iov_iter_npages(const struct iov_iter *i, int maxpages)
1588{
1589        size_t size = i->count;
1590        int npages = 0;
1591
1592        if (!size)
1593                return 0;
1594        if (unlikely(iov_iter_is_discard(i)))
1595                return 0;
1596
1597        if (unlikely(iov_iter_is_pipe(i))) {
1598                struct pipe_inode_info *pipe = i->pipe;
1599                unsigned int iter_head;
1600                size_t off;
1601
1602                if (!sanity(i))
1603                        return 0;
1604
1605                data_start(i, &iter_head, &off);
1606                /* some of this one + all after this one */
1607                npages = pipe_space_for_user(iter_head, pipe->tail, pipe);
1608                if (npages >= maxpages)
1609                        return maxpages;
1610        } else iterate_all_kinds(i, size, v, ({
1611                unsigned long p = (unsigned long)v.iov_base;
1612                npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
1613                        - p / PAGE_SIZE;
1614                if (npages >= maxpages)
1615                        return maxpages;
1616        0;}),({
1617                npages++;
1618                if (npages >= maxpages)
1619                        return maxpages;
1620        }),({
1621                unsigned long p = (unsigned long)v.iov_base;
1622                npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
1623                        - p / PAGE_SIZE;
1624                if (npages >= maxpages)
1625                        return maxpages;
1626        })
1627        )
1628        return npages;
1629}
1630EXPORT_SYMBOL(iov_iter_npages);
1631
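/*
 * Example (editor's illustration): sizing a page array before walking
 * an iterator.  The cap makes the result safe to use directly as an
 * allocation size; 256 here stands in for a limit such as
 * BIO_MAX_PAGES.
 */
static int pages_needed(const struct iov_iter *iter)
{
	return iov_iter_npages(iter, 256);	/* never exceeds the cap */
}
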
1632const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
1633{
1634        *new = *old;
1635        if (unlikely(iov_iter_is_pipe(new))) {
1636                WARN_ON(1);
1637                return NULL;
1638        }
1639        if (unlikely(iov_iter_is_discard(new)))
1640                return NULL;
1641        if (iov_iter_is_bvec(new))
1642                return new->bvec = kmemdup(new->bvec,
1643                                    new->nr_segs * sizeof(struct bio_vec),
1644                                    flags);
1645        else
1646                /* iovec and kvec have identical layout */
1647                return new->iov = kmemdup(new->iov,
1648                                   new->nr_segs * sizeof(struct iovec),
1649                                   flags);
1650}
1651EXPORT_SYMBOL(dup_iter);
1652
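/*
 * Example (editor's illustration): duplicating an iterator so it can
 * outlive the submitter's stack frame, e.g. for asynchronous
 * completion.  The opaque return value is the duplicated segment array
 * and must be kfree()d once the copy is done with; struct my_async_req
 * and the helpers are hypothetical.
 */
struct my_async_req {
	struct iov_iter iter;
	const void *iov_dup;	/* cookie from dup_iter(), for kfree() */
};

static int my_req_init(struct my_async_req *req, struct iov_iter *src)
{
	req->iov_dup = dup_iter(&req->iter, src, GFP_KERNEL);
	if (!req->iov_dup)	/* allocation failure, or pipe/discard iter */
		return -ENOMEM;
	return 0;
}

static void my_req_done(struct my_async_req *req)
{
	kfree(req->iov_dup);
}
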
1653/**
1654 * import_iovec() - Copy an array of &struct iovec from userspace
1655 *     into the kernel, check that it is valid, and initialize a new
1656 *     &struct iov_iter iterator to access it.
1657 *
1658 * @type: One of %READ or %WRITE.
1659 * @uvector: Pointer to the userspace array.
1660 * @nr_segs: Number of elements in userspace array.
1661 * @fast_segs: Number of elements in @iov.
1662 * @iov: (input and output parameter) Pointer to pointer to (usually small
1663 *     on-stack) kernel array.
1664 * @i: Pointer to iterator that will be initialized on success.
1665 *
1666 * If the array pointed to by *@iov is large enough to hold all @nr_segs,
1667 * then this function places %NULL in *@iov on return. Otherwise, a new
1668 * array will be allocated and the result placed in *@iov. This means that
1669 * the caller may call kfree() on *@iov regardless of whether the small
1670 * on-stack array was used or not (and regardless of whether this function
1671 * returns an error or not).
1672 *
1673 * Return: Negative error code on error, bytes imported on success
1674 */
1675ssize_t import_iovec(int type, const struct iovec __user * uvector,
1676                 unsigned nr_segs, unsigned fast_segs,
1677                 struct iovec **iov, struct iov_iter *i)
1678{
1679        ssize_t n;
1680        struct iovec *p;
1681        n = rw_copy_check_uvector(type, uvector, nr_segs, fast_segs,
1682                                  *iov, &p);
1683        if (n < 0) {
1684                if (p != *iov)
1685                        kfree(p);
1686                *iov = NULL;
1687                return n;
1688        }
1689        iov_iter_init(i, type, p, nr_segs, n);
1690        *iov = p == *iov ? NULL : p;
1691        return n;
1692}
1693EXPORT_SYMBOL(import_iovec);
1694
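/*
 * Example (editor's illustration): the canonical import_iovec() calling
 * convention used by readv()/writev()-style paths.  Because *iov may be
 * replaced by a heap allocation, kfree(iov) afterwards is always
 * correct, exactly as documented above.  do_my_readv() is hypothetical.
 */
static ssize_t do_my_readv(const struct iovec __user *vec, unsigned long vlen)
{
	struct iovec iovstack[UIO_FASTIOV];
	struct iovec *iov = iovstack;
	struct iov_iter iter;
	ssize_t ret;

	ret = import_iovec(READ, vec, vlen, ARRAY_SIZE(iovstack),
			   &iov, &iter);
	if (ret < 0)
		return ret;	/* *iov is already NULL, nothing leaks */

	/* ... hand &iter to the actual read implementation ... */

	kfree(iov);		/* NULL if the on-stack array sufficed */
	return ret;
}
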
1695#ifdef CONFIG_COMPAT
1696#include <linux/compat.h>
1697
1698ssize_t compat_import_iovec(int type,
1699                const struct compat_iovec __user * uvector,
1700                unsigned nr_segs, unsigned fast_segs,
1701                struct iovec **iov, struct iov_iter *i)
1702{
1703        ssize_t n;
1704        struct iovec *p;
1705        n = compat_rw_copy_check_uvector(type, uvector, nr_segs, fast_segs,
1706                                  *iov, &p);
1707        if (n < 0) {
1708                if (p != *iov)
1709                        kfree(p);
1710                *iov = NULL;
1711                return n;
1712        }
1713        iov_iter_init(i, type, p, nr_segs, n);
1714        *iov = p == *iov ? NULL : p;
1715        return n;
1716}
1717EXPORT_SYMBOL(compat_import_iovec);
1718#endif
1719
1720int import_single_range(int rw, void __user *buf, size_t len,
1721                 struct iovec *iov, struct iov_iter *i)
1722{
1723        if (len > MAX_RW_COUNT)
1724                len = MAX_RW_COUNT;
1725        if (unlikely(!access_ok(buf, len)))
1726                return -EFAULT;
1727
1728        iov->iov_base = buf;
1729        iov->iov_len = len;
1730        iov_iter_init(i, rw, iov, 1, len);
1731        return 0;
1732}
1733EXPORT_SYMBOL(import_single_range);
1734
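/*
 * Example (editor's illustration): the single-buffer analogue used by
 * plain read()/write() paths.  The struct iovec must live as long as
 * the iterator does, so it sits on the caller's stack next to it.
 */
static ssize_t do_my_read(char __user *buf, size_t len)
{
	struct iovec iov;
	struct iov_iter iter;
	ssize_t ret;

	ret = import_single_range(READ, buf, len, &iov, &iter);
	if (ret)
		return ret;	/* -EFAULT: buffer failed access_ok() */

	/* ... hand &iter to the actual read implementation ... */
	return 0;
}
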
1735int iov_iter_for_each_range(struct iov_iter *i, size_t bytes,
1736                            int (*f)(struct kvec *vec, void *context),
1737                            void *context)
1738{
1739        struct kvec w;
1740        int err = -EINVAL;
1741        if (!bytes)
1742                return 0;
1743
1744        iterate_all_kinds(i, bytes, v, -EINVAL, ({
1745                w.iov_base = kmap(v.bv_page) + v.bv_offset;
1746                w.iov_len = v.bv_len;
1747                err = f(&w, context);
1748                kunmap(v.bv_page);
1749                err;}), ({
1750                w = v;
1751                err = f(&w, context);})
1752        )
1753        return err;
1754}
1755EXPORT_SYMBOL(iov_iter_for_each_range);
1756
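/*
 * Example (editor's illustration): walking the first @bytes of a
 * kernel-backed iterator range by range.  Only bvec and kvec iterators
 * can be mapped in place; user-backed (iovec) iterators yield -EINVAL,
 * as the macro invocation above shows.  checksum_range() and
 * struct sum_ctx are hypothetical.
 */
struct sum_ctx {
	u32 sum;
};

static int checksum_range(struct kvec *vec, void *context)
{
	struct sum_ctx *ctx = context;
	const u8 *p = vec->iov_base;
	size_t n;

	for (n = 0; n < vec->iov_len; n++)
		ctx->sum += p[n];
	return 0;	/* the callback's value becomes the final err */
}

/* usage: err = iov_iter_for_each_range(iter, bytes, checksum_range, &ctx); */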