linux/lib/iov_iter.c
// SPDX-License-Identifier: GPL-2.0-only
#include <crypto/hash.h>
#include <linux/export.h>
#include <linux/bvec.h>
#include <linux/fault-inject-usercopy.h>
#include <linux/uio.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/splice.h>
#include <linux/compat.h>
#include <net/checksum.h>
#include <linux/scatterlist.h>
#include <linux/instrumented.h>

#define PIPE_PARANOIA /* for now */

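/*
 * The iterate_* macros below walk one flavour of iov_iter segment by
 * segment, evaluating STEP for each chunk, with __v describing the
 * current piece.  A user-space STEP may copy short (it returns the
 * number of bytes left uncopied), so iterate_iovec() tracks "left"
 * and stops early on a partial copy; kvec and bvec steps cannot fail,
 * so those variants discard the result of STEP.
 */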
#define iterate_iovec(i, n, __v, __p, skip, STEP) {     \
        size_t left;                                    \
        size_t wanted = n;                              \
        __p = i->iov;                                   \
        __v.iov_len = min(n, __p->iov_len - skip);      \
        if (likely(__v.iov_len)) {                      \
                __v.iov_base = __p->iov_base + skip;    \
                left = (STEP);                          \
                __v.iov_len -= left;                    \
                skip += __v.iov_len;                    \
                n -= __v.iov_len;                       \
        } else {                                        \
                left = 0;                               \
        }                                               \
        while (unlikely(!left && n)) {                  \
                __p++;                                  \
                __v.iov_len = min(n, __p->iov_len);     \
                if (unlikely(!__v.iov_len))             \
                        continue;                       \
                __v.iov_base = __p->iov_base;           \
                left = (STEP);                          \
                __v.iov_len -= left;                    \
                skip = __v.iov_len;                     \
                n -= __v.iov_len;                       \
        }                                               \
        n = wanted - n;                                 \
}

#define iterate_kvec(i, n, __v, __p, skip, STEP) {      \
        size_t wanted = n;                              \
        __p = i->kvec;                                  \
        __v.iov_len = min(n, __p->iov_len - skip);      \
        if (likely(__v.iov_len)) {                      \
                __v.iov_base = __p->iov_base + skip;    \
                (void)(STEP);                           \
                skip += __v.iov_len;                    \
                n -= __v.iov_len;                       \
        }                                               \
        while (unlikely(n)) {                           \
                __p++;                                  \
                __v.iov_len = min(n, __p->iov_len);     \
                if (unlikely(!__v.iov_len))             \
                        continue;                       \
                __v.iov_base = __p->iov_base;           \
                (void)(STEP);                           \
                skip = __v.iov_len;                     \
                n -= __v.iov_len;                       \
        }                                               \
        n = wanted;                                     \
}

#define iterate_bvec(i, n, __v, __bi, skip, STEP) {     \
        struct bvec_iter __start;                       \
        __start.bi_size = n;                            \
        __start.bi_bvec_done = skip;                    \
        __start.bi_idx = 0;                             \
        for_each_bvec(__v, i->bvec, __bi, __start) {    \
                if (!__v.bv_len)                        \
                        continue;                       \
                (void)(STEP);                           \
        }                                               \
}

#define iterate_all_kinds(i, n, v, I, B, K) {                   \
        if (likely(n)) {                                        \
                size_t skip = i->iov_offset;                    \
                if (unlikely(i->type & ITER_BVEC)) {            \
                        struct bio_vec v;                       \
                        struct bvec_iter __bi;                  \
                        iterate_bvec(i, n, v, __bi, skip, (B))  \
                } else if (unlikely(i->type & ITER_KVEC)) {     \
                        const struct kvec *kvec;                \
                        struct kvec v;                          \
                        iterate_kvec(i, n, v, kvec, skip, (K))  \
                } else if (unlikely(i->type & ITER_DISCARD)) {  \
                } else {                                        \
                        const struct iovec *iov;                \
                        struct iovec v;                         \
                        iterate_iovec(i, n, v, iov, skip, (I))  \
                }                                               \
        }                                                       \
}

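/*
 * iterate_all_kinds() dispatches on the iterator type and leaves the
 * iterator itself untouched; iterate_and_advance() additionally clamps
 * n to i->count and, once the walk is done, advances the segment
 * pointer, nr_segs, iov_offset and count past the bytes processed.
 */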
#define iterate_and_advance(i, n, v, I, B, K) {                 \
        if (unlikely(i->count < n))                             \
                n = i->count;                                   \
        if (i->count) {                                         \
                size_t skip = i->iov_offset;                    \
                if (unlikely(i->type & ITER_BVEC)) {            \
                        const struct bio_vec *bvec = i->bvec;   \
                        struct bio_vec v;                       \
                        struct bvec_iter __bi;                  \
                        iterate_bvec(i, n, v, __bi, skip, (B))  \
                        i->bvec = __bvec_iter_bvec(i->bvec, __bi);      \
                        i->nr_segs -= i->bvec - bvec;           \
                        skip = __bi.bi_bvec_done;               \
                } else if (unlikely(i->type & ITER_KVEC)) {     \
                        const struct kvec *kvec;                \
                        struct kvec v;                          \
                        iterate_kvec(i, n, v, kvec, skip, (K))  \
                        if (skip == kvec->iov_len) {            \
                                kvec++;                         \
                                skip = 0;                       \
                        }                                       \
                        i->nr_segs -= kvec - i->kvec;           \
                        i->kvec = kvec;                         \
                } else if (unlikely(i->type & ITER_DISCARD)) {  \
                        skip += n;                              \
                } else {                                        \
                        const struct iovec *iov;                \
                        struct iovec v;                         \
                        iterate_iovec(i, n, v, iov, skip, (I))  \
                        if (skip == iov->iov_len) {             \
                                iov++;                          \
                                skip = 0;                       \
                        }                                       \
                        i->nr_segs -= iov - i->iov;             \
                        i->iov = iov;                           \
                }                                               \
                i->count -= n;                                  \
                i->iov_offset = skip;                           \
        }                                                       \
}

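/*
 * copyout()/copyin() are the raw user-copy primitives used throughout
 * this file: they keep the access_ok() check explicit and feed the
 * fault-injection and instrumentation hooks.  Like the
 * raw_copy_*_user() calls they wrap, they return the number of bytes
 * NOT copied, i.e. 0 on complete success.
 */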
static int copyout(void __user *to, const void *from, size_t n)
{
        if (should_fail_usercopy())
                return n;
        if (access_ok(to, n)) {
                instrument_copy_to_user(to, from, n);
                n = raw_copy_to_user(to, from, n);
        }
        return n;
}

static int copyin(void *to, const void __user *from, size_t n)
{
        if (should_fail_usercopy())
                return n;
        if (access_ok(from, n)) {
                instrument_copy_from_user(to, from, n);
                n = raw_copy_from_user(to, from, n);
        }
        return n;
}

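/*
 * Copy data from a page into a user-backed iovec iterator.  On HIGHMEM
 * configurations we first try the cheap kmap_atomic() path, pre-faulting
 * the first destination chunk so the atomic copy is unlikely to fault;
 * if it does fault anyway, we fall back to the sleeping kmap() path for
 * the remainder.  Returns the number of bytes actually copied.
 */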
static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes,
                         struct iov_iter *i)
{
        size_t skip, copy, left, wanted;
        const struct iovec *iov;
        char __user *buf;
        void *kaddr, *from;

        if (unlikely(bytes > i->count))
                bytes = i->count;

        if (unlikely(!bytes))
                return 0;

        might_fault();
        wanted = bytes;
        iov = i->iov;
        skip = i->iov_offset;
        buf = iov->iov_base + skip;
        copy = min(bytes, iov->iov_len - skip);

        if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_writeable(buf, copy)) {
                kaddr = kmap_atomic(page);
                from = kaddr + offset;

                /* first chunk, usually the only one */
                left = copyout(buf, from, copy);
                copy -= left;
                skip += copy;
                from += copy;
                bytes -= copy;

                while (unlikely(!left && bytes)) {
                        iov++;
                        buf = iov->iov_base;
                        copy = min(bytes, iov->iov_len);
                        left = copyout(buf, from, copy);
                        copy -= left;
                        skip = copy;
                        from += copy;
                        bytes -= copy;
                }
                if (likely(!bytes)) {
                        kunmap_atomic(kaddr);
                        goto done;
                }
                offset = from - kaddr;
                buf += copy;
                kunmap_atomic(kaddr);
                copy = min(bytes, iov->iov_len - skip);
        }
        /* Too bad - revert to non-atomic kmap */

        kaddr = kmap(page);
        from = kaddr + offset;
        left = copyout(buf, from, copy);
        copy -= left;
        skip += copy;
        from += copy;
        bytes -= copy;
        while (unlikely(!left && bytes)) {
                iov++;
                buf = iov->iov_base;
                copy = min(bytes, iov->iov_len);
                left = copyout(buf, from, copy);
                copy -= left;
                skip = copy;
                from += copy;
                bytes -= copy;
        }
        kunmap(page);

done:
        if (skip == iov->iov_len) {
                iov++;
                skip = 0;
        }
        i->count -= wanted - bytes;
        i->nr_segs -= iov - i->iov;
        i->iov = iov;
        i->iov_offset = skip;
        return wanted - bytes;
}

static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t bytes,
                         struct iov_iter *i)
{
        size_t skip, copy, left, wanted;
        const struct iovec *iov;
        char __user *buf;
        void *kaddr, *to;

        if (unlikely(bytes > i->count))
                bytes = i->count;

        if (unlikely(!bytes))
                return 0;

        might_fault();
        wanted = bytes;
        iov = i->iov;
        skip = i->iov_offset;
        buf = iov->iov_base + skip;
        copy = min(bytes, iov->iov_len - skip);

        if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_readable(buf, copy)) {
                kaddr = kmap_atomic(page);
                to = kaddr + offset;

                /* first chunk, usually the only one */
                left = copyin(to, buf, copy);
                copy -= left;
                skip += copy;
                to += copy;
                bytes -= copy;

                while (unlikely(!left && bytes)) {
                        iov++;
                        buf = iov->iov_base;
                        copy = min(bytes, iov->iov_len);
                        left = copyin(to, buf, copy);
                        copy -= left;
                        skip = copy;
                        to += copy;
                        bytes -= copy;
                }
                if (likely(!bytes)) {
                        kunmap_atomic(kaddr);
                        goto done;
                }
                offset = to - kaddr;
                buf += copy;
                kunmap_atomic(kaddr);
                copy = min(bytes, iov->iov_len - skip);
        }
        /* Too bad - revert to non-atomic kmap */

        kaddr = kmap(page);
        to = kaddr + offset;
        left = copyin(to, buf, copy);
        copy -= left;
        skip += copy;
        to += copy;
        bytes -= copy;
        while (unlikely(!left && bytes)) {
                iov++;
                buf = iov->iov_base;
                copy = min(bytes, iov->iov_len);
                left = copyin(to, buf, copy);
                copy -= left;
                skip = copy;
                to += copy;
                bytes -= copy;
        }
        kunmap(page);

done:
        if (skip == iov->iov_len) {
                iov++;
                skip = 0;
        }
        i->count -= wanted - bytes;
        i->nr_segs -= iov - i->iov;
        i->iov = iov;
        i->iov_offset = skip;
        return wanted - bytes;
}

#ifdef PIPE_PARANOIA
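/*
 * Debug-only check of the iterator/pipe invariants: with a partially
 * filled buffer (iov_offset != 0) the iterator must sit on the last
 * occupied slot, exactly at the end of that buffer's data; otherwise
 * it must point right past the pipe's head.  Dumps the ring and WARNs
 * on any violation.
 */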
static bool sanity(const struct iov_iter *i)
{
        struct pipe_inode_info *pipe = i->pipe;
        unsigned int p_head = pipe->head;
        unsigned int p_tail = pipe->tail;
        unsigned int p_mask = pipe->ring_size - 1;
        unsigned int p_occupancy = pipe_occupancy(p_head, p_tail);
        unsigned int i_head = i->head;
        unsigned int idx;

        if (i->iov_offset) {
                struct pipe_buffer *p;
                if (unlikely(p_occupancy == 0))
                        goto Bad;       // pipe must be non-empty
                if (unlikely(i_head != p_head - 1))
                        goto Bad;       // must be at the last buffer...

                p = &pipe->bufs[i_head & p_mask];
                if (unlikely(p->offset + p->len != i->iov_offset))
                        goto Bad;       // ... at the end of segment
        } else {
                if (i_head != p_head)
                        goto Bad;       // must be right after the last buffer
        }
        return true;
Bad:
        printk(KERN_ERR "idx = %d, offset = %zd\n", i_head, i->iov_offset);
        printk(KERN_ERR "head = %d, tail = %d, buffers = %d\n",
                        p_head, p_tail, pipe->ring_size);
        for (idx = 0; idx < pipe->ring_size; idx++)
                printk(KERN_ERR "[%p %p %d %d]\n",
                        pipe->bufs[idx].ops,
                        pipe->bufs[idx].page,
                        pipe->bufs[idx].offset,
                        pipe->bufs[idx].len);
        WARN_ON(1);
        return false;
}
#else
#define sanity(i) true
#endif

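/*
 * Splice a page reference into the pipe rather than copying the data:
 * either extend the last buffer, if it already ends at this page and
 * offset, or claim the next slot, take a page reference and point the
 * new buffer at page/offset/bytes.  Returns 0 if the pipe is full.
 */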
static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes,
                         struct iov_iter *i)
{
        struct pipe_inode_info *pipe = i->pipe;
        struct pipe_buffer *buf;
        unsigned int p_tail = pipe->tail;
        unsigned int p_mask = pipe->ring_size - 1;
        unsigned int i_head = i->head;
        size_t off;

        if (unlikely(bytes > i->count))
                bytes = i->count;

        if (unlikely(!bytes))
                return 0;

        if (!sanity(i))
                return 0;

        off = i->iov_offset;
        buf = &pipe->bufs[i_head & p_mask];
        if (off) {
                if (offset == off && buf->page == page) {
                        /* merge with the last one */
                        buf->len += bytes;
                        i->iov_offset += bytes;
                        goto out;
                }
                i_head++;
                buf = &pipe->bufs[i_head & p_mask];
        }
        if (pipe_full(i_head, p_tail, pipe->max_usage))
                return 0;

        buf->ops = &page_cache_pipe_buf_ops;
        get_page(page);
        buf->page = page;
        buf->offset = offset;
        buf->len = bytes;

        pipe->head = i_head + 1;
        i->iov_offset = offset + bytes;
        i->head = i_head;
out:
        i->count -= bytes;
        return bytes;
}

/*
 * Fault in one or more iovecs of the given iov_iter, to a maximum length of
 * bytes.  For each iovec, fault in each page that constitutes the iovec.
 *
 * Return 0 on success, or non-zero if the memory could not be accessed (i.e.
 * because it is an invalid address).
 */
int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
{
        size_t skip = i->iov_offset;
        const struct iovec *iov;
        int err;
        struct iovec v;

        if (!(i->type & (ITER_BVEC|ITER_KVEC))) {
                iterate_iovec(i, bytes, v, iov, skip, ({
                        err = fault_in_pages_readable(v.iov_base, v.iov_len);
                        if (unlikely(err))
                                return err;
                0;}))
        }
        return 0;
}
EXPORT_SYMBOL(iov_iter_fault_in_readable);
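
/*
 * Illustrative caller pattern (a sketch of how buffered-write paths in
 * the style of generic_perform_write() use this helper; "page",
 * "offset" and "bytes" are the caller's names, not defined here):
 *
 *	if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
 *		status = -EFAULT;
 *		break;
 *	}
 *	...
 *	copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
 *
 * Pre-faulting the source pages up front makes the later copy, done
 * with page faults disabled, overwhelmingly likely to succeed.
 */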

void iov_iter_init(struct iov_iter *i, unsigned int direction,
                        const struct iovec *iov, unsigned long nr_segs,
                        size_t count)
{
        WARN_ON(direction & ~(READ | WRITE));
        direction &= READ | WRITE;

        /* It will get better.  Eventually... */
        if (uaccess_kernel()) {
                i->type = ITER_KVEC | direction;
                i->kvec = (struct kvec *)iov;
        } else {
                i->type = ITER_IOVEC | direction;
                i->iov = iov;
        }
        i->nr_segs = nr_segs;
        i->iov_offset = 0;
        i->count = count;
}
EXPORT_SYMBOL(iov_iter_init);
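
/*
 * Minimal usage sketch for iov_iter_init() (hypothetical names, not
 * part of this file): a read(2)-style path wraps a single user buffer
 * and lets copy_to_iter() handle segment walking and faults:
 *
 *	struct iovec iov = { .iov_base = ubuf, .iov_len = len };
 *	struct iov_iter iter;
 *	size_t copied;
 *
 *	iov_iter_init(&iter, READ, &iov, 1, len);
 *	copied = copy_to_iter(kbuf, len, &iter);
 *	if (copied != len)
 *		...;	// short copy: part of ubuf was not writable
 */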

static void memcpy_from_page(char *to, struct page *page, size_t offset, size_t len)
{
        char *from = kmap_atomic(page);
        memcpy(to, from + offset, len);
        kunmap_atomic(from);
}

static void memcpy_to_page(struct page *page, size_t offset, const char *from, size_t len)
{
        char *to = kmap_atomic(page);
        memcpy(to + offset, from, len);
        kunmap_atomic(to);
}

static void memzero_page(struct page *page, size_t offset, size_t len)
{
        char *addr = kmap_atomic(page);
        memset(addr + offset, 0, len);
        kunmap_atomic(addr);
}

static inline bool allocated(struct pipe_buffer *buf)
{
        return buf->ops == &default_pipe_buf_ops;
}

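/*
 * Find where new data would go in the pipe: normally the slot/offset
 * the iterator points at, but if the current buffer is not one of ours
 * (not allocated by push_pipe()) or is already full, start at the next
 * slot instead.
 */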
static inline void data_start(const struct iov_iter *i,
                              unsigned int *iter_headp, size_t *offp)
{
        unsigned int p_mask = i->pipe->ring_size - 1;
        unsigned int iter_head = i->head;
        size_t off = i->iov_offset;

        if (off && (!allocated(&i->pipe->bufs[iter_head & p_mask]) ||
                    off == PAGE_SIZE)) {
                iter_head++;
                off = 0;
        }
        *iter_headp = iter_head;
        *offp = off;
}

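/*
 * Secure capacity for up to "size" bytes in the pipe: top up the
 * partially filled buffer the iterator sits in (if any), then allocate
 * fresh pages until the request is satisfied, the ring fills up, or
 * allocation fails.  Returns how many bytes of capacity were obtained
 * and reports the starting slot/offset via iter_headp/offp.
 */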
static size_t push_pipe(struct iov_iter *i, size_t size,
                        int *iter_headp, size_t *offp)
{
        struct pipe_inode_info *pipe = i->pipe;
        unsigned int p_tail = pipe->tail;
        unsigned int p_mask = pipe->ring_size - 1;
        unsigned int iter_head;
        size_t off;
        ssize_t left;

        if (unlikely(size > i->count))
                size = i->count;
        if (unlikely(!size))
                return 0;

        left = size;
        data_start(i, &iter_head, &off);
        *iter_headp = iter_head;
        *offp = off;
        if (off) {
                left -= PAGE_SIZE - off;
                if (left <= 0) {
                        pipe->bufs[iter_head & p_mask].len += size;
                        return size;
                }
                pipe->bufs[iter_head & p_mask].len = PAGE_SIZE;
                iter_head++;
        }
        while (!pipe_full(iter_head, p_tail, pipe->max_usage)) {
                struct pipe_buffer *buf = &pipe->bufs[iter_head & p_mask];
                struct page *page = alloc_page(GFP_USER);
                if (!page)
                        break;

                buf->ops = &default_pipe_buf_ops;
                buf->page = page;
                buf->offset = 0;
                buf->len = min_t(ssize_t, left, PAGE_SIZE);
                left -= buf->len;
                iter_head++;
                pipe->head = iter_head;

                if (left == 0)
                        return size;
        }
        return size - left;
}

static size_t copy_pipe_to_iter(const void *addr, size_t bytes,
                                struct iov_iter *i)
{
        struct pipe_inode_info *pipe = i->pipe;
        unsigned int p_mask = pipe->ring_size - 1;
        unsigned int i_head;
        size_t n, off;

        if (!sanity(i))
                return 0;

        bytes = n = push_pipe(i, bytes, &i_head, &off);
        if (unlikely(!n))
                return 0;
        do {
                size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
                memcpy_to_page(pipe->bufs[i_head & p_mask].page, off, addr, chunk);
                i->head = i_head;
                i->iov_offset = off + chunk;
                n -= chunk;
                addr += chunk;
                off = 0;
                i_head++;
        } while (n);
        i->count -= bytes;
        return bytes;
}

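/*
 * memcpy() with checksumming: compute the Internet checksum of the
 * chunk while copying it, then fold it into the running sum, with
 * "off" giving the chunk's byte offset so that csum_block_add() can
 * account for odd/even alignment of the partial sums.
 */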
static __wsum csum_and_memcpy(void *to, const void *from, size_t len,
                              __wsum sum, size_t off)
{
        __wsum next = csum_partial_copy_nocheck(from, to, len);
        return csum_block_add(sum, next, off);
}

static size_t csum_and_copy_to_pipe_iter(const void *addr, size_t bytes,
                                __wsum *csum, struct iov_iter *i)
{
        struct pipe_inode_info *pipe = i->pipe;
        unsigned int p_mask = pipe->ring_size - 1;
        unsigned int i_head;
        size_t n, r;
        size_t off = 0;
        __wsum sum = *csum;

        if (!sanity(i))
                return 0;

        bytes = n = push_pipe(i, bytes, &i_head, &r);
        if (unlikely(!n))
                return 0;
        do {
                size_t chunk = min_t(size_t, n, PAGE_SIZE - r);
                char *p = kmap_atomic(pipe->bufs[i_head & p_mask].page);
                sum = csum_and_memcpy(p + r, addr, chunk, sum, off);
                kunmap_atomic(p);
                i->head = i_head;
                i->iov_offset = r + chunk;
                n -= chunk;
                off += chunk;
                addr += chunk;
                r = 0;
                i_head++;
        } while (n);
        i->count -= bytes;
        *csum = sum;
        return bytes;
}

size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
{
        const char *from = addr;
        if (unlikely(iov_iter_is_pipe(i)))
                return copy_pipe_to_iter(addr, bytes, i);
        if (iter_is_iovec(i))
                might_fault();
        iterate_and_advance(i, bytes, v,
                copyout(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
                memcpy_to_page(v.bv_page, v.bv_offset,
                               (from += v.bv_len) - v.bv_len, v.bv_len),
                memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len)
        )

        return bytes;
}
EXPORT_SYMBOL(_copy_to_iter);

#ifdef CONFIG_ARCH_HAS_COPY_MC
static int copyout_mc(void __user *to, const void *from, size_t n)
{
        if (access_ok(to, n)) {
                instrument_copy_to_user(to, from, n);
                n = copy_mc_to_user((__force void *) to, from, n);
        }
        return n;
}

static unsigned long copy_mc_to_page(struct page *page, size_t offset,
                const char *from, size_t len)
{
        unsigned long ret;
        char *to;

        to = kmap_atomic(page);
        ret = copy_mc_to_kernel(to + offset, from, len);
        kunmap_atomic(to);

        return ret;
}

static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes,
                                struct iov_iter *i)
{
        struct pipe_inode_info *pipe = i->pipe;
        unsigned int p_mask = pipe->ring_size - 1;
        unsigned int i_head;
        size_t n, off, xfer = 0;

        if (!sanity(i))
                return 0;

        bytes = n = push_pipe(i, bytes, &i_head, &off);
        if (unlikely(!n))
                return 0;
        do {
                size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
                unsigned long rem;

                rem = copy_mc_to_page(pipe->bufs[i_head & p_mask].page,
                                            off, addr, chunk);
                i->head = i_head;
                i->iov_offset = off + chunk - rem;
                xfer += chunk - rem;
                if (rem)
                        break;
                n -= chunk;
                addr += chunk;
                off = 0;
                i_head++;
        } while (n);
        i->count -= xfer;
        return xfer;
}

/**
 * _copy_mc_to_iter - copy to iter with source memory error exception handling
 * @addr: source kernel address
 * @bytes: total transfer length
 * @i: destination iterator
 *
 * The pmem driver deploys this for the dax operation
 * (dax_copy_to_iter()) for dax reads (bypass page-cache and the
 * block-layer). Upon #MC, read(2) aborts and returns EIO or the number
 * of bytes successfully copied.
 *
 * The main differences between this and typical _copy_to_iter() are:
 *
 * * Typical tail/residue handling after a fault retries the copy
 *   byte-by-byte until the fault happens again. Re-triggering machine
 *   checks is potentially fatal so the implementation uses source
 *   alignment and poison alignment assumptions to avoid re-triggering
 *   hardware exceptions.
 *
 * * ITER_KVEC, ITER_PIPE, and ITER_BVEC can return short copies.
 *   Compare to copy_to_iter() where only ITER_IOVEC attempts might return
 *   a short copy.
 */
size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
{
        const char *from = addr;
        unsigned long rem, curr_addr, s_addr = (unsigned long) addr;

        if (unlikely(iov_iter_is_pipe(i)))
                return copy_mc_pipe_to_iter(addr, bytes, i);
        if (iter_is_iovec(i))
                might_fault();
        iterate_and_advance(i, bytes, v,
                copyout_mc(v.iov_base, (from += v.iov_len) - v.iov_len,
                           v.iov_len),
                ({
                rem = copy_mc_to_page(v.bv_page, v.bv_offset,
                                      (from += v.bv_len) - v.bv_len, v.bv_len);
                if (rem) {
                        curr_addr = (unsigned long) from;
                        bytes = curr_addr - s_addr - rem;
                        return bytes;
                }
                }),
                ({
                rem = copy_mc_to_kernel(v.iov_base, (from += v.iov_len)
                                        - v.iov_len, v.iov_len);
                if (rem) {
                        curr_addr = (unsigned long) from;
                        bytes = curr_addr - s_addr - rem;
                        return bytes;
                }
                })
        )

        return bytes;
}
EXPORT_SYMBOL_GPL(_copy_mc_to_iter);
#endif /* CONFIG_ARCH_HAS_COPY_MC */

size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
{
        char *to = addr;
        if (unlikely(iov_iter_is_pipe(i))) {
                WARN_ON(1);
                return 0;
        }
        if (iter_is_iovec(i))
                might_fault();
        iterate_and_advance(i, bytes, v,
                copyin((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
                memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
                                 v.bv_offset, v.bv_len),
                memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
        )

        return bytes;
}
EXPORT_SYMBOL(_copy_from_iter);

bool _copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i)
{
        char *to = addr;
        if (unlikely(iov_iter_is_pipe(i))) {
                WARN_ON(1);
                return false;
        }
        if (unlikely(i->count < bytes))
                return false;

        if (iter_is_iovec(i))
                might_fault();
        iterate_all_kinds(i, bytes, v, ({
                if (copyin((to += v.iov_len) - v.iov_len,
                                      v.iov_base, v.iov_len))
                        return false;
                0;}),
                memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
                                 v.bv_offset, v.bv_len),
                memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
        )

        iov_iter_advance(i, bytes);
        return true;
}
EXPORT_SYMBOL(_copy_from_iter_full);

size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
{
        char *to = addr;
        if (unlikely(iov_iter_is_pipe(i))) {
                WARN_ON(1);
                return 0;
        }
        iterate_and_advance(i, bytes, v,
                __copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len,
                                         v.iov_base, v.iov_len),
                memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
                                 v.bv_offset, v.bv_len),
                memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
        )

        return bytes;
}
EXPORT_SYMBOL(_copy_from_iter_nocache);

#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
/**
 * _copy_from_iter_flushcache - write destination through cpu cache
 * @addr: destination kernel address
 * @bytes: total transfer length
 * @i: source iterator
 *
 * The pmem driver arranges for filesystem-dax to use this facility via
 * dax_copy_from_iter() for ensuring that writes to persistent memory
 * are flushed through the CPU cache. It is differentiated from
 * _copy_from_iter_nocache() in that it guarantees all data is flushed
 * for all iterator types. _copy_from_iter_nocache() only attempts to
 * bypass the cache for the ITER_IOVEC case, and on some archs may use
 * instructions that strand dirty-data in the cache.
 */
size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
{
        char *to = addr;
        if (unlikely(iov_iter_is_pipe(i))) {
                WARN_ON(1);
                return 0;
        }
        iterate_and_advance(i, bytes, v,
                __copy_from_user_flushcache((to += v.iov_len) - v.iov_len,
                                         v.iov_base, v.iov_len),
                memcpy_page_flushcache((to += v.bv_len) - v.bv_len, v.bv_page,
                                 v.bv_offset, v.bv_len),
                memcpy_flushcache((to += v.iov_len) - v.iov_len, v.iov_base,
                        v.iov_len)
        )

        return bytes;
}
EXPORT_SYMBOL_GPL(_copy_from_iter_flushcache);
#endif

bool _copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
{
        char *to = addr;
        if (unlikely(iov_iter_is_pipe(i))) {
                WARN_ON(1);
                return false;
        }
        if (unlikely(i->count < bytes))
                return false;
        iterate_all_kinds(i, bytes, v, ({
                if (__copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len,
                                             v.iov_base, v.iov_len))
                        return false;
                0;}),
                memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
                                 v.bv_offset, v.bv_len),
                memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
        )

        iov_iter_advance(i, bytes);
        return true;
}
EXPORT_SYMBOL(_copy_from_iter_full_nocache);

static inline bool page_copy_sane(struct page *page, size_t offset, size_t n)
{
        struct page *head;
        size_t v = n + offset;

        /*
         * The general case needs to access the page order in order
         * to compute the page size.
         * However, we mostly deal with order-0 pages and thus can
         * avoid a possible cache line miss for requests that fit
         * within the first PAGE_SIZE bytes, which is valid for any
         * page order.
         */
        if (n <= v && v <= PAGE_SIZE)
                return true;

        head = compound_head(page);
        v += (page - head) << PAGE_SHIFT;

        if (likely(n <= v && v <= (page_size(head))))
                return true;
        WARN_ON(1);
        return false;
}

size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
                         struct iov_iter *i)
{
        if (unlikely(!page_copy_sane(page, offset, bytes)))
                return 0;
        if (i->type & (ITER_BVEC|ITER_KVEC)) {
                void *kaddr = kmap_atomic(page);
                size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
                kunmap_atomic(kaddr);
                return wanted;
        } else if (unlikely(iov_iter_is_discard(i)))
                return bytes;
        else if (likely(!iov_iter_is_pipe(i)))
                return copy_page_to_iter_iovec(page, offset, bytes, i);
        else
                return copy_page_to_iter_pipe(page, offset, bytes, i);
}
EXPORT_SYMBOL(copy_page_to_iter);

size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
                         struct iov_iter *i)
{
        if (unlikely(!page_copy_sane(page, offset, bytes)))
                return 0;
        if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
                WARN_ON(1);
                return 0;
        }
        if (i->type & (ITER_BVEC|ITER_KVEC)) {
                void *kaddr = kmap_atomic(page);
                size_t wanted = _copy_from_iter(kaddr + offset, bytes, i);
                kunmap_atomic(kaddr);
                return wanted;
        } else
                return copy_page_from_iter_iovec(page, offset, bytes, i);
}
EXPORT_SYMBOL(copy_page_from_iter);

static size_t pipe_zero(size_t bytes, struct iov_iter *i)
{
        struct pipe_inode_info *pipe = i->pipe;
        unsigned int p_mask = pipe->ring_size - 1;
        unsigned int i_head;
        size_t n, off;

        if (!sanity(i))
                return 0;

        bytes = n = push_pipe(i, bytes, &i_head, &off);
        if (unlikely(!n))
                return 0;

        do {
                size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
                memzero_page(pipe->bufs[i_head & p_mask].page, off, chunk);
                i->head = i_head;
                i->iov_offset = off + chunk;
                n -= chunk;
                off = 0;
                i_head++;
        } while (n);
        i->count -= bytes;
        return bytes;
}

size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
{
        if (unlikely(iov_iter_is_pipe(i)))
                return pipe_zero(bytes, i);
        iterate_and_advance(i, bytes, v,
                clear_user(v.iov_base, v.iov_len),
                memzero_page(v.bv_page, v.bv_offset, v.bv_len),
                memset(v.iov_base, 0, v.iov_len)
        )

        return bytes;
}
EXPORT_SYMBOL(iov_iter_zero);

size_t iov_iter_copy_from_user_atomic(struct page *page,
                struct iov_iter *i, unsigned long offset, size_t bytes)
{
        char *kaddr = kmap_atomic(page), *p = kaddr + offset;
        if (unlikely(!page_copy_sane(page, offset, bytes))) {
                kunmap_atomic(kaddr);
                return 0;
        }
        if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
                kunmap_atomic(kaddr);
                WARN_ON(1);
                return 0;
        }
        iterate_all_kinds(i, bytes, v,
                copyin((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
                memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page,
                                 v.bv_offset, v.bv_len),
                memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
        )
        kunmap_atomic(kaddr);
        return bytes;
}
EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);

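/*
 * Discard everything in the pipe past the iterator's current position:
 * trim the buffer the iterator is in the middle of (if any) and release
 * every buffer after it, pulling pipe->head back accordingly.
 */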
static inline void pipe_truncate(struct iov_iter *i)
{
        struct pipe_inode_info *pipe = i->pipe;
        unsigned int p_tail = pipe->tail;
        unsigned int p_head = pipe->head;
        unsigned int p_mask = pipe->ring_size - 1;

        if (!pipe_empty(p_head, p_tail)) {
                struct pipe_buffer *buf;
                unsigned int i_head = i->head;
                size_t off = i->iov_offset;

                if (off) {
                        buf = &pipe->bufs[i_head & p_mask];
                        buf->len = off - buf->offset;
                        i_head++;
                }
                while (p_head != i_head) {
                        p_head--;
                        pipe_buf_release(pipe, &pipe->bufs[p_head & p_mask]);
                }

                pipe->head = p_head;
        }
}

static void pipe_advance(struct iov_iter *i, size_t size)
{
        struct pipe_inode_info *pipe = i->pipe;
        if (unlikely(i->count < size))
                size = i->count;
        if (size) {
                struct pipe_buffer *buf;
                unsigned int p_mask = pipe->ring_size - 1;
                unsigned int i_head = i->head;
                size_t off = i->iov_offset, left = size;

                if (off) /* make it relative to the beginning of buffer */
                        left += off - pipe->bufs[i_head & p_mask].offset;
                while (1) {
                        buf = &pipe->bufs[i_head & p_mask];
                        if (left <= buf->len)
                                break;
                        left -= buf->len;
                        i_head++;
                }
                i->head = i_head;
                i->iov_offset = buf->offset + left;
        }
        i->count -= size;
        /* ... and discard everything past that point */
        pipe_truncate(i);
}

void iov_iter_advance(struct iov_iter *i, size_t size)
{
        if (unlikely(iov_iter_is_pipe(i))) {
                pipe_advance(i, size);
                return;
        }
        if (unlikely(iov_iter_is_discard(i))) {
                i->count -= size;
                return;
        }
        iterate_and_advance(i, size, v, 0, 0, 0)
}
EXPORT_SYMBOL(iov_iter_advance);

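/*
 * Undo a previous advance: walk back "unroll" bytes, restoring count,
 * the segment pointer, nr_segs and iov_offset.  For pipes this steps
 * back through the buffers we produced (never past start_head) and
 * truncates whatever had been pushed beyond the restored position.
 */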
void iov_iter_revert(struct iov_iter *i, size_t unroll)
{
        if (!unroll)
                return;
        if (WARN_ON(unroll > MAX_RW_COUNT))
                return;
        i->count += unroll;
        if (unlikely(iov_iter_is_pipe(i))) {
                struct pipe_inode_info *pipe = i->pipe;
                unsigned int p_mask = pipe->ring_size - 1;
                unsigned int i_head = i->head;
                size_t off = i->iov_offset;
                while (1) {
                        struct pipe_buffer *b = &pipe->bufs[i_head & p_mask];
                        size_t n = off - b->offset;
                        if (unroll < n) {
                                off -= unroll;
                                break;
                        }
                        unroll -= n;
                        if (!unroll && i_head == i->start_head) {
                                off = 0;
                                break;
                        }
                        i_head--;
                        b = &pipe->bufs[i_head & p_mask];
                        off = b->offset + b->len;
                }
                i->iov_offset = off;
                i->head = i_head;
                pipe_truncate(i);
                return;
        }
        if (unlikely(iov_iter_is_discard(i)))
                return;
        if (unroll <= i->iov_offset) {
                i->iov_offset -= unroll;
                return;
        }
        unroll -= i->iov_offset;
        if (iov_iter_is_bvec(i)) {
                const struct bio_vec *bvec = i->bvec;
                while (1) {
                        size_t n = (--bvec)->bv_len;
                        i->nr_segs++;
                        if (unroll <= n) {
                                i->bvec = bvec;
                                i->iov_offset = n - unroll;
                                return;
                        }
                        unroll -= n;
                }
        } else { /* same logics for iovec and kvec */
                const struct iovec *iov = i->iov;
                while (1) {
                        size_t n = (--iov)->iov_len;
                        i->nr_segs++;
                        if (unroll <= n) {
                                i->iov = iov;
                                i->iov_offset = n - unroll;
                                return;
                        }
                        unroll -= n;
                }
        }
}
EXPORT_SYMBOL(iov_iter_revert);

/*
 * Return the count of just the current iov_iter segment.
 */
size_t iov_iter_single_seg_count(const struct iov_iter *i)
{
        if (unlikely(iov_iter_is_pipe(i)))
                return i->count;        // it is a silly place, anyway
        if (i->nr_segs == 1)
                return i->count;
        if (unlikely(iov_iter_is_discard(i)))
                return i->count;
        else if (iov_iter_is_bvec(i))
                return min(i->count, i->bvec->bv_len - i->iov_offset);
        else
                return min(i->count, i->iov->iov_len - i->iov_offset);
}
EXPORT_SYMBOL(iov_iter_single_seg_count);

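/*
 * Sketch of a typical in-kernel caller of iov_iter_kvec() (hypothetical
 * names): wrapping a kernel buffer for a kernel_read()-style call,
 * where no access_ok() checks or page faults are involved:
 *
 *	struct kvec kv = { .iov_base = kbuf, .iov_len = len };
 *	struct iov_iter iter;
 *
 *	iov_iter_kvec(&iter, READ, &kv, 1, len);
 */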
void iov_iter_kvec(struct iov_iter *i, unsigned int direction,
                        const struct kvec *kvec, unsigned long nr_segs,
                        size_t count)
{
        WARN_ON(direction & ~(READ | WRITE));
        i->type = ITER_KVEC | (direction & (READ | WRITE));
        i->kvec = kvec;
        i->nr_segs = nr_segs;
        i->iov_offset = 0;
        i->count = count;
}
EXPORT_SYMBOL(iov_iter_kvec);

void iov_iter_bvec(struct iov_iter *i, unsigned int direction,
                        const struct bio_vec *bvec, unsigned long nr_segs,
                        size_t count)
{
        WARN_ON(direction & ~(READ | WRITE));
        i->type = ITER_BVEC | (direction & (READ | WRITE));
        i->bvec = bvec;
        i->nr_segs = nr_segs;
        i->iov_offset = 0;
        i->count = count;
}
EXPORT_SYMBOL(iov_iter_bvec);

void iov_iter_pipe(struct iov_iter *i, unsigned int direction,
                        struct pipe_inode_info *pipe,
                        size_t count)
{
        BUG_ON(direction != READ);
        WARN_ON(pipe_full(pipe->head, pipe->tail, pipe->ring_size));
        i->type = ITER_PIPE | READ;
        i->pipe = pipe;
        i->head = pipe->head;
        i->iov_offset = 0;
        i->count = count;
        i->start_head = i->head;
}
EXPORT_SYMBOL(iov_iter_pipe);

/**
 * iov_iter_discard - Initialise an I/O iterator that discards data
 * @i: The iterator to initialise.
 * @direction: The direction of the transfer.
 * @count: The size of the I/O buffer in bytes.
 *
 * Set up an I/O iterator that just discards everything that's written to it.
 * It's only available as a READ iterator.
 */
void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count)
{
        BUG_ON(direction != READ);
        i->type = ITER_DISCARD | READ;
        i->count = count;
        i->iov_offset = 0;
}
EXPORT_SYMBOL(iov_iter_discard);

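/*
 * Worst-case alignment of the iterator: OR together every segment's
 * base address and length, so the lowest set bit of the result bounds
 * the alignment a caller may assume across all segments.  Direct-I/O
 * paths use this to check device alignment constraints.
 */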
unsigned long iov_iter_alignment(const struct iov_iter *i)
{
        unsigned long res = 0;
        size_t size = i->count;

        if (unlikely(iov_iter_is_pipe(i))) {
                unsigned int p_mask = i->pipe->ring_size - 1;

                if (size && i->iov_offset && allocated(&i->pipe->bufs[i->head & p_mask]))
                        return size | i->iov_offset;
                return size;
        }
        iterate_all_kinds(i, size, v,
                (res |= (unsigned long)v.iov_base | v.iov_len, 0),
                res |= v.bv_offset | v.bv_len,
                res |= (unsigned long)v.iov_base | v.iov_len
        )
        return res;
}
EXPORT_SYMBOL(iov_iter_alignment);

unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
{
        unsigned long res = 0;
        size_t size = i->count;

        if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
                WARN_ON(1);
                return ~0U;
        }

        iterate_all_kinds(i, size, v,
                (res |= (!res ? 0 : (unsigned long)v.iov_base) |
                        (size != v.iov_len ? size : 0), 0),
                (res |= (!res ? 0 : (unsigned long)v.bv_offset) |
                        (size != v.bv_len ? size : 0)),
                (res |= (!res ? 0 : (unsigned long)v.iov_base) |
                        (size != v.iov_len ? size : 0))
                );
        return res;
}
EXPORT_SYMBOL(iov_iter_gap_alignment);

static inline ssize_t __pipe_get_pages(struct iov_iter *i,
                                size_t maxsize,
                                struct page **pages,
                                int iter_head,
                                size_t *start)
{
        struct pipe_inode_info *pipe = i->pipe;
        unsigned int p_mask = pipe->ring_size - 1;
        ssize_t n = push_pipe(i, maxsize, &iter_head, start);
        if (!n)
                return -EFAULT;

        maxsize = n;
        n += *start;
        while (n > 0) {
                get_page(*pages++ = pipe->bufs[iter_head & p_mask].page);
                iter_head++;
                n -= PAGE_SIZE;
        }

        return maxsize;
}

static ssize_t pipe_get_pages(struct iov_iter *i,
                   struct page **pages, size_t maxsize, unsigned maxpages,
                   size_t *start)
{
        unsigned int iter_head, npages;
        size_t capacity;

        if (!maxsize)
                return 0;

        if (!sanity(i))
                return -EFAULT;

        data_start(i, &iter_head, start);
        /* Amount of free space: some of this one + all after this one */
        npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe);
        capacity = min(npages, maxpages) * PAGE_SIZE - *start;

        return __pipe_get_pages(i, min(maxsize, capacity), pages, iter_head, start);
}

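/*
 * Illustrative direct-I/O style use of iov_iter_get_pages()
 * (hypothetical names, error handling elided): on success the return
 * value is the number of bytes covered, starting at offset "off"
 * within pages[0]; the caller owns a reference on each page returned
 * and must put_page() them when done:
 *
 *	struct page *pages[16];
 *	size_t off;
 *	ssize_t bytes;
 *
 *	bytes = iov_iter_get_pages(iter, pages, maxbytes, 16, &off);
 *	if (bytes < 0)
 *		return bytes;
 */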
1316ssize_t iov_iter_get_pages(struct iov_iter *i,
1317                   struct page **pages, size_t maxsize, unsigned maxpages,
1318                   size_t *start)
1319{
1320        if (maxsize > i->count)
1321                maxsize = i->count;
1322
1323        if (unlikely(iov_iter_is_pipe(i)))
1324                return pipe_get_pages(i, pages, maxsize, maxpages, start);
1325        if (unlikely(iov_iter_is_discard(i)))
1326                return -EFAULT;
1327
1328        iterate_all_kinds(i, maxsize, v, ({
1329                unsigned long addr = (unsigned long)v.iov_base;
1330                size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
1331                int n;
1332                int res;
1333
1334                if (len > maxpages * PAGE_SIZE)
1335                        len = maxpages * PAGE_SIZE;
1336                addr &= ~(PAGE_SIZE - 1);
1337                n = DIV_ROUND_UP(len, PAGE_SIZE);
1338                res = get_user_pages_fast(addr, n,
1339                                iov_iter_rw(i) != WRITE ?  FOLL_WRITE : 0,
1340                                pages);
1341                if (unlikely(res < 0))
1342                        return res;
1343                return (res == n ? len : res * PAGE_SIZE) - *start;
1344        0;}),({
1345                /* can't be more than PAGE_SIZE */
1346                *start = v.bv_offset;
1347                get_page(*pages = v.bv_page);
1348                return v.bv_len;
1349        }),({
1350                return -EFAULT;
1351        })
1352        )
1353        return 0;
1354}
1355EXPORT_SYMBOL(iov_iter_get_pages);
1356
1357static struct page **get_pages_array(size_t n)
1358{
1359        return kvmalloc_array(n, sizeof(struct page *), GFP_KERNEL);
1360}
1361
1362static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
1363                   struct page ***pages, size_t maxsize,
1364                   size_t *start)
1365{
1366        struct page **p;
1367        unsigned int iter_head, npages;
1368        ssize_t n;
1369
1370        if (!maxsize)
1371                return 0;
1372
1373        if (!sanity(i))
1374                return -EFAULT;
1375
1376        data_start(i, &iter_head, start);
1377        /* Amount of free space: some of this one + all after this one */
1378        npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe);
1379        n = npages * PAGE_SIZE - *start;
1380        if (maxsize > n)
1381                maxsize = n;
1382        else
1383                npages = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE);
1384        p = get_pages_array(npages);
1385        if (!p)
1386                return -ENOMEM;
1387        n = __pipe_get_pages(i, maxsize, p, iter_head, start);
1388        if (n > 0)
1389                *pages = p;
1390        else
1391                kvfree(p);
1392        return n;
1393}
1394
1395ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
1396                   struct page ***pages, size_t maxsize,
1397                   size_t *start)
1398{
1399        struct page **p;
1400
1401        if (maxsize > i->count)
1402                maxsize = i->count;
1403
1404        if (unlikely(iov_iter_is_pipe(i)))
1405                return pipe_get_pages_alloc(i, pages, maxsize, start);
1406        if (unlikely(iov_iter_is_discard(i)))
1407                return -EFAULT;
1408
1409        iterate_all_kinds(i, maxsize, v, ({
1410                unsigned long addr = (unsigned long)v.iov_base;
1411                size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
1412                int n;
1413                int res;
1414
1415                addr &= ~(PAGE_SIZE - 1);
1416                n = DIV_ROUND_UP(len, PAGE_SIZE);
1417                p = get_pages_array(n);
1418                if (!p)
1419                        return -ENOMEM;
1420                res = get_user_pages_fast(addr, n,
1421                                iov_iter_rw(i) != WRITE ?  FOLL_WRITE : 0, p);
1422                if (unlikely(res < 0)) {
1423                        kvfree(p);
1424                        return res;
1425                }
1426                *pages = p;
1427                return (res == n ? len : res * PAGE_SIZE) - *start;
1428        0;}),({
1429                /* can't be more than PAGE_SIZE */
1430                *start = v.bv_offset;
1431                *pages = p = get_pages_array(1);
1432                if (!p)
1433                        return -ENOMEM;
1434                get_page(*p = v.bv_page);
1435                return v.bv_len;
1436        }),({
1437                return -EFAULT;
1438        })
1439        )
1440        return 0;
1441}
1442EXPORT_SYMBOL(iov_iter_get_pages_alloc);
1443
1444size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
1445                               struct iov_iter *i)
1446{
1447        char *to = addr;
1448        __wsum sum, next;
1449        size_t off = 0;
1450        sum = *csum;
1451        if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
1452                WARN_ON(1);
1453                return 0;
1454        }
1455        iterate_and_advance(i, bytes, v, ({
1456                next = csum_and_copy_from_user(v.iov_base,
1457                                               (to += v.iov_len) - v.iov_len,
1458                                               v.iov_len);
1459                if (next) {
1460                        sum = csum_block_add(sum, next, off);
1461                        off += v.iov_len;
1462                }
1463                next ? 0 : v.iov_len;
1464        }), ({
1465                char *p = kmap_atomic(v.bv_page);
1466                sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
1467                                      p + v.bv_offset, v.bv_len,
1468                                      sum, off);
1469                kunmap_atomic(p);
1470                off += v.bv_len;
1471        }),({
1472                sum = csum_and_memcpy((to += v.iov_len) - v.iov_len,
1473                                      v.iov_base, v.iov_len,
1474                                      sum, off);
1475                off += v.iov_len;
1476        })
1477        )
1478        *csum = sum;
1479        return bytes;
1480}
1481EXPORT_SYMBOL(csum_and_copy_from_iter);
1482
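/*
 * All-or-nothing variant of csum_and_copy_from_iter(): returns false
 * without advancing the iterator if it holds fewer than @bytes or if
 * any user copy faults (the destination may then contain a partial
 * copy, but *csum is left untouched).  On success the iterator is
 * advanced by @bytes and true is returned.  Networking callers such as
 * ip_generic_getfrag() rely on this form.
 */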
1483bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum,
1484                               struct iov_iter *i)
1485{
1486        char *to = addr;
1487        __wsum sum, next;
1488        size_t off = 0;
1489        sum = *csum;
1490        if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
1491                WARN_ON(1);
1492                return false;
1493        }
1494        if (unlikely(i->count < bytes))
1495                return false;
1496        iterate_all_kinds(i, bytes, v, ({
1497                next = csum_and_copy_from_user(v.iov_base,
1498                                               (to += v.iov_len) - v.iov_len,
1499                                               v.iov_len);
1500                if (!next)
1501                        return false;
1502                sum = csum_block_add(sum, next, off);
1503                off += v.iov_len;
1504                0;
1505        }), ({
1506                char *p = kmap_atomic(v.bv_page);
1507                sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
1508                                      p + v.bv_offset, v.bv_len,
1509                                      sum, off);
1510                kunmap_atomic(p);
1511                off += v.bv_len;
1512        }),({
1513                sum = csum_and_memcpy((to += v.iov_len) - v.iov_len,
1514                                      v.iov_base, v.iov_len,
1515                                      sum, off);
1516                off += v.iov_len;
1517        })
1518        )
1519        *csum = sum;
1520        iov_iter_advance(i, bytes);
1521        return true;
1522}
1523EXPORT_SYMBOL(csum_and_copy_from_iter_full);
1524
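/*
 * Transmit-direction counterpart of csum_and_copy_from_iter(): copy
 * @bytes from @addr into the iterator while accumulating the checksum
 * into the __wsum that @csump points at.  Pipe iterators take the
 * csum_and_copy_to_pipe_iter() path; discard iterators are rejected.
 * As above, a short return means a user copy faulted part-way.
 */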
1525size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *csump,
1526                             struct iov_iter *i)
1527{
1528        const char *from = addr;
1529        __wsum *csum = csump;
1530        __wsum sum, next;
1531        size_t off = 0;
1532
1533        if (unlikely(iov_iter_is_pipe(i)))
1534                return csum_and_copy_to_pipe_iter(addr, bytes, csum, i);
1535
1536        sum = *csum;
1537        if (unlikely(iov_iter_is_discard(i))) {
1538                WARN_ON(1);     /* for now */
1539                return 0;
1540        }
1541        iterate_and_advance(i, bytes, v, ({
1542                next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len,
1543                                             v.iov_base,
1544                                             v.iov_len);
1545                if (next) {
1546                        sum = csum_block_add(sum, next, off);
1547                        off += v.iov_len;
1548                }
1549                next ? 0 : v.iov_len;
1550        }), ({
1551                char *p = kmap_atomic(v.bv_page);
1552                sum = csum_and_memcpy(p + v.bv_offset,
1553                                      (from += v.bv_len) - v.bv_len,
1554                                      v.bv_len, sum, off);
1555                kunmap_atomic(p);
1556                off += v.bv_len;
1557        }),({
1558                sum = csum_and_memcpy(v.iov_base,
1559                                     (from += v.iov_len) - v.iov_len,
1560                                     v.iov_len, sum, off);
1561                off += v.iov_len;
1562        })
1563        )
1564        *csum = sum;
1565        return bytes;
1566}
1567EXPORT_SYMBOL(csum_and_copy_to_iter);
1568
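/*
 * Copy @bytes into the iterator and feed whatever was actually copied
 * into the ahash request at @hashp through a one-entry scatterlist.
 * Note that the crypto_ahash_update() return value is ignored.  With
 * CONFIG_CRYPTO_HASH disabled this degenerates to a stub returning 0.
 */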
1569size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp,
1570                struct iov_iter *i)
1571{
1572#ifdef CONFIG_CRYPTO_HASH
1573        struct ahash_request *hash = hashp;
1574        struct scatterlist sg;
1575        size_t copied;
1576
1577        copied = copy_to_iter(addr, bytes, i);
1578        sg_init_one(&sg, addr, copied);
1579        ahash_request_set_crypt(hash, &sg, NULL, copied);
1580        crypto_ahash_update(hash);
1581        return copied;
1582#else
1583        return 0;
1584#endif
1585}
1586EXPORT_SYMBOL(hash_and_copy_to_iter);
1587
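/*
 * Estimate, without pinning anything, how many pages the iterator's
 * remaining data spans: pipe space available for the user, page span
 * per iovec/kvec segment, one page per bvec.  The sum can overcount
 * when segments share a page, and the result is clamped to @maxpages;
 * it is typically used to size arrays for iov_iter_get_pages().
 */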
1588int iov_iter_npages(const struct iov_iter *i, int maxpages)
1589{
1590        size_t size = i->count;
1591        int npages = 0;
1592
1593        if (!size)
1594                return 0;
1595        if (unlikely(iov_iter_is_discard(i)))
1596                return 0;
1597
1598        if (unlikely(iov_iter_is_pipe(i))) {
1599                struct pipe_inode_info *pipe = i->pipe;
1600                unsigned int iter_head;
1601                size_t off;
1602
1603                if (!sanity(i))
1604                        return 0;
1605
1606                data_start(i, &iter_head, &off);
1607                /* some of this one + all after this one */
1608                npages = pipe_space_for_user(iter_head, pipe->tail, pipe);
1609                if (npages >= maxpages)
1610                        return maxpages;
1611        } else iterate_all_kinds(i, size, v, ({
1612                unsigned long p = (unsigned long)v.iov_base;
1613                npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
1614                        - p / PAGE_SIZE;
1615                if (npages >= maxpages)
1616                        return maxpages;
1617        0;}),({
1618                npages++;
1619                if (npages >= maxpages)
1620                        return maxpages;
1621        }),({
1622                unsigned long p = (unsigned long)v.iov_base;
1623                npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
1624                        - p / PAGE_SIZE;
1625                if (npages >= maxpages)
1626                        return maxpages;
1627        })
1628        )
1629        return npages;
1630}
1631EXPORT_SYMBOL(iov_iter_npages);
1632
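/*
 * Duplicate @old into @new, deep-copying the segment array (iovec,
 * kvec or bio_vec) with kmemdup() so the copy stays usable after the
 * original array goes away.  Returns the new segment array, or NULL
 * on allocation failure and for pipe/discard iterators, which cannot
 * be duplicated this way.  The caller eventually kfree()s the result.
 */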
1633const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
1634{
1635        *new = *old;
1636        if (unlikely(iov_iter_is_pipe(new))) {
1637                WARN_ON(1);
1638                return NULL;
1639        }
1640        if (unlikely(iov_iter_is_discard(new)))
1641                return NULL;
1642        if (iov_iter_is_bvec(new))
1643                return new->bvec = kmemdup(new->bvec,
1644                                    new->nr_segs * sizeof(struct bio_vec),
1645                                    flags);
1646        else
1647                /* iovec and kvec have identical layout */
1648                return new->iov = kmemdup(new->iov,
1649                                   new->nr_segs * sizeof(struct iovec),
1650                                   flags);
1651}
1652EXPORT_SYMBOL(dup_iter);
1653
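/*
 * Fetch @nr_segs compat (32-bit) iovecs from userspace and widen them
 * into native struct iovec entries, using the unsafe_get_user() fast
 * path under user_access_begin().  A compat_size_t length that does
 * not fit in compat_ssize_t shows up negative and yields -EINVAL;
 * access failures yield -EFAULT.
 */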
1654static int copy_compat_iovec_from_user(struct iovec *iov,
1655                const struct iovec __user *uvec, unsigned long nr_segs)
1656{
1657        const struct compat_iovec __user *uiov =
1658                (const struct compat_iovec __user *)uvec;
1659        int ret = -EFAULT, i;
1660
1661        if (!user_access_begin(uvec, nr_segs * sizeof(*uvec)))
1662                return -EFAULT;
1663
1664        for (i = 0; i < nr_segs; i++) {
1665                compat_uptr_t buf;
1666                compat_ssize_t len;
1667
1668                unsafe_get_user(len, &uiov[i].iov_len, uaccess_end);
1669                unsafe_get_user(buf, &uiov[i].iov_base, uaccess_end);
1670
1671                /* check for compat_size_t not fitting in compat_ssize_t */
1672                if (len < 0) {
1673                        ret = -EINVAL;
1674                        goto uaccess_end;
1675                }
1676                iov[i].iov_base = compat_ptr(buf);
1677                iov[i].iov_len = len;
1678        }
1679
1680        ret = 0;
1681uaccess_end:
1682        user_access_end();
1683        return ret;
1684}
1685
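/*
 * Native counterpart of the above: bulk-copy the user iovec array,
 * then reject any segment whose length does not fit in ssize_t.
 */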
1686static int copy_iovec_from_user(struct iovec *iov,
1687                const struct iovec __user *uvec, unsigned long nr_segs)
1688{
1689        unsigned long seg;
1690
1691        if (copy_from_user(iov, uvec, nr_segs * sizeof(*uvec)))
1692                return -EFAULT;
1693        for (seg = 0; seg < nr_segs; seg++) {
1694                if ((ssize_t)iov[seg].iov_len < 0)
1695                        return -EINVAL;
1696        }
1697
1698        return 0;
1699}
1700
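/*
 * Copy an iovec array in from userspace.  Arrays of up to @fast_segs
 * entries land in the caller-supplied @fast_iov (usually on-stack);
 * larger ones are kmalloc'ed.  @compat selects the 32-bit layout.
 * Returns the array (possibly @fast_iov itself) or an ERR_PTR().  A
 * hypothetical direct caller would clean up like this (sketch only;
 * most callers go through import_iovec() instead):
 *
 *	struct iovec fast[UIO_FASTIOV], *iov;
 *
 *	iov = iovec_from_user(uvec, n, UIO_FASTIOV, fast, false);
 *	if (IS_ERR(iov))
 *		return PTR_ERR(iov);
 *	... use iov[0..n-1] ...
 *	if (iov != fast)
 *		kfree(iov);
 */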
1701struct iovec *iovec_from_user(const struct iovec __user *uvec,
1702                unsigned long nr_segs, unsigned long fast_segs,
1703                struct iovec *fast_iov, bool compat)
1704{
1705        struct iovec *iov = fast_iov;
1706        int ret;
1707
1708        /*
1709         * SuS says "The readv() function *may* fail if the iovcnt argument was
1710         * less than or equal to 0, or greater than {IOV_MAX}."  Linux has
1711         * traditionally returned zero for zero segments, so...
1712         */
1713        if (nr_segs == 0)
1714                return iov;
1715        if (nr_segs > UIO_MAXIOV)
1716                return ERR_PTR(-EINVAL);
1717        if (nr_segs > fast_segs) {
1718                iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL);
1719                if (!iov)
1720                        return ERR_PTR(-ENOMEM);
1721        }
1722
1723        if (compat)
1724                ret = copy_compat_iovec_from_user(iov, uvec, nr_segs);
1725        else
1726                ret = copy_iovec_from_user(iov, uvec, nr_segs);
1727        if (ret) {
1728                if (iov != fast_iov)
1729                        kfree(iov);
1730                return ERR_PTR(ret);
1731        }
1732
1733        return iov;
1734}
1735
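/*
 * Common implementation behind import_iovec() and its compat users:
 * pull the iovec array in from userspace, validate every base pointer
 * with access_ok(), clamp the total to MAX_RW_COUNT (truncating the
 * segment that crosses the limit), and initialize @i over the result.
 * On return *iovp is either NULL (the caller's fast array was used) or
 * the heap array the caller must kfree() later.
 */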
1736ssize_t __import_iovec(int type, const struct iovec __user *uvec,
1737                 unsigned nr_segs, unsigned fast_segs, struct iovec **iovp,
1738                 struct iov_iter *i, bool compat)
1739{
1740        ssize_t total_len = 0;
1741        unsigned long seg;
1742        struct iovec *iov;
1743
1744        iov = iovec_from_user(uvec, nr_segs, fast_segs, *iovp, compat);
1745        if (IS_ERR(iov)) {
1746                *iovp = NULL;
1747                return PTR_ERR(iov);
1748        }
1749
1750        /*
1751         * According to the Single Unix Specification we should return EINVAL if
1752         * an element length is < 0 when cast to ssize_t or if the total length
1753         * would overflow the ssize_t return value of the system call.
1754         *
1755         * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the
1756         * overflow case.
1757         */
1758        for (seg = 0; seg < nr_segs; seg++) {
1759                ssize_t len = (ssize_t)iov[seg].iov_len;
1760
1761                if (!access_ok(iov[seg].iov_base, len)) {
1762                        if (iov != *iovp)
1763                                kfree(iov);
1764                        *iovp = NULL;
1765                        return -EFAULT;
1766                }
1767
1768                if (len > MAX_RW_COUNT - total_len) {
1769                        len = MAX_RW_COUNT - total_len;
1770                        iov[seg].iov_len = len;
1771                }
1772                total_len += len;
1773        }
1774
1775        iov_iter_init(i, type, iov, nr_segs, total_len);
1776        if (iov == *iovp)
1777                *iovp = NULL;
1778        else
1779                *iovp = iov;
1780        return total_len;
1781}
1782
1783/**
1784 * import_iovec() - Copy an array of &struct iovec from userspace
1785 *     into the kernel, check that it is valid, and initialize a new
1786 *     &struct iov_iter iterator to access it.
1787 *
1788 * @type: One of %READ or %WRITE.
1789 * @uvec: Pointer to the userspace array.
1790 * @nr_segs: Number of elements in userspace array.
1791 * @fast_segs: Number of elements in *@iovp.
1792 * @iovp: (input and output parameter) Pointer to pointer to (usually small
1793 *     on-stack) kernel array.
1794 * @i: Pointer to iterator that will be initialized on success.
1795 *
1796 * If the array pointed to by *@iovp is large enough to hold all @nr_segs,
1797 * then this function places %NULL in *@iovp on return. Otherwise, a new
1798 * array will be allocated and the result placed in *@iovp. This means that
1799 * the caller may call kfree() on *@iovp regardless of whether the small
1800 * on-stack array was used or not (and regardless of whether this function
1801 * returns an error or not).
1802 *
1803 * Return: Negative error code on error, bytes imported on success
1804 */
1805ssize_t import_iovec(int type, const struct iovec __user *uvec,
1806                 unsigned nr_segs, unsigned fast_segs,
1807                 struct iovec **iovp, struct iov_iter *i)
1808{
1809        return __import_iovec(type, uvec, nr_segs, fast_segs, iovp, i,
1810                              in_compat_syscall());
1811}
1812EXPORT_SYMBOL(import_iovec);
1813
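/*
 * Single-buffer convenience form of import_iovec(): validate one user
 * range, stash it in the caller-supplied @iov, and initialize @i over
 * that lone segment.  Lengths beyond MAX_RW_COUNT are silently
 * truncated rather than rejected.
 */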
1814int import_single_range(int rw, void __user *buf, size_t len,
1815                 struct iovec *iov, struct iov_iter *i)
1816{
1817        if (len > MAX_RW_COUNT)
1818                len = MAX_RW_COUNT;
1819        if (unlikely(!access_ok(buf, len)))
1820                return -EFAULT;
1821
1822        iov->iov_base = buf;
1823        iov->iov_len = len;
1824        iov_iter_init(i, rw, iov, 1, len);
1825        return 0;
1826}
1827EXPORT_SYMBOL(import_single_range);
1828
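/*
 * Present up to @bytes of the iterator to @f as kernel-addressable
 * struct kvec chunks, kmap()ing bvec pages around each call.  Plain
 * user-space iovecs are not supported and yield -EINVAL.  Returns the
 * result of the last call to @f; the iterator is not advanced.
 */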
1829int iov_iter_for_each_range(struct iov_iter *i, size_t bytes,
1830                            int (*f)(struct kvec *vec, void *context),
1831                            void *context)
1832{
1833        struct kvec w;
1834        int err = -EINVAL;
1835        if (!bytes)
1836                return 0;
1837
1838        iterate_all_kinds(i, bytes, v, -EINVAL, ({
1839                w.iov_base = kmap(v.bv_page) + v.bv_offset;
1840                w.iov_len = v.bv_len;
1841                err = f(&w, context);
1842                kunmap(v.bv_page);
1843                err;}), ({
1844                w = v;
1845                err = f(&w, context);})
1846        )
1847        return err;
1848}
1849EXPORT_SYMBOL(iov_iter_for_each_range);
1850