linux/fs/read_write.c
<<
>>
Prefs
   1/*
   2 *  linux/fs/read_write.c
   3 *
   4 *  Copyright (C) 1991, 1992  Linus Torvalds
   5 */
   6
   7#include <linux/slab.h> 
   8#include <linux/stat.h>
   9#include <linux/fcntl.h>
  10#include <linux/file.h>
  11#include <linux/uio.h>
  12#include <linux/smp_lock.h>
  13#include <linux/fsnotify.h>
  14#include <linux/security.h>
  15#include <linux/module.h>
  16#include <linux/syscalls.h>
  17#include <linux/pagemap.h>
  18#include <linux/splice.h>
  19#include "read_write.h"
  20
  21#include <asm/uaccess.h>
  22#include <asm/unistd.h>
  23
  24const struct file_operations generic_ro_fops = {
  25        .llseek         = generic_file_llseek,
  26        .read           = do_sync_read,
  27        .aio_read       = generic_file_aio_read,
  28        .mmap           = generic_file_readonly_mmap,
  29        .splice_read    = generic_file_splice_read,
  30};
  31
  32EXPORT_SYMBOL(generic_ro_fops);
  33
  34/**
  35 * generic_file_llseek_unlocked - lockless generic llseek implementation
  36 * @file:       file structure to seek on
  37 * @offset:     file offset to seek to
  38 * @origin:     type of seek
  39 *
  40 * Updates the file offset to the value specified by @offset and @origin.
  41 * Locking must be provided by the caller.
  42 */
  43loff_t
  44generic_file_llseek_unlocked(struct file *file, loff_t offset, int origin)
  45{
  46        struct inode *inode = file->f_mapping->host;
  47
  48        switch (origin) {
  49        case SEEK_END:
  50                offset += inode->i_size;
  51                break;
  52        case SEEK_CUR:
  53                /*
  54                 * Here we special-case the lseek(fd, 0, SEEK_CUR)
  55                 * position-querying operation.  Avoid rewriting the "same"
  56                 * f_pos value back to the file because a concurrent read(),
  57                 * write() or lseek() might have altered it
  58                 */
  59                if (offset == 0)
  60                        return file->f_pos;
  61                offset += file->f_pos;
  62                break;
  63        }
  64
  65        if (offset < 0 || offset > inode->i_sb->s_maxbytes)
  66                return -EINVAL;
  67
  68        /* Special lock needed here? */
  69        if (offset != file->f_pos) {
  70                file->f_pos = offset;
  71                file->f_version = 0;
  72        }
  73
  74        return offset;
  75}
  76EXPORT_SYMBOL(generic_file_llseek_unlocked);
  77
  78/**
  79 * generic_file_llseek - generic llseek implementation for regular files
  80 * @file:       file structure to seek on
  81 * @offset:     file offset to seek to
  82 * @origin:     type of seek
  83 *
  84 * This is a generic implemenation of ->llseek useable for all normal local
  85 * filesystems.  It just updates the file offset to the value specified by
  86 * @offset and @origin under i_mutex.
  87 */
  88loff_t generic_file_llseek(struct file *file, loff_t offset, int origin)
  89{
  90        loff_t rval;
  91
  92        mutex_lock(&file->f_dentry->d_inode->i_mutex);
  93        rval = generic_file_llseek_unlocked(file, offset, origin);
  94        mutex_unlock(&file->f_dentry->d_inode->i_mutex);
  95
  96        return rval;
  97}
  98EXPORT_SYMBOL(generic_file_llseek);
  99
 100loff_t no_llseek(struct file *file, loff_t offset, int origin)
 101{
 102        return -ESPIPE;
 103}
 104EXPORT_SYMBOL(no_llseek);
 105
 106loff_t default_llseek(struct file *file, loff_t offset, int origin)
 107{
 108        loff_t retval;
 109
 110        lock_kernel();
 111        switch (origin) {
 112                case SEEK_END:
 113                        offset += i_size_read(file->f_path.dentry->d_inode);
 114                        break;
 115                case SEEK_CUR:
 116                        if (offset == 0) {
 117                                retval = file->f_pos;
 118                                goto out;
 119                        }
 120                        offset += file->f_pos;
 121        }
 122        retval = -EINVAL;
 123        if (offset >= 0) {
 124                if (offset != file->f_pos) {
 125                        file->f_pos = offset;
 126                        file->f_version = 0;
 127                }
 128                retval = offset;
 129        }
 130out:
 131        unlock_kernel();
 132        return retval;
 133}
 134EXPORT_SYMBOL(default_llseek);
 135
 136loff_t vfs_llseek(struct file *file, loff_t offset, int origin)
 137{
 138        loff_t (*fn)(struct file *, loff_t, int);
 139
 140        fn = no_llseek;
 141        if (file->f_mode & FMODE_LSEEK) {
 142                fn = default_llseek;
 143                if (file->f_op && file->f_op->llseek)
 144                        fn = file->f_op->llseek;
 145        }
 146        return fn(file, offset, origin);
 147}
 148EXPORT_SYMBOL(vfs_llseek);
 149
 150SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, origin)
 151{
 152        off_t retval;
 153        struct file * file;
 154        int fput_needed;
 155
 156        retval = -EBADF;
 157        file = fget_light(fd, &fput_needed);
 158        if (!file)
 159                goto bad;
 160
 161        retval = -EINVAL;
 162        if (origin <= SEEK_MAX) {
 163                loff_t res = vfs_llseek(file, offset, origin);
 164                retval = res;
 165                if (res != (loff_t)retval)
 166                        retval = -EOVERFLOW;    /* LFS: should only happen on 32 bit platforms */
 167        }
 168        fput_light(file, fput_needed);
 169bad:
 170        return retval;
 171}
 172
 173#ifdef __ARCH_WANT_SYS_LLSEEK
 174SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high,
 175                unsigned long, offset_low, loff_t __user *, result,
 176                unsigned int, origin)
 177{
 178        int retval;
 179        struct file * file;
 180        loff_t offset;
 181        int fput_needed;
 182
 183        retval = -EBADF;
 184        file = fget_light(fd, &fput_needed);
 185        if (!file)
 186                goto bad;
 187
 188        retval = -EINVAL;
 189        if (origin > SEEK_MAX)
 190                goto out_putf;
 191
 192        offset = vfs_llseek(file, ((loff_t) offset_high << 32) | offset_low,
 193                        origin);
 194
 195        retval = (int)offset;
 196        if (offset >= 0) {
 197                retval = -EFAULT;
 198                if (!copy_to_user(result, &offset, sizeof(offset)))
 199                        retval = 0;
 200        }
 201out_putf:
 202        fput_light(file, fput_needed);
 203bad:
 204        return retval;
 205}
 206#endif
 207
 208/*
 209 * rw_verify_area doesn't like huge counts. We limit
 210 * them to something that fits in "int" so that others
 211 * won't have to do range checks all the time.
 212 */
 213#define MAX_RW_COUNT (INT_MAX & PAGE_CACHE_MASK)
 214
 215int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count)
 216{
 217        struct inode *inode;
 218        loff_t pos;
 219        int retval = -EINVAL;
 220
 221        inode = file->f_path.dentry->d_inode;
 222        if (unlikely((ssize_t) count < 0))
 223                return retval;
 224        pos = *ppos;
 225        if (unlikely((pos < 0) || (loff_t) (pos + count) < 0))
 226                return retval;
 227
 228        if (unlikely(inode->i_flock && mandatory_lock(inode))) {
 229                retval = locks_mandatory_area(
 230                        read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE,
 231                        inode, file, pos, count);
 232                if (retval < 0)
 233                        return retval;
 234        }
 235        retval = security_file_permission(file,
 236                                read_write == READ ? MAY_READ : MAY_WRITE);
 237        if (retval)
 238                return retval;
 239        return count > MAX_RW_COUNT ? MAX_RW_COUNT : count;
 240}
 241
 242static void wait_on_retry_sync_kiocb(struct kiocb *iocb)
 243{
 244        set_current_state(TASK_UNINTERRUPTIBLE);
 245        if (!kiocbIsKicked(iocb))
 246                schedule();
 247        else
 248                kiocbClearKicked(iocb);
 249        __set_current_state(TASK_RUNNING);
 250}
 251
 252ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
 253{
 254        struct iovec iov = { .iov_base = buf, .iov_len = len };
 255        struct kiocb kiocb;
 256        ssize_t ret;
 257
 258        init_sync_kiocb(&kiocb, filp);
 259        kiocb.ki_pos = *ppos;
 260        kiocb.ki_left = len;
 261
 262        for (;;) {
 263                ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos);
 264                if (ret != -EIOCBRETRY)
 265                        break;
 266                wait_on_retry_sync_kiocb(&kiocb);
 267        }
 268
 269        if (-EIOCBQUEUED == ret)
 270                ret = wait_on_sync_kiocb(&kiocb);
 271        *ppos = kiocb.ki_pos;
 272        return ret;
 273}
 274
 275EXPORT_SYMBOL(do_sync_read);
 276
 277ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
 278{
 279        ssize_t ret;
 280
 281        if (!(file->f_mode & FMODE_READ))
 282                return -EBADF;
 283        if (!file->f_op || (!file->f_op->read && !file->f_op->aio_read))
 284                return -EINVAL;
 285        if (unlikely(!access_ok(VERIFY_WRITE, buf, count)))
 286                return -EFAULT;
 287
 288        ret = rw_verify_area(READ, file, pos, count);
 289        if (ret >= 0) {
 290                count = ret;
 291                if (file->f_op->read)
 292                        ret = file->f_op->read(file, buf, count, pos);
 293                else
 294                        ret = do_sync_read(file, buf, count, pos);
 295                if (ret > 0) {
 296                        fsnotify_access(file->f_path.dentry);
 297                        add_rchar(current, ret);
 298                }
 299                inc_syscr(current);
 300        }
 301
 302        return ret;
 303}
 304
 305EXPORT_SYMBOL(vfs_read);
 306
 307ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
 308{
 309        struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len };
 310        struct kiocb kiocb;
 311        ssize_t ret;
 312
 313        init_sync_kiocb(&kiocb, filp);
 314        kiocb.ki_pos = *ppos;
 315        kiocb.ki_left = len;
 316
 317        for (;;) {
 318                ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos);
 319                if (ret != -EIOCBRETRY)
 320                        break;
 321                wait_on_retry_sync_kiocb(&kiocb);
 322        }
 323
 324        if (-EIOCBQUEUED == ret)
 325                ret = wait_on_sync_kiocb(&kiocb);
 326        *ppos = kiocb.ki_pos;
 327        return ret;
 328}
 329
 330EXPORT_SYMBOL(do_sync_write);
 331
 332ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
 333{
 334        ssize_t ret;
 335
 336        if (!(file->f_mode & FMODE_WRITE))
 337                return -EBADF;
 338        if (!file->f_op || (!file->f_op->write && !file->f_op->aio_write))
 339                return -EINVAL;
 340        if (unlikely(!access_ok(VERIFY_READ, buf, count)))
 341                return -EFAULT;
 342
 343        ret = rw_verify_area(WRITE, file, pos, count);
 344        if (ret >= 0) {
 345                count = ret;
 346                if (file->f_op->write)
 347                        ret = file->f_op->write(file, buf, count, pos);
 348                else
 349                        ret = do_sync_write(file, buf, count, pos);
 350                if (ret > 0) {
 351                        fsnotify_modify(file->f_path.dentry);
 352                        add_wchar(current, ret);
 353                }
 354                inc_syscw(current);
 355        }
 356
 357        return ret;
 358}
 359
 360EXPORT_SYMBOL(vfs_write);
 361
 362static inline loff_t file_pos_read(struct file *file)
 363{
 364        return file->f_pos;
 365}
 366
 367static inline void file_pos_write(struct file *file, loff_t pos)
 368{
 369        file->f_pos = pos;
 370}
 371
 372SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
 373{
 374        struct file *file;
 375        ssize_t ret = -EBADF;
 376        int fput_needed;
 377
 378        file = fget_light(fd, &fput_needed);
 379        if (file) {
 380                loff_t pos = file_pos_read(file);
 381                ret = vfs_read(file, buf, count, &pos);
 382                file_pos_write(file, pos);
 383                fput_light(file, fput_needed);
 384        }
 385
 386        return ret;
 387}
 388
 389SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
 390                size_t, count)
 391{
 392        struct file *file;
 393        ssize_t ret = -EBADF;
 394        int fput_needed;
 395
 396        file = fget_light(fd, &fput_needed);
 397        if (file) {
 398                loff_t pos = file_pos_read(file);
 399                ret = vfs_write(file, buf, count, &pos);
 400                file_pos_write(file, pos);
 401                fput_light(file, fput_needed);
 402        }
 403
 404        return ret;
 405}
 406
 407SYSCALL_DEFINE(pread64)(unsigned int fd, char __user *buf,
 408                        size_t count, loff_t pos)
 409{
 410        struct file *file;
 411        ssize_t ret = -EBADF;
 412        int fput_needed;
 413
 414        if (pos < 0)
 415                return -EINVAL;
 416
 417        file = fget_light(fd, &fput_needed);
 418        if (file) {
 419                ret = -ESPIPE;
 420                if (file->f_mode & FMODE_PREAD)
 421                        ret = vfs_read(file, buf, count, &pos);
 422                fput_light(file, fput_needed);
 423        }
 424
 425        return ret;
 426}
 427#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
 428asmlinkage long SyS_pread64(long fd, long buf, long count, loff_t pos)
 429{
 430        return SYSC_pread64((unsigned int) fd, (char __user *) buf,
 431                            (size_t) count, pos);
 432}
 433SYSCALL_ALIAS(sys_pread64, SyS_pread64);
 434#endif
 435
 436SYSCALL_DEFINE(pwrite64)(unsigned int fd, const char __user *buf,
 437                         size_t count, loff_t pos)
 438{
 439        struct file *file;
 440        ssize_t ret = -EBADF;
 441        int fput_needed;
 442
 443        if (pos < 0)
 444                return -EINVAL;
 445
 446        file = fget_light(fd, &fput_needed);
 447        if (file) {
 448                ret = -ESPIPE;
 449                if (file->f_mode & FMODE_PWRITE)  
 450                        ret = vfs_write(file, buf, count, &pos);
 451                fput_light(file, fput_needed);
 452        }
 453
 454        return ret;
 455}
 456#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
 457asmlinkage long SyS_pwrite64(long fd, long buf, long count, loff_t pos)
 458{
 459        return SYSC_pwrite64((unsigned int) fd, (const char __user *) buf,
 460                             (size_t) count, pos);
 461}
 462SYSCALL_ALIAS(sys_pwrite64, SyS_pwrite64);
 463#endif
 464
 465/*
 466 * Reduce an iovec's length in-place.  Return the resulting number of segments
 467 */
 468unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to)
 469{
 470        unsigned long seg = 0;
 471        size_t len = 0;
 472
 473        while (seg < nr_segs) {
 474                seg++;
 475                if (len + iov->iov_len >= to) {
 476                        iov->iov_len = to - len;
 477                        break;
 478                }
 479                len += iov->iov_len;
 480                iov++;
 481        }
 482        return seg;
 483}
 484EXPORT_SYMBOL(iov_shorten);
 485
 486ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
 487                unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn)
 488{
 489        struct kiocb kiocb;
 490        ssize_t ret;
 491
 492        init_sync_kiocb(&kiocb, filp);
 493        kiocb.ki_pos = *ppos;
 494        kiocb.ki_left = len;
 495        kiocb.ki_nbytes = len;
 496
 497        for (;;) {
 498                ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos);
 499                if (ret != -EIOCBRETRY)
 500                        break;
 501                wait_on_retry_sync_kiocb(&kiocb);
 502        }
 503
 504        if (ret == -EIOCBQUEUED)
 505                ret = wait_on_sync_kiocb(&kiocb);
 506        *ppos = kiocb.ki_pos;
 507        return ret;
 508}
 509
 510/* Do it by hand, with file-ops */
 511ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov,
 512                unsigned long nr_segs, loff_t *ppos, io_fn_t fn)
 513{
 514        struct iovec *vector = iov;
 515        ssize_t ret = 0;
 516
 517        while (nr_segs > 0) {
 518                void __user *base;
 519                size_t len;
 520                ssize_t nr;
 521
 522                base = vector->iov_base;
 523                len = vector->iov_len;
 524                vector++;
 525                nr_segs--;
 526
 527                nr = fn(filp, base, len, ppos);
 528
 529                if (nr < 0) {
 530                        if (!ret)
 531                                ret = nr;
 532                        break;
 533                }
 534                ret += nr;
 535                if (nr != len)
 536                        break;
 537        }
 538
 539        return ret;
 540}
 541
 542/* A write operation does a read from user space and vice versa */
 543#define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ)
 544
 545ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
 546                              unsigned long nr_segs, unsigned long fast_segs,
 547                              struct iovec *fast_pointer,
 548                              struct iovec **ret_pointer)
 549  {
 550        unsigned long seg;
 551        ssize_t ret;
 552        struct iovec *iov = fast_pointer;
 553
 554        /*
 555         * SuS says "The readv() function *may* fail if the iovcnt argument
 556         * was less than or equal to 0, or greater than {IOV_MAX}.  Linux has
 557         * traditionally returned zero for zero segments, so...
 558         */
 559        if (nr_segs == 0) {
 560                ret = 0;
 561                goto out;
 562        }
 563
 564        /*
 565         * First get the "struct iovec" from user memory and
 566         * verify all the pointers
 567         */
 568        if (nr_segs > UIO_MAXIOV) {
 569                ret = -EINVAL;
 570                goto out;
 571        }
 572        if (nr_segs > fast_segs) {
 573                iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL);
 574                if (iov == NULL) {
 575                        ret = -ENOMEM;
 576                        goto out;
 577                }
 578        }
 579        if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) {
 580                ret = -EFAULT;
 581                goto out;
 582        }
 583
 584        /*
 585         * According to the Single Unix Specification we should return EINVAL
 586         * if an element length is < 0 when cast to ssize_t or if the
 587         * total length would overflow the ssize_t return value of the
 588         * system call.
 589         */
 590        ret = 0;
 591        for (seg = 0; seg < nr_segs; seg++) {
 592                void __user *buf = iov[seg].iov_base;
 593                ssize_t len = (ssize_t)iov[seg].iov_len;
 594
 595                /* see if we we're about to use an invalid len or if
 596                 * it's about to overflow ssize_t */
 597                if (len < 0 || (ret + len < ret)) {
 598                        ret = -EINVAL;
 599                        goto out;
 600                }
 601                if (unlikely(!access_ok(vrfy_dir(type), buf, len))) {
 602                        ret = -EFAULT;
 603                        goto out;
 604                }
 605
 606                ret += len;
 607        }
 608out:
 609        *ret_pointer = iov;
 610        return ret;
 611}
 612
 613static ssize_t do_readv_writev(int type, struct file *file,
 614                               const struct iovec __user * uvector,
 615                               unsigned long nr_segs, loff_t *pos)
 616{
 617        size_t tot_len;
 618        struct iovec iovstack[UIO_FASTIOV];
 619        struct iovec *iov = iovstack;
 620        ssize_t ret;
 621        io_fn_t fn;
 622        iov_fn_t fnv;
 623
 624        if (!file->f_op) {
 625                ret = -EINVAL;
 626                goto out;
 627        }
 628
 629        ret = rw_copy_check_uvector(type, uvector, nr_segs,
 630                        ARRAY_SIZE(iovstack), iovstack, &iov);
 631        if (ret <= 0)
 632                goto out;
 633
 634        tot_len = ret;
 635        ret = rw_verify_area(type, file, pos, tot_len);
 636        if (ret < 0)
 637                goto out;
 638
 639        fnv = NULL;
 640        if (type == READ) {
 641                fn = file->f_op->read;
 642                fnv = file->f_op->aio_read;
 643        } else {
 644                fn = (io_fn_t)file->f_op->write;
 645                fnv = file->f_op->aio_write;
 646        }
 647
 648        if (fnv)
 649                ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
 650                                                pos, fnv);
 651        else
 652                ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn);
 653
 654out:
 655        if (iov != iovstack)
 656                kfree(iov);
 657        if ((ret + (type == READ)) > 0) {
 658                if (type == READ)
 659                        fsnotify_access(file->f_path.dentry);
 660                else
 661                        fsnotify_modify(file->f_path.dentry);
 662        }
 663        return ret;
 664}
 665
 666ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
 667                  unsigned long vlen, loff_t *pos)
 668{
 669        if (!(file->f_mode & FMODE_READ))
 670                return -EBADF;
 671        if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read))
 672                return -EINVAL;
 673
 674        return do_readv_writev(READ, file, vec, vlen, pos);
 675}
 676
 677EXPORT_SYMBOL(vfs_readv);
 678
 679ssize_t vfs_writev(struct file *file, const struct iovec __user *vec,
 680                   unsigned long vlen, loff_t *pos)
 681{
 682        if (!(file->f_mode & FMODE_WRITE))
 683                return -EBADF;
 684        if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write))
 685                return -EINVAL;
 686
 687        return do_readv_writev(WRITE, file, vec, vlen, pos);
 688}
 689
 690EXPORT_SYMBOL(vfs_writev);
 691
 692SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
 693                unsigned long, vlen)
 694{
 695        struct file *file;
 696        ssize_t ret = -EBADF;
 697        int fput_needed;
 698
 699        file = fget_light(fd, &fput_needed);
 700        if (file) {
 701                loff_t pos = file_pos_read(file);
 702                ret = vfs_readv(file, vec, vlen, &pos);
 703                file_pos_write(file, pos);
 704                fput_light(file, fput_needed);
 705        }
 706
 707        if (ret > 0)
 708                add_rchar(current, ret);
 709        inc_syscr(current);
 710        return ret;
 711}
 712
 713SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
 714                unsigned long, vlen)
 715{
 716        struct file *file;
 717        ssize_t ret = -EBADF;
 718        int fput_needed;
 719
 720        file = fget_light(fd, &fput_needed);
 721        if (file) {
 722                loff_t pos = file_pos_read(file);
 723                ret = vfs_writev(file, vec, vlen, &pos);
 724                file_pos_write(file, pos);
 725                fput_light(file, fput_needed);
 726        }
 727
 728        if (ret > 0)
 729                add_wchar(current, ret);
 730        inc_syscw(current);
 731        return ret;
 732}
 733
 734static inline loff_t pos_from_hilo(unsigned long high, unsigned long low)
 735{
 736#define HALF_LONG_BITS (BITS_PER_LONG / 2)
 737        return (((loff_t)high << HALF_LONG_BITS) << HALF_LONG_BITS) | low;
 738}
 739
 740SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec,
 741                unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
 742{
 743        loff_t pos = pos_from_hilo(pos_h, pos_l);
 744        struct file *file;
 745        ssize_t ret = -EBADF;
 746        int fput_needed;
 747
 748        if (pos < 0)
 749                return -EINVAL;
 750
 751        file = fget_light(fd, &fput_needed);
 752        if (file) {
 753                ret = -ESPIPE;
 754                if (file->f_mode & FMODE_PREAD)
 755                        ret = vfs_readv(file, vec, vlen, &pos);
 756                fput_light(file, fput_needed);
 757        }
 758
 759        if (ret > 0)
 760                add_rchar(current, ret);
 761        inc_syscr(current);
 762        return ret;
 763}
 764
 765SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
 766                unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
 767{
 768        loff_t pos = pos_from_hilo(pos_h, pos_l);
 769        struct file *file;
 770        ssize_t ret = -EBADF;
 771        int fput_needed;
 772
 773        if (pos < 0)
 774                return -EINVAL;
 775
 776        file = fget_light(fd, &fput_needed);
 777        if (file) {
 778                ret = -ESPIPE;
 779                if (file->f_mode & FMODE_PWRITE)
 780                        ret = vfs_writev(file, vec, vlen, &pos);
 781                fput_light(file, fput_needed);
 782        }
 783
 784        if (ret > 0)
 785                add_wchar(current, ret);
 786        inc_syscw(current);
 787        return ret;
 788}
 789
 790static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
 791                           size_t count, loff_t max)
 792{
 793        struct file * in_file, * out_file;
 794        struct inode * in_inode, * out_inode;
 795        loff_t pos;
 796        ssize_t retval;
 797        int fput_needed_in, fput_needed_out, fl;
 798
 799        /*
 800         * Get input file, and verify that it is ok..
 801         */
 802        retval = -EBADF;
 803        in_file = fget_light(in_fd, &fput_needed_in);
 804        if (!in_file)
 805                goto out;
 806        if (!(in_file->f_mode & FMODE_READ))
 807                goto fput_in;
 808        retval = -ESPIPE;
 809        if (!ppos)
 810                ppos = &in_file->f_pos;
 811        else
 812                if (!(in_file->f_mode & FMODE_PREAD))
 813                        goto fput_in;
 814        retval = rw_verify_area(READ, in_file, ppos, count);
 815        if (retval < 0)
 816                goto fput_in;
 817        count = retval;
 818
 819        /*
 820         * Get output file, and verify that it is ok..
 821         */
 822        retval = -EBADF;
 823        out_file = fget_light(out_fd, &fput_needed_out);
 824        if (!out_file)
 825                goto fput_in;
 826        if (!(out_file->f_mode & FMODE_WRITE))
 827                goto fput_out;
 828        retval = -EINVAL;
 829        if (!out_file->f_op || !out_file->f_op->sendpage)
 830                goto fput_out;
 831        in_inode = in_file->f_path.dentry->d_inode;
 832        out_inode = out_file->f_path.dentry->d_inode;
 833        retval = rw_verify_area(WRITE, out_file, &out_file->f_pos, count);
 834        if (retval < 0)
 835                goto fput_out;
 836        count = retval;
 837
 838        if (!max)
 839                max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes);
 840
 841        pos = *ppos;
 842        if (unlikely(pos + count > max)) {
 843                retval = -EOVERFLOW;
 844                if (pos >= max)
 845                        goto fput_out;
 846                count = max - pos;
 847        }
 848
 849        fl = 0;
 850#if 0
 851        /*
 852         * We need to debate whether we can enable this or not. The
 853         * man page documents EAGAIN return for the output at least,
 854         * and the application is arguably buggy if it doesn't expect
 855         * EAGAIN on a non-blocking file descriptor.
 856         */
 857        if (in_file->f_flags & O_NONBLOCK)
 858                fl = SPLICE_F_NONBLOCK;
 859#endif
 860        retval = do_splice_direct(in_file, ppos, out_file, count, fl);
 861
 862        if (retval > 0) {
 863                add_rchar(current, retval);
 864                add_wchar(current, retval);
 865        }
 866
 867        inc_syscr(current);
 868        inc_syscw(current);
 869        if (*ppos > max)
 870                retval = -EOVERFLOW;
 871
 872fput_out:
 873        fput_light(out_file, fput_needed_out);
 874fput_in:
 875        fput_light(in_file, fput_needed_in);
 876out:
 877        return retval;
 878}
 879
 880SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd, off_t __user *, offset, size_t, count)
 881{
 882        loff_t pos;
 883        off_t off;
 884        ssize_t ret;
 885
 886        if (offset) {
 887                if (unlikely(get_user(off, offset)))
 888                        return -EFAULT;
 889                pos = off;
 890                ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS);
 891                if (unlikely(put_user(pos, offset)))
 892                        return -EFAULT;
 893                return ret;
 894        }
 895
 896        return do_sendfile(out_fd, in_fd, NULL, count, 0);
 897}
 898
 899SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, loff_t __user *, offset, size_t, count)
 900{
 901        loff_t pos;
 902        ssize_t ret;
 903
 904        if (offset) {
 905                if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t))))
 906                        return -EFAULT;
 907                ret = do_sendfile(out_fd, in_fd, &pos, count, 0);
 908                if (unlikely(put_user(pos, offset)))
 909                        return -EFAULT;
 910                return ret;
 911        }
 912
 913        return do_sendfile(out_fd, in_fd, NULL, count, 0);
 914}
 915