1
2
3
4
5
6
7
8#include <linux/slab.h>
9#include <linux/stat.h>
10#include <linux/sched/xacct.h>
11#include <linux/fcntl.h>
12#include <linux/file.h>
13#include <linux/uio.h>
14#include <linux/fsnotify.h>
15#include <linux/security.h>
16#include <linux/export.h>
17#include <linux/syscalls.h>
18#include <linux/pagemap.h>
19#include <linux/splice.h>
20#include <linux/compat.h>
21#include <linux/mount.h>
22#include <linux/fs.h>
23#include "internal.h"
24
25#include <linux/uaccess.h>
26#include <asm/unistd.h>
27
28const struct file_operations generic_ro_fops = {
29 .llseek = generic_file_llseek,
30 .read_iter = generic_file_read_iter,
31 .mmap = generic_file_readonly_mmap,
32 .splice_read = generic_file_splice_read,
33};
34
35EXPORT_SYMBOL(generic_ro_fops);
36
37static inline bool unsigned_offsets(struct file *file)
38{
39 return file->f_mode & FMODE_UNSIGNED_OFFSET;
40}
41
42
43
44
45
46
47
48
49
50
51
52
53
54loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize)
55{
56 if (offset < 0 && !unsigned_offsets(file))
57 return -EINVAL;
58 if (offset > maxsize)
59 return -EINVAL;
60
61 if (offset != file->f_pos) {
62 file->f_pos = offset;
63 file->f_version = 0;
64 }
65 return offset;
66}
67EXPORT_SYMBOL(vfs_setpos);
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85loff_t
86generic_file_llseek_size(struct file *file, loff_t offset, int whence,
87 loff_t maxsize, loff_t eof)
88{
89 switch (whence) {
90 case SEEK_END:
91 offset += eof;
92 break;
93 case SEEK_CUR:
94
95
96
97
98
99
100 if (offset == 0)
101 return file->f_pos;
102
103
104
105
106
107 spin_lock(&file->f_lock);
108 offset = vfs_setpos(file, file->f_pos + offset, maxsize);
109 spin_unlock(&file->f_lock);
110 return offset;
111 case SEEK_DATA:
112
113
114
115
116 if ((unsigned long long)offset >= eof)
117 return -ENXIO;
118 break;
119 case SEEK_HOLE:
120
121
122
123
124 if ((unsigned long long)offset >= eof)
125 return -ENXIO;
126 offset = eof;
127 break;
128 }
129
130 return vfs_setpos(file, offset, maxsize);
131}
132EXPORT_SYMBOL(generic_file_llseek_size);
133
134
135
136
137
138
139
140
141
142
143
144loff_t generic_file_llseek(struct file *file, loff_t offset, int whence)
145{
146 struct inode *inode = file->f_mapping->host;
147
148 return generic_file_llseek_size(file, offset, whence,
149 inode->i_sb->s_maxbytes,
150 i_size_read(inode));
151}
152EXPORT_SYMBOL(generic_file_llseek);
153
154
155
156
157
158
159
160
161
162loff_t fixed_size_llseek(struct file *file, loff_t offset, int whence, loff_t size)
163{
164 switch (whence) {
165 case SEEK_SET: case SEEK_CUR: case SEEK_END:
166 return generic_file_llseek_size(file, offset, whence,
167 size, size);
168 default:
169 return -EINVAL;
170 }
171}
172EXPORT_SYMBOL(fixed_size_llseek);
173
174
175
176
177
178
179
180
181loff_t no_seek_end_llseek(struct file *file, loff_t offset, int whence)
182{
183 switch (whence) {
184 case SEEK_SET: case SEEK_CUR:
185 return generic_file_llseek_size(file, offset, whence,
186 OFFSET_MAX, 0);
187 default:
188 return -EINVAL;
189 }
190}
191EXPORT_SYMBOL(no_seek_end_llseek);
192
193
194
195
196
197
198
199
200
201loff_t no_seek_end_llseek_size(struct file *file, loff_t offset, int whence, loff_t size)
202{
203 switch (whence) {
204 case SEEK_SET: case SEEK_CUR:
205 return generic_file_llseek_size(file, offset, whence,
206 size, 0);
207 default:
208 return -EINVAL;
209 }
210}
211EXPORT_SYMBOL(no_seek_end_llseek_size);
212
213
214
215
216
217
218
219
220
221
222
223
224loff_t noop_llseek(struct file *file, loff_t offset, int whence)
225{
226 return file->f_pos;
227}
228EXPORT_SYMBOL(noop_llseek);
229
230loff_t no_llseek(struct file *file, loff_t offset, int whence)
231{
232 return -ESPIPE;
233}
234EXPORT_SYMBOL(no_llseek);
235
236loff_t default_llseek(struct file *file, loff_t offset, int whence)
237{
238 struct inode *inode = file_inode(file);
239 loff_t retval;
240
241 inode_lock(inode);
242 switch (whence) {
243 case SEEK_END:
244 offset += i_size_read(inode);
245 break;
246 case SEEK_CUR:
247 if (offset == 0) {
248 retval = file->f_pos;
249 goto out;
250 }
251 offset += file->f_pos;
252 break;
253 case SEEK_DATA:
254
255
256
257
258
259 if (offset >= inode->i_size) {
260 retval = -ENXIO;
261 goto out;
262 }
263 break;
264 case SEEK_HOLE:
265
266
267
268
269
270 if (offset >= inode->i_size) {
271 retval = -ENXIO;
272 goto out;
273 }
274 offset = inode->i_size;
275 break;
276 }
277 retval = -EINVAL;
278 if (offset >= 0 || unsigned_offsets(file)) {
279 if (offset != file->f_pos) {
280 file->f_pos = offset;
281 file->f_version = 0;
282 }
283 retval = offset;
284 }
285out:
286 inode_unlock(inode);
287 return retval;
288}
289EXPORT_SYMBOL(default_llseek);
290
291loff_t vfs_llseek(struct file *file, loff_t offset, int whence)
292{
293 loff_t (*fn)(struct file *, loff_t, int);
294
295 fn = no_llseek;
296 if (file->f_mode & FMODE_LSEEK) {
297 if (file->f_op->llseek)
298 fn = file->f_op->llseek;
299 }
300 return fn(file, offset, whence);
301}
302EXPORT_SYMBOL(vfs_llseek);
303
304off_t ksys_lseek(unsigned int fd, off_t offset, unsigned int whence)
305{
306 off_t retval;
307 struct fd f = fdget_pos(fd);
308 if (!f.file)
309 return -EBADF;
310
311 retval = -EINVAL;
312 if (whence <= SEEK_MAX) {
313 loff_t res = vfs_llseek(f.file, offset, whence);
314 retval = res;
315 if (res != (loff_t)retval)
316 retval = -EOVERFLOW;
317 }
318 fdput_pos(f);
319 return retval;
320}
321
322SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence)
323{
324 return ksys_lseek(fd, offset, whence);
325}
326
#ifdef CONFIG_COMPAT
/*
 * Compat lseek entry point: takes a compat_off_t offset and forwards to
 * ksys_lseek().
 */
COMPAT_SYSCALL_DEFINE3(lseek, unsigned int, fd, compat_off_t, offset,
		       unsigned int, whence)
{
	return ksys_lseek(fd, offset, whence);
}
#endif
333
#ifdef __ARCH_WANT_SYS_LLSEEK
/*
 * llseek syscall: the 64-bit offset arrives as two unsigned longs and the
 * resulting position is written to the user pointer @result.
 *
 * Returns 0 on success, -EBADF for a bad @fd, -EINVAL when @whence exceeds
 * SEEK_MAX, the (int-truncated) error from vfs_llseek(), or -EFAULT when
 * the result cannot be copied out.
 */
SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high,
		unsigned long, offset_low, loff_t __user *, result,
		unsigned int, whence)
{
	struct fd f = fdget_pos(fd);
	loff_t target, newpos;
	int err = -EINVAL;

	if (!f.file)
		return -EBADF;

	if (whence <= SEEK_MAX) {
		target = ((loff_t) offset_high << 32) | offset_low;
		newpos = vfs_llseek(f.file, target, whence);

		if (newpos < 0)
			err = (int)newpos;
		else if (copy_to_user(result, &newpos, sizeof(newpos)))
			err = -EFAULT;
		else
			err = 0;
	}

	fdput_pos(f);
	return err;
}
#endif
364
365int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t count)
366{
367 struct inode *inode;
368 loff_t pos;
369 int retval = -EINVAL;
370
371 inode = file_inode(file);
372 if (unlikely((ssize_t) count < 0))
373 return retval;
374 pos = *ppos;
375 if (unlikely(pos < 0)) {
376 if (!unsigned_offsets(file))
377 return retval;
378 if (count >= -pos)
379 return -EOVERFLOW;
380 } else if (unlikely((loff_t) (pos + count) < 0)) {
381 if (!unsigned_offsets(file))
382 return retval;
383 }
384
385 if (unlikely(inode->i_flctx && mandatory_lock(inode))) {
386 retval = locks_mandatory_area(inode, file, pos, pos + count - 1,
387 read_write == READ ? F_RDLCK : F_WRLCK);
388 if (retval < 0)
389 return retval;
390 }
391 return security_file_permission(file,
392 read_write == READ ? MAY_READ : MAY_WRITE);
393}
394
395static ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
396{
397 struct iovec iov = { .iov_base = buf, .iov_len = len };
398 struct kiocb kiocb;
399 struct iov_iter iter;
400 ssize_t ret;
401
402 init_sync_kiocb(&kiocb, filp);
403 kiocb.ki_pos = *ppos;
404 iov_iter_init(&iter, READ, &iov, 1, len);
405
406 ret = call_read_iter(filp, &kiocb, &iter);
407 BUG_ON(ret == -EIOCBQUEUED);
408 *ppos = kiocb.ki_pos;
409 return ret;
410}
411
412ssize_t __vfs_read(struct file *file, char __user *buf, size_t count,
413 loff_t *pos)
414{
415 if (file->f_op->read)
416 return file->f_op->read(file, buf, count, pos);
417 else if (file->f_op->read_iter)
418 return new_sync_read(file, buf, count, pos);
419 else
420 return -EINVAL;
421}
422
423ssize_t kernel_read(struct file *file, void *buf, size_t count, loff_t *pos)
424{
425 mm_segment_t old_fs;
426 ssize_t result;
427
428 old_fs = get_fs();
429 set_fs(get_ds());
430
431 result = vfs_read(file, (void __user *)buf, count, pos);
432 set_fs(old_fs);
433 return result;
434}
435EXPORT_SYMBOL(kernel_read);
436
437ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
438{
439 ssize_t ret;
440
441 if (!(file->f_mode & FMODE_READ))
442 return -EBADF;
443 if (!(file->f_mode & FMODE_CAN_READ))
444 return -EINVAL;
445 if (unlikely(!access_ok(buf, count)))
446 return -EFAULT;
447
448 ret = rw_verify_area(READ, file, pos, count);
449 if (!ret) {
450 if (count > MAX_RW_COUNT)
451 count = MAX_RW_COUNT;
452 ret = __vfs_read(file, buf, count, pos);
453 if (ret > 0) {
454 fsnotify_access(file);
455 add_rchar(current, ret);
456 }
457 inc_syscr(current);
458 }
459
460 return ret;
461}
462
463static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
464{
465 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len };
466 struct kiocb kiocb;
467 struct iov_iter iter;
468 ssize_t ret;
469
470 init_sync_kiocb(&kiocb, filp);
471 kiocb.ki_pos = *ppos;
472 iov_iter_init(&iter, WRITE, &iov, 1, len);
473
474 ret = call_write_iter(filp, &kiocb, &iter);
475 BUG_ON(ret == -EIOCBQUEUED);
476 if (ret > 0)
477 *ppos = kiocb.ki_pos;
478 return ret;
479}
480
481ssize_t __vfs_write(struct file *file, const char __user *p, size_t count,
482 loff_t *pos)
483{
484 if (file->f_op->write)
485 return file->f_op->write(file, p, count, pos);
486 else if (file->f_op->write_iter)
487 return new_sync_write(file, p, count, pos);
488 else
489 return -EINVAL;
490}
491
492ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t *pos)
493{
494 mm_segment_t old_fs;
495 const char __user *p;
496 ssize_t ret;
497
498 if (!(file->f_mode & FMODE_CAN_WRITE))
499 return -EINVAL;
500
501 old_fs = get_fs();
502 set_fs(get_ds());
503 p = (__force const char __user *)buf;
504 if (count > MAX_RW_COUNT)
505 count = MAX_RW_COUNT;
506 ret = __vfs_write(file, p, count, pos);
507 set_fs(old_fs);
508 if (ret > 0) {
509 fsnotify_modify(file);
510 add_wchar(current, ret);
511 }
512 inc_syscw(current);
513 return ret;
514}
515EXPORT_SYMBOL(__kernel_write);
516
517ssize_t kernel_write(struct file *file, const void *buf, size_t count,
518 loff_t *pos)
519{
520 mm_segment_t old_fs;
521 ssize_t res;
522
523 old_fs = get_fs();
524 set_fs(get_ds());
525
526 res = vfs_write(file, (__force const char __user *)buf, count, pos);
527 set_fs(old_fs);
528
529 return res;
530}
531EXPORT_SYMBOL(kernel_write);
532
533ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
534{
535 ssize_t ret;
536
537 if (!(file->f_mode & FMODE_WRITE))
538 return -EBADF;
539 if (!(file->f_mode & FMODE_CAN_WRITE))
540 return -EINVAL;
541 if (unlikely(!access_ok(buf, count)))
542 return -EFAULT;
543
544 ret = rw_verify_area(WRITE, file, pos, count);
545 if (!ret) {
546 if (count > MAX_RW_COUNT)
547 count = MAX_RW_COUNT;
548 file_start_write(file);
549 ret = __vfs_write(file, buf, count, pos);
550 if (ret > 0) {
551 fsnotify_modify(file);
552 add_wchar(current, ret);
553 }
554 inc_syscw(current);
555 file_end_write(file);
556 }
557
558 return ret;
559}
560
561static inline loff_t file_pos_read(struct file *file)
562{
563 return file->f_pos;
564}
565
566static inline void file_pos_write(struct file *file, loff_t pos)
567{
568 file->f_pos = pos;
569}
570
571ssize_t ksys_read(unsigned int fd, char __user *buf, size_t count)
572{
573 struct fd f = fdget_pos(fd);
574 ssize_t ret = -EBADF;
575
576 if (f.file) {
577 loff_t pos = file_pos_read(f.file);
578 ret = vfs_read(f.file, buf, count, &pos);
579 if (ret >= 0)
580 file_pos_write(f.file, pos);
581 fdput_pos(f);
582 }
583 return ret;
584}
585
586SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
587{
588 return ksys_read(fd, buf, count);
589}
590
591ssize_t ksys_write(unsigned int fd, const char __user *buf, size_t count)
592{
593 struct fd f = fdget_pos(fd);
594 ssize_t ret = -EBADF;
595
596 if (f.file) {
597 loff_t pos = file_pos_read(f.file);
598 ret = vfs_write(f.file, buf, count, &pos);
599 if (ret >= 0)
600 file_pos_write(f.file, pos);
601 fdput_pos(f);
602 }
603
604 return ret;
605}
606
607SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
608 size_t, count)
609{
610 return ksys_write(fd, buf, count);
611}
612
613ssize_t ksys_pread64(unsigned int fd, char __user *buf, size_t count,
614 loff_t pos)
615{
616 struct fd f;
617 ssize_t ret = -EBADF;
618
619 if (pos < 0)
620 return -EINVAL;
621
622 f = fdget(fd);
623 if (f.file) {
624 ret = -ESPIPE;
625 if (f.file->f_mode & FMODE_PREAD)
626 ret = vfs_read(f.file, buf, count, &pos);
627 fdput(f);
628 }
629
630 return ret;
631}
632
633SYSCALL_DEFINE4(pread64, unsigned int, fd, char __user *, buf,
634 size_t, count, loff_t, pos)
635{
636 return ksys_pread64(fd, buf, count, pos);
637}
638
639ssize_t ksys_pwrite64(unsigned int fd, const char __user *buf,
640 size_t count, loff_t pos)
641{
642 struct fd f;
643 ssize_t ret = -EBADF;
644
645 if (pos < 0)
646 return -EINVAL;
647
648 f = fdget(fd);
649 if (f.file) {
650 ret = -ESPIPE;
651 if (f.file->f_mode & FMODE_PWRITE)
652 ret = vfs_write(f.file, buf, count, &pos);
653 fdput(f);
654 }
655
656 return ret;
657}
658
659SYSCALL_DEFINE4(pwrite64, unsigned int, fd, const char __user *, buf,
660 size_t, count, loff_t, pos)
661{
662 return ksys_pwrite64(fd, buf, count, pos);
663}
664
665static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
666 loff_t *ppos, int type, rwf_t flags)
667{
668 struct kiocb kiocb;
669 ssize_t ret;
670
671 init_sync_kiocb(&kiocb, filp);
672 ret = kiocb_set_rw_flags(&kiocb, flags);
673 if (ret)
674 return ret;
675 kiocb.ki_pos = *ppos;
676
677 if (type == READ)
678 ret = call_read_iter(filp, &kiocb, iter);
679 else
680 ret = call_write_iter(filp, &kiocb, iter);
681 BUG_ON(ret == -EIOCBQUEUED);
682 *ppos = kiocb.ki_pos;
683 return ret;
684}
685
686
687static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter,
688 loff_t *ppos, int type, rwf_t flags)
689{
690 ssize_t ret = 0;
691
692 if (flags & ~RWF_HIPRI)
693 return -EOPNOTSUPP;
694
695 while (iov_iter_count(iter)) {
696 struct iovec iovec = iov_iter_iovec(iter);
697 ssize_t nr;
698
699 if (type == READ) {
700 nr = filp->f_op->read(filp, iovec.iov_base,
701 iovec.iov_len, ppos);
702 } else {
703 nr = filp->f_op->write(filp, iovec.iov_base,
704 iovec.iov_len, ppos);
705 }
706
707 if (nr < 0) {
708 if (!ret)
709 ret = nr;
710 break;
711 }
712 ret += nr;
713 if (nr != iovec.iov_len)
714 break;
715 iov_iter_advance(iter, nr);
716 }
717
718 return ret;
719}
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
751 unsigned long nr_segs, unsigned long fast_segs,
752 struct iovec *fast_pointer,
753 struct iovec **ret_pointer)
754{
755 unsigned long seg;
756 ssize_t ret;
757 struct iovec *iov = fast_pointer;
758
759
760
761
762
763
764 if (nr_segs == 0) {
765 ret = 0;
766 goto out;
767 }
768
769
770
771
772
773 if (nr_segs > UIO_MAXIOV) {
774 ret = -EINVAL;
775 goto out;
776 }
777 if (nr_segs > fast_segs) {
778 iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL);
779 if (iov == NULL) {
780 ret = -ENOMEM;
781 goto out;
782 }
783 }
784 if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) {
785 ret = -EFAULT;
786 goto out;
787 }
788
789
790
791
792
793
794
795
796
797
798 ret = 0;
799 for (seg = 0; seg < nr_segs; seg++) {
800 void __user *buf = iov[seg].iov_base;
801 ssize_t len = (ssize_t)iov[seg].iov_len;
802
803
804
805 if (len < 0) {
806 ret = -EINVAL;
807 goto out;
808 }
809 if (type >= 0
810 && unlikely(!access_ok(buf, len))) {
811 ret = -EFAULT;
812 goto out;
813 }
814 if (len > MAX_RW_COUNT - ret) {
815 len = MAX_RW_COUNT - ret;
816 iov[seg].iov_len = len;
817 }
818 ret += len;
819 }
820out:
821 *ret_pointer = iov;
822 return ret;
823}
824
#ifdef CONFIG_COMPAT
/*
 * Compat counterpart of rw_copy_check_uvector(): reads an array of
 * struct compat_iovec from userspace element by element and converts it
 * into native struct iovec entries.
 *
 * *@ret_pointer starts out as @fast_pointer and is switched to a
 * kmalloc_array() allocation only when @nr_segs exceeds @fast_segs and the
 * allocation succeeds (on -ENOMEM it still points at @fast_pointer).  This
 * function never frees the allocation.
 *
 * The total length is capped at MAX_RW_COUNT; segments past the cap are
 * stored with a shortened (possibly zero) length.
 *
 * Returns the total byte count, or -EINVAL (too many segments, or a
 * negative compat length), -ENOMEM, or -EFAULT.
 */
ssize_t compat_rw_copy_check_uvector(int type,
		const struct compat_iovec __user *uvector, unsigned long nr_segs,
		unsigned long fast_segs, struct iovec *fast_pointer,
		struct iovec **ret_pointer)
{
	struct iovec *iov = fast_pointer;
	compat_ssize_t total = 0;
	int seg;

	*ret_pointer = fast_pointer;

	/* An empty vector is valid and describes zero bytes. */
	if (nr_segs == 0)
		return 0;

	if (nr_segs > UIO_MAXIOV)
		return -EINVAL;

	if (nr_segs > fast_segs) {
		iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL);
		if (!iov)
			return -ENOMEM;
	}
	*ret_pointer = iov;

	/* Validate the whole array once so __get_user() can be used below. */
	if (!access_ok(uvector, nr_segs*sizeof(*uvector)))
		return -EFAULT;

	for (seg = 0; seg < nr_segs; seg++, uvector++, iov++) {
		compat_uptr_t base;
		compat_ssize_t len;

		if (__get_user(len, &uvector->iov_len) ||
		    __get_user(base, &uvector->iov_base))
			return -EFAULT;

		/* A length that is negative as compat_ssize_t is invalid. */
		if (len < 0)
			return -EINVAL;

		if (type >= 0 && !access_ok(compat_ptr(base), len))
			return -EFAULT;

		/* Trim so the running total never exceeds MAX_RW_COUNT. */
		if (len > MAX_RW_COUNT - total)
			len = MAX_RW_COUNT - total;
		total += len;

		iov->iov_base = compat_ptr(base);
		iov->iov_len = (compat_size_t) len;
	}

	return total;
}
#endif
899
900static ssize_t do_iter_read(struct file *file, struct iov_iter *iter,
901 loff_t *pos, rwf_t flags)
902{
903 size_t tot_len;
904 ssize_t ret = 0;
905
906 if (!(file->f_mode & FMODE_READ))
907 return -EBADF;
908 if (!(file->f_mode & FMODE_CAN_READ))
909 return -EINVAL;
910
911 tot_len = iov_iter_count(iter);
912 if (!tot_len)
913 goto out;
914 ret = rw_verify_area(READ, file, pos, tot_len);
915 if (ret < 0)
916 return ret;
917
918 if (file->f_op->read_iter)
919 ret = do_iter_readv_writev(file, iter, pos, READ, flags);
920 else
921 ret = do_loop_readv_writev(file, iter, pos, READ, flags);
922out:
923 if (ret >= 0)
924 fsnotify_access(file);
925 return ret;
926}
927
928ssize_t vfs_iocb_iter_read(struct file *file, struct kiocb *iocb,
929 struct iov_iter *iter)
930{
931 size_t tot_len;
932 ssize_t ret = 0;
933
934 if (!file->f_op->read_iter)
935 return -EINVAL;
936 if (!(file->f_mode & FMODE_READ))
937 return -EBADF;
938 if (!(file->f_mode & FMODE_CAN_READ))
939 return -EINVAL;
940
941 tot_len = iov_iter_count(iter);
942 if (!tot_len)
943 goto out;
944 ret = rw_verify_area(READ, file, &iocb->ki_pos, tot_len);
945 if (ret < 0)
946 return ret;
947
948 ret = call_read_iter(file, iocb, iter);
949out:
950 if (ret >= 0)
951 fsnotify_access(file);
952 return ret;
953}
954EXPORT_SYMBOL(vfs_iocb_iter_read);
955
956ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos,
957 rwf_t flags)
958{
959 if (!file->f_op->read_iter)
960 return -EINVAL;
961 return do_iter_read(file, iter, ppos, flags);
962}
963EXPORT_SYMBOL(vfs_iter_read);
964
965static ssize_t do_iter_write(struct file *file, struct iov_iter *iter,
966 loff_t *pos, rwf_t flags)
967{
968 size_t tot_len;
969 ssize_t ret = 0;
970
971 if (!(file->f_mode & FMODE_WRITE))
972 return -EBADF;
973 if (!(file->f_mode & FMODE_CAN_WRITE))
974 return -EINVAL;
975
976 tot_len = iov_iter_count(iter);
977 if (!tot_len)
978 return 0;
979 ret = rw_verify_area(WRITE, file, pos, tot_len);
980 if (ret < 0)
981 return ret;
982
983 if (file->f_op->write_iter)
984 ret = do_iter_readv_writev(file, iter, pos, WRITE, flags);
985 else
986 ret = do_loop_readv_writev(file, iter, pos, WRITE, flags);
987 if (ret > 0)
988 fsnotify_modify(file);
989 return ret;
990}
991
992ssize_t vfs_iocb_iter_write(struct file *file, struct kiocb *iocb,
993 struct iov_iter *iter)
994{
995 size_t tot_len;
996 ssize_t ret = 0;
997
998 if (!file->f_op->write_iter)
999 return -EINVAL;
1000 if (!(file->f_mode & FMODE_WRITE))
1001 return -EBADF;
1002 if (!(file->f_mode & FMODE_CAN_WRITE))
1003 return -EINVAL;
1004
1005 tot_len = iov_iter_count(iter);
1006 if (!tot_len)
1007 return 0;
1008 ret = rw_verify_area(WRITE, file, &iocb->ki_pos, tot_len);
1009 if (ret < 0)
1010 return ret;
1011
1012 ret = call_write_iter(file, iocb, iter);
1013 if (ret > 0)
1014 fsnotify_modify(file);
1015
1016 return ret;
1017}
1018EXPORT_SYMBOL(vfs_iocb_iter_write);
1019
1020ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos,
1021 rwf_t flags)
1022{
1023 if (!file->f_op->write_iter)
1024 return -EINVAL;
1025 return do_iter_write(file, iter, ppos, flags);
1026}
1027EXPORT_SYMBOL(vfs_iter_write);
1028
1029ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
1030 unsigned long vlen, loff_t *pos, rwf_t flags)
1031{
1032 struct iovec iovstack[UIO_FASTIOV];
1033 struct iovec *iov = iovstack;
1034 struct iov_iter iter;
1035 ssize_t ret;
1036
1037 ret = import_iovec(READ, vec, vlen, ARRAY_SIZE(iovstack), &iov, &iter);
1038 if (ret >= 0) {
1039 ret = do_iter_read(file, &iter, pos, flags);
1040 kfree(iov);
1041 }
1042
1043 return ret;
1044}
1045
1046static ssize_t vfs_writev(struct file *file, const struct iovec __user *vec,
1047 unsigned long vlen, loff_t *pos, rwf_t flags)
1048{
1049 struct iovec iovstack[UIO_FASTIOV];
1050 struct iovec *iov = iovstack;
1051 struct iov_iter iter;
1052 ssize_t ret;
1053
1054 ret = import_iovec(WRITE, vec, vlen, ARRAY_SIZE(iovstack), &iov, &iter);
1055 if (ret >= 0) {
1056 file_start_write(file);
1057 ret = do_iter_write(file, &iter, pos, flags);
1058 file_end_write(file);
1059 kfree(iov);
1060 }
1061 return ret;
1062}
1063
1064static ssize_t do_readv(unsigned long fd, const struct iovec __user *vec,
1065 unsigned long vlen, rwf_t flags)
1066{
1067 struct fd f = fdget_pos(fd);
1068 ssize_t ret = -EBADF;
1069
1070 if (f.file) {
1071 loff_t pos = file_pos_read(f.file);
1072 ret = vfs_readv(f.file, vec, vlen, &pos, flags);
1073 if (ret >= 0)
1074 file_pos_write(f.file, pos);
1075 fdput_pos(f);
1076 }
1077
1078 if (ret > 0)
1079 add_rchar(current, ret);
1080 inc_syscr(current);
1081 return ret;
1082}
1083
1084static ssize_t do_writev(unsigned long fd, const struct iovec __user *vec,
1085 unsigned long vlen, rwf_t flags)
1086{
1087 struct fd f = fdget_pos(fd);
1088 ssize_t ret = -EBADF;
1089
1090 if (f.file) {
1091 loff_t pos = file_pos_read(f.file);
1092 ret = vfs_writev(f.file, vec, vlen, &pos, flags);
1093 if (ret >= 0)
1094 file_pos_write(f.file, pos);
1095 fdput_pos(f);
1096 }
1097
1098 if (ret > 0)
1099 add_wchar(current, ret);
1100 inc_syscw(current);
1101 return ret;
1102}
1103
1104static inline loff_t pos_from_hilo(unsigned long high, unsigned long low)
1105{
1106#define HALF_LONG_BITS (BITS_PER_LONG / 2)
1107 return (((loff_t)high << HALF_LONG_BITS) << HALF_LONG_BITS) | low;
1108}
1109
1110static ssize_t do_preadv(unsigned long fd, const struct iovec __user *vec,
1111 unsigned long vlen, loff_t pos, rwf_t flags)
1112{
1113 struct fd f;
1114 ssize_t ret = -EBADF;
1115
1116 if (pos < 0)
1117 return -EINVAL;
1118
1119 f = fdget(fd);
1120 if (f.file) {
1121 ret = -ESPIPE;
1122 if (f.file->f_mode & FMODE_PREAD)
1123 ret = vfs_readv(f.file, vec, vlen, &pos, flags);
1124 fdput(f);
1125 }
1126
1127 if (ret > 0)
1128 add_rchar(current, ret);
1129 inc_syscr(current);
1130 return ret;
1131}
1132
1133static ssize_t do_pwritev(unsigned long fd, const struct iovec __user *vec,
1134 unsigned long vlen, loff_t pos, rwf_t flags)
1135{
1136 struct fd f;
1137 ssize_t ret = -EBADF;
1138
1139 if (pos < 0)
1140 return -EINVAL;
1141
1142 f = fdget(fd);
1143 if (f.file) {
1144 ret = -ESPIPE;
1145 if (f.file->f_mode & FMODE_PWRITE)
1146 ret = vfs_writev(f.file, vec, vlen, &pos, flags);
1147 fdput(f);
1148 }
1149
1150 if (ret > 0)
1151 add_wchar(current, ret);
1152 inc_syscw(current);
1153 return ret;
1154}
1155
1156SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
1157 unsigned long, vlen)
1158{
1159 return do_readv(fd, vec, vlen, 0);
1160}
1161
1162SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
1163 unsigned long, vlen)
1164{
1165 return do_writev(fd, vec, vlen, 0);
1166}
1167
1168SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec,
1169 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
1170{
1171 loff_t pos = pos_from_hilo(pos_h, pos_l);
1172
1173 return do_preadv(fd, vec, vlen, pos, 0);
1174}
1175
1176SYSCALL_DEFINE6(preadv2, unsigned long, fd, const struct iovec __user *, vec,
1177 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h,
1178 rwf_t, flags)
1179{
1180 loff_t pos = pos_from_hilo(pos_h, pos_l);
1181
1182 if (pos == -1)
1183 return do_readv(fd, vec, vlen, flags);
1184
1185 return do_preadv(fd, vec, vlen, pos, flags);
1186}
1187
1188SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
1189 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
1190{
1191 loff_t pos = pos_from_hilo(pos_h, pos_l);
1192
1193 return do_pwritev(fd, vec, vlen, pos, 0);
1194}
1195
1196SYSCALL_DEFINE6(pwritev2, unsigned long, fd, const struct iovec __user *, vec,
1197 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h,
1198 rwf_t, flags)
1199{
1200 loff_t pos = pos_from_hilo(pos_h, pos_l);
1201
1202 if (pos == -1)
1203 return do_writev(fd, vec, vlen, flags);
1204
1205 return do_pwritev(fd, vec, vlen, pos, flags);
1206}
1207
1208#ifdef CONFIG_COMPAT
1209static size_t compat_readv(struct file *file,
1210 const struct compat_iovec __user *vec,
1211 unsigned long vlen, loff_t *pos, rwf_t flags)
1212{
1213 struct iovec iovstack[UIO_FASTIOV];
1214 struct iovec *iov = iovstack;
1215 struct iov_iter iter;
1216 ssize_t ret;
1217
1218 ret = compat_import_iovec(READ, vec, vlen, UIO_FASTIOV, &iov, &iter);
1219 if (ret >= 0) {
1220 ret = do_iter_read(file, &iter, pos, flags);
1221 kfree(iov);
1222 }
1223 if (ret > 0)
1224 add_rchar(current, ret);
1225 inc_syscr(current);
1226 return ret;
1227}
1228
1229static size_t do_compat_readv(compat_ulong_t fd,
1230 const struct compat_iovec __user *vec,
1231 compat_ulong_t vlen, rwf_t flags)
1232{
1233 struct fd f = fdget_pos(fd);
1234 ssize_t ret;
1235 loff_t pos;
1236
1237 if (!f.file)
1238 return -EBADF;
1239 pos = f.file->f_pos;
1240 ret = compat_readv(f.file, vec, vlen, &pos, flags);
1241 if (ret >= 0)
1242 f.file->f_pos = pos;
1243 fdput_pos(f);
1244 return ret;
1245
1246}
1247
1248COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd,
1249 const struct compat_iovec __user *,vec,
1250 compat_ulong_t, vlen)
1251{
1252 return do_compat_readv(fd, vec, vlen, 0);
1253}
1254
1255static long do_compat_preadv64(unsigned long fd,
1256 const struct compat_iovec __user *vec,
1257 unsigned long vlen, loff_t pos, rwf_t flags)
1258{
1259 struct fd f;
1260 ssize_t ret;
1261
1262 if (pos < 0)
1263 return -EINVAL;
1264 f = fdget(fd);
1265 if (!f.file)
1266 return -EBADF;
1267 ret = -ESPIPE;
1268 if (f.file->f_mode & FMODE_PREAD)
1269 ret = compat_readv(f.file, vec, vlen, &pos, flags);
1270 fdput(f);
1271 return ret;
1272}
1273
#ifdef __ARCH_WANT_COMPAT_SYS_PREADV64
/*
 * Compat preadv64 entry point: the offset arrives as a single loff_t and
 * no RWF_* flags are passed.
 */
COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd,
		const struct compat_iovec __user *,vec,
		unsigned long, vlen, loff_t, pos)
{
	const rwf_t no_flags = 0;

	return do_compat_preadv64(fd, vec, vlen, pos, no_flags);
}
#endif
1282
1283COMPAT_SYSCALL_DEFINE5(preadv, compat_ulong_t, fd,
1284 const struct compat_iovec __user *,vec,
1285 compat_ulong_t, vlen, u32, pos_low, u32, pos_high)
1286{
1287 loff_t pos = ((loff_t)pos_high << 32) | pos_low;
1288
1289 return do_compat_preadv64(fd, vec, vlen, pos, 0);
1290}
1291
#ifdef __ARCH_WANT_COMPAT_SYS_PREADV64V2
/*
 * Compat preadv64v2 entry point: single loff_t offset plus RWF_* flags,
 * both forwarded unchanged to do_compat_preadv64().
 */
COMPAT_SYSCALL_DEFINE5(preadv64v2, unsigned long, fd,
		const struct compat_iovec __user *,vec,
		unsigned long, vlen, loff_t, pos, rwf_t, flags)
{
	long ret = do_compat_preadv64(fd, vec, vlen, pos, flags);

	return ret;
}
#endif
1300
1301COMPAT_SYSCALL_DEFINE6(preadv2, compat_ulong_t, fd,
1302 const struct compat_iovec __user *,vec,
1303 compat_ulong_t, vlen, u32, pos_low, u32, pos_high,
1304 rwf_t, flags)
1305{
1306 loff_t pos = ((loff_t)pos_high << 32) | pos_low;
1307
1308 if (pos == -1)
1309 return do_compat_readv(fd, vec, vlen, flags);
1310
1311 return do_compat_preadv64(fd, vec, vlen, pos, flags);
1312}
1313
1314static size_t compat_writev(struct file *file,
1315 const struct compat_iovec __user *vec,
1316 unsigned long vlen, loff_t *pos, rwf_t flags)
1317{
1318 struct iovec iovstack[UIO_FASTIOV];
1319 struct iovec *iov = iovstack;
1320 struct iov_iter iter;
1321 ssize_t ret;
1322
1323 ret = compat_import_iovec(WRITE, vec, vlen, UIO_FASTIOV, &iov, &iter);
1324 if (ret >= 0) {
1325 file_start_write(file);
1326 ret = do_iter_write(file, &iter, pos, flags);
1327 file_end_write(file);
1328 kfree(iov);
1329 }
1330 if (ret > 0)
1331 add_wchar(current, ret);
1332 inc_syscw(current);
1333 return ret;
1334}
1335
1336static size_t do_compat_writev(compat_ulong_t fd,
1337 const struct compat_iovec __user* vec,
1338 compat_ulong_t vlen, rwf_t flags)
1339{
1340 struct fd f = fdget_pos(fd);
1341 ssize_t ret;
1342 loff_t pos;
1343
1344 if (!f.file)
1345 return -EBADF;
1346 pos = f.file->f_pos;
1347 ret = compat_writev(f.file, vec, vlen, &pos, flags);
1348 if (ret >= 0)
1349 f.file->f_pos = pos;
1350 fdput_pos(f);
1351 return ret;
1352}
1353
1354COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd,
1355 const struct compat_iovec __user *, vec,
1356 compat_ulong_t, vlen)
1357{
1358 return do_compat_writev(fd, vec, vlen, 0);
1359}
1360
1361static long do_compat_pwritev64(unsigned long fd,
1362 const struct compat_iovec __user *vec,
1363 unsigned long vlen, loff_t pos, rwf_t flags)
1364{
1365 struct fd f;
1366 ssize_t ret;
1367
1368 if (pos < 0)
1369 return -EINVAL;
1370 f = fdget(fd);
1371 if (!f.file)
1372 return -EBADF;
1373 ret = -ESPIPE;
1374 if (f.file->f_mode & FMODE_PWRITE)
1375 ret = compat_writev(f.file, vec, vlen, &pos, flags);
1376 fdput(f);
1377 return ret;
1378}
1379
#ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64
/*
 * Compat pwritev64 entry point: the offset arrives as a single loff_t and
 * no RWF_* flags are passed.
 */
COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd,
		const struct compat_iovec __user *,vec,
		unsigned long, vlen, loff_t, pos)
{
	const rwf_t no_flags = 0;

	return do_compat_pwritev64(fd, vec, vlen, pos, no_flags);
}
#endif
1388
1389COMPAT_SYSCALL_DEFINE5(pwritev, compat_ulong_t, fd,
1390 const struct compat_iovec __user *,vec,
1391 compat_ulong_t, vlen, u32, pos_low, u32, pos_high)
1392{
1393 loff_t pos = ((loff_t)pos_high << 32) | pos_low;
1394
1395 return do_compat_pwritev64(fd, vec, vlen, pos, 0);
1396}
1397
#ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64V2
/*
 * Compat pwritev64v2 entry point: single loff_t offset plus RWF_* flags,
 * both forwarded unchanged to do_compat_pwritev64().
 */
COMPAT_SYSCALL_DEFINE5(pwritev64v2, unsigned long, fd,
		const struct compat_iovec __user *,vec,
		unsigned long, vlen, loff_t, pos, rwf_t, flags)
{
	long ret = do_compat_pwritev64(fd, vec, vlen, pos, flags);

	return ret;
}
#endif
1406
1407COMPAT_SYSCALL_DEFINE6(pwritev2, compat_ulong_t, fd,
1408 const struct compat_iovec __user *,vec,
1409 compat_ulong_t, vlen, u32, pos_low, u32, pos_high, rwf_t, flags)
1410{
1411 loff_t pos = ((loff_t)pos_high << 32) | pos_low;
1412
1413 if (pos == -1)
1414 return do_compat_writev(fd, vec, vlen, flags);
1415
1416 return do_compat_pwritev64(fd, vec, vlen, pos, flags);
1417}
1418
1419#endif
1420
1421static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
1422 size_t count, loff_t max)
1423{
1424 struct fd in, out;
1425 struct inode *in_inode, *out_inode;
1426 loff_t pos;
1427 loff_t out_pos;
1428 ssize_t retval;
1429 int fl;
1430
1431
1432
1433
1434 retval = -EBADF;
1435 in = fdget(in_fd);
1436 if (!in.file)
1437 goto out;
1438 if (!(in.file->f_mode & FMODE_READ))
1439 goto fput_in;
1440 retval = -ESPIPE;
1441 if (!ppos) {
1442 pos = in.file->f_pos;
1443 } else {
1444 pos = *ppos;
1445 if (!(in.file->f_mode & FMODE_PREAD))
1446 goto fput_in;
1447 }
1448 retval = rw_verify_area(READ, in.file, &pos, count);
1449 if (retval < 0)
1450 goto fput_in;
1451 if (count > MAX_RW_COUNT)
1452 count = MAX_RW_COUNT;
1453
1454
1455
1456
1457 retval = -EBADF;
1458 out = fdget(out_fd);
1459 if (!out.file)
1460 goto fput_in;
1461 if (!(out.file->f_mode & FMODE_WRITE))
1462 goto fput_out;
1463 retval = -EINVAL;
1464 in_inode = file_inode(in.file);
1465 out_inode = file_inode(out.file);
1466 out_pos = out.file->f_pos;
1467 retval = rw_verify_area(WRITE, out.file, &out_pos, count);
1468 if (retval < 0)
1469 goto fput_out;
1470
1471 if (!max)
1472 max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes);
1473
1474 if (unlikely(pos + count > max)) {
1475 retval = -EOVERFLOW;
1476 if (pos >= max)
1477 goto fput_out;
1478 count = max - pos;
1479 }
1480
1481 fl = 0;
1482#if 0
1483
1484
1485
1486
1487
1488
1489 if (in.file->f_flags & O_NONBLOCK)
1490 fl = SPLICE_F_NONBLOCK;
1491#endif
1492 file_start_write(out.file);
1493 retval = do_splice_direct(in.file, &pos, out.file, &out_pos, count, fl);
1494 file_end_write(out.file);
1495
1496 if (retval > 0) {
1497 add_rchar(current, retval);
1498 add_wchar(current, retval);
1499 fsnotify_access(in.file);
1500 fsnotify_modify(out.file);
1501 out.file->f_pos = out_pos;
1502 if (ppos)
1503 *ppos = pos;
1504 else
1505 in.file->f_pos = pos;
1506 }
1507
1508 inc_syscr(current);
1509 inc_syscw(current);
1510 if (pos > max)
1511 retval = -EOVERFLOW;
1512
1513fput_out:
1514 fdput(out);
1515fput_in:
1516 fdput(in);
1517out:
1518 return retval;
1519}
1520
1521SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd, off_t __user *, offset, size_t, count)
1522{
1523 loff_t pos;
1524 off_t off;
1525 ssize_t ret;
1526
1527 if (offset) {
1528 if (unlikely(get_user(off, offset)))
1529 return -EFAULT;
1530 pos = off;
1531 ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS);
1532 if (unlikely(put_user(pos, offset)))
1533 return -EFAULT;
1534 return ret;
1535 }
1536
1537 return do_sendfile(out_fd, in_fd, NULL, count, 0);
1538}
1539
1540SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, loff_t __user *, offset, size_t, count)
1541{
1542 loff_t pos;
1543 ssize_t ret;
1544
1545 if (offset) {
1546 if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t))))
1547 return -EFAULT;
1548 ret = do_sendfile(out_fd, in_fd, &pos, count, 0);
1549 if (unlikely(put_user(pos, offset)))
1550 return -EFAULT;
1551 return ret;
1552 }
1553
1554 return do_sendfile(out_fd, in_fd, NULL, count, 0);
1555}
1556
1557#ifdef CONFIG_COMPAT
1558COMPAT_SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd,
1559 compat_off_t __user *, offset, compat_size_t, count)
1560{
1561 loff_t pos;
1562 off_t off;
1563 ssize_t ret;
1564
1565 if (offset) {
1566 if (unlikely(get_user(off, offset)))
1567 return -EFAULT;
1568 pos = off;
1569 ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS);
1570 if (unlikely(put_user(pos, offset)))
1571 return -EFAULT;
1572 return ret;
1573 }
1574
1575 return do_sendfile(out_fd, in_fd, NULL, count, 0);
1576}
1577
1578COMPAT_SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd,
1579 compat_loff_t __user *, offset, compat_size_t, count)
1580{
1581 loff_t pos;
1582 ssize_t ret;
1583
1584 if (offset) {
1585 if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t))))
1586 return -EFAULT;
1587 ret = do_sendfile(out_fd, in_fd, &pos, count, 0);
1588 if (unlikely(put_user(pos, offset)))
1589 return -EFAULT;
1590 return ret;
1591 }
1592
1593 return do_sendfile(out_fd, in_fd, NULL, count, 0);
1594}
1595#endif
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in,
1619 struct file *file_out, loff_t pos_out,
1620 size_t len, unsigned int flags)
1621{
1622 return do_splice_direct(file_in, &pos_in, file_out, &pos_out,
1623 len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0);
1624}
1625EXPORT_SYMBOL(generic_copy_file_range);
1626
1627static ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in,
1628 struct file *file_out, loff_t pos_out,
1629 size_t len, unsigned int flags)
1630{
1631
1632
1633
1634
1635
1636
1637
1638
1639 if (file_out->f_op->copy_file_range &&
1640 file_out->f_op->copy_file_range == file_in->f_op->copy_file_range)
1641 return file_out->f_op->copy_file_range(file_in, pos_in,
1642 file_out, pos_out,
1643 len, flags);
1644
1645 return generic_copy_file_range(file_in, pos_in, file_out, pos_out, len,
1646 flags);
1647}
1648
1649
1650
1651
1652
1653
1654ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
1655 struct file *file_out, loff_t pos_out,
1656 size_t len, unsigned int flags)
1657{
1658 ssize_t ret;
1659
1660 if (flags != 0)
1661 return -EINVAL;
1662
1663 ret = generic_copy_file_checks(file_in, pos_in, file_out, pos_out, &len,
1664 flags);
1665 if (unlikely(ret))
1666 return ret;
1667
1668 ret = rw_verify_area(READ, file_in, &pos_in, len);
1669 if (unlikely(ret))
1670 return ret;
1671
1672 ret = rw_verify_area(WRITE, file_out, &pos_out, len);
1673 if (unlikely(ret))
1674 return ret;
1675
1676 if (len == 0)
1677 return 0;
1678
1679 file_start_write(file_out);
1680
1681
1682
1683
1684
1685 if (file_in->f_op->remap_file_range &&
1686 file_inode(file_in)->i_sb == file_inode(file_out)->i_sb) {
1687 loff_t cloned;
1688
1689 cloned = file_in->f_op->remap_file_range(file_in, pos_in,
1690 file_out, pos_out,
1691 min_t(loff_t, MAX_RW_COUNT, len),
1692 REMAP_FILE_CAN_SHORTEN);
1693 if (cloned > 0) {
1694 ret = cloned;
1695 goto done;
1696 }
1697 }
1698
1699 ret = do_copy_file_range(file_in, pos_in, file_out, pos_out, len,
1700 flags);
1701 WARN_ON_ONCE(ret == -EOPNOTSUPP);
1702done:
1703 if (ret > 0) {
1704 fsnotify_access(file_in);
1705 add_rchar(current, ret);
1706 fsnotify_modify(file_out);
1707 add_wchar(current, ret);
1708 }
1709
1710 inc_syscr(current);
1711 inc_syscw(current);
1712
1713 file_end_write(file_out);
1714
1715 return ret;
1716}
1717EXPORT_SYMBOL(vfs_copy_file_range);
1718
1719SYSCALL_DEFINE6(copy_file_range, int, fd_in, loff_t __user *, off_in,
1720 int, fd_out, loff_t __user *, off_out,
1721 size_t, len, unsigned int, flags)
1722{
1723 loff_t pos_in;
1724 loff_t pos_out;
1725 struct fd f_in;
1726 struct fd f_out;
1727 ssize_t ret = -EBADF;
1728
1729 f_in = fdget(fd_in);
1730 if (!f_in.file)
1731 goto out2;
1732
1733 f_out = fdget(fd_out);
1734 if (!f_out.file)
1735 goto out1;
1736
1737 ret = -EFAULT;
1738 if (off_in) {
1739 if (copy_from_user(&pos_in, off_in, sizeof(loff_t)))
1740 goto out;
1741 } else {
1742 pos_in = f_in.file->f_pos;
1743 }
1744
1745 if (off_out) {
1746 if (copy_from_user(&pos_out, off_out, sizeof(loff_t)))
1747 goto out;
1748 } else {
1749 pos_out = f_out.file->f_pos;
1750 }
1751
1752 ret = vfs_copy_file_range(f_in.file, pos_in, f_out.file, pos_out, len,
1753 flags);
1754 if (ret > 0) {
1755 pos_in += ret;
1756 pos_out += ret;
1757
1758 if (off_in) {
1759 if (copy_to_user(off_in, &pos_in, sizeof(loff_t)))
1760 ret = -EFAULT;
1761 } else {
1762 f_in.file->f_pos = pos_in;
1763 }
1764
1765 if (off_out) {
1766 if (copy_to_user(off_out, &pos_out, sizeof(loff_t)))
1767 ret = -EFAULT;
1768 } else {
1769 f_out.file->f_pos = pos_out;
1770 }
1771 }
1772
1773out:
1774 fdput(f_out);
1775out1:
1776 fdput(f_in);
1777out2:
1778 return ret;
1779}
1780
1781static int remap_verify_area(struct file *file, loff_t pos, loff_t len,
1782 bool write)
1783{
1784 struct inode *inode = file_inode(file);
1785
1786 if (unlikely(pos < 0 || len < 0))
1787 return -EINVAL;
1788
1789 if (unlikely((loff_t) (pos + len) < 0))
1790 return -EINVAL;
1791
1792 if (unlikely(inode->i_flctx && mandatory_lock(inode))) {
1793 loff_t end = len ? pos + len - 1 : OFFSET_MAX;
1794 int retval;
1795
1796 retval = locks_mandatory_area(inode, file, pos, end,
1797 write ? F_WRLCK : F_RDLCK);
1798 if (retval < 0)
1799 return retval;
1800 }
1801
1802 return security_file_permission(file, write ? MAY_WRITE : MAY_READ);
1803}
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816static int generic_remap_check_len(struct inode *inode_in,
1817 struct inode *inode_out,
1818 loff_t pos_out,
1819 loff_t *len,
1820 unsigned int remap_flags)
1821{
1822 u64 blkmask = i_blocksize(inode_in) - 1;
1823 loff_t new_len = *len;
1824
1825 if ((*len & blkmask) == 0)
1826 return 0;
1827
1828 if ((remap_flags & REMAP_FILE_DEDUP) ||
1829 pos_out + *len < i_size_read(inode_out))
1830 new_len &= ~blkmask;
1831
1832 if (new_len == *len)
1833 return 0;
1834
1835 if (remap_flags & REMAP_FILE_CAN_SHORTEN) {
1836 *len = new_len;
1837 return 0;
1838 }
1839
1840 return (remap_flags & REMAP_FILE_DEDUP) ? -EBADE : -EINVAL;
1841}
1842
1843
1844
1845
1846
1847static struct page *vfs_dedupe_get_page(struct inode *inode, loff_t offset)
1848{
1849 struct page *page;
1850
1851 page = read_mapping_page(inode->i_mapping, offset >> PAGE_SHIFT, NULL);
1852 if (IS_ERR(page))
1853 return page;
1854 if (!PageUptodate(page)) {
1855 put_page(page);
1856 return ERR_PTR(-EIO);
1857 }
1858 lock_page(page);
1859 return page;
1860}
1861
1862
1863
1864
1865
1866static int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
1867 struct inode *dest, loff_t destoff,
1868 loff_t len, bool *is_same)
1869{
1870 loff_t src_poff;
1871 loff_t dest_poff;
1872 void *src_addr;
1873 void *dest_addr;
1874 struct page *src_page;
1875 struct page *dest_page;
1876 loff_t cmp_len;
1877 bool same;
1878 int error;
1879
1880 error = -EINVAL;
1881 same = true;
1882 while (len) {
1883 src_poff = srcoff & (PAGE_SIZE - 1);
1884 dest_poff = destoff & (PAGE_SIZE - 1);
1885 cmp_len = min(PAGE_SIZE - src_poff,
1886 PAGE_SIZE - dest_poff);
1887 cmp_len = min(cmp_len, len);
1888 if (cmp_len <= 0)
1889 goto out_error;
1890
1891 src_page = vfs_dedupe_get_page(src, srcoff);
1892 if (IS_ERR(src_page)) {
1893 error = PTR_ERR(src_page);
1894 goto out_error;
1895 }
1896 dest_page = vfs_dedupe_get_page(dest, destoff);
1897 if (IS_ERR(dest_page)) {
1898 error = PTR_ERR(dest_page);
1899 unlock_page(src_page);
1900 put_page(src_page);
1901 goto out_error;
1902 }
1903 src_addr = kmap_atomic(src_page);
1904 dest_addr = kmap_atomic(dest_page);
1905
1906 flush_dcache_page(src_page);
1907 flush_dcache_page(dest_page);
1908
1909 if (memcmp(src_addr + src_poff, dest_addr + dest_poff, cmp_len))
1910 same = false;
1911
1912 kunmap_atomic(dest_addr);
1913 kunmap_atomic(src_addr);
1914 unlock_page(dest_page);
1915 unlock_page(src_page);
1916 put_page(dest_page);
1917 put_page(src_page);
1918
1919 if (!same)
1920 break;
1921
1922 srcoff += cmp_len;
1923 destoff += cmp_len;
1924 len -= cmp_len;
1925 }
1926
1927 *is_same = same;
1928 return 0;
1929
1930out_error:
1931 return error;
1932}
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
1943 struct file *file_out, loff_t pos_out,
1944 loff_t *len, unsigned int remap_flags)
1945{
1946 struct inode *inode_in = file_inode(file_in);
1947 struct inode *inode_out = file_inode(file_out);
1948 bool same_inode = (inode_in == inode_out);
1949 int ret;
1950
1951
1952 if (IS_IMMUTABLE(inode_out))
1953 return -EPERM;
1954
1955 if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out))
1956 return -ETXTBSY;
1957
1958
1959 if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
1960 return -EISDIR;
1961 if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
1962 return -EINVAL;
1963
1964
1965 if (*len == 0) {
1966 loff_t isize = i_size_read(inode_in);
1967
1968 if ((remap_flags & REMAP_FILE_DEDUP) || pos_in == isize)
1969 return 0;
1970 if (pos_in > isize)
1971 return -EINVAL;
1972 *len = isize - pos_in;
1973 if (*len == 0)
1974 return 0;
1975 }
1976
1977
1978 ret = generic_remap_checks(file_in, pos_in, file_out, pos_out, len,
1979 remap_flags);
1980 if (ret)
1981 return ret;
1982
1983
1984 inode_dio_wait(inode_in);
1985 if (!same_inode)
1986 inode_dio_wait(inode_out);
1987
1988 ret = filemap_write_and_wait_range(inode_in->i_mapping,
1989 pos_in, pos_in + *len - 1);
1990 if (ret)
1991 return ret;
1992
1993 ret = filemap_write_and_wait_range(inode_out->i_mapping,
1994 pos_out, pos_out + *len - 1);
1995 if (ret)
1996 return ret;
1997
1998
1999
2000
2001 if (remap_flags & REMAP_FILE_DEDUP) {
2002 bool is_same = false;
2003
2004 ret = vfs_dedupe_file_range_compare(inode_in, pos_in,
2005 inode_out, pos_out, *len, &is_same);
2006 if (ret)
2007 return ret;
2008 if (!is_same)
2009 return -EBADE;
2010 }
2011
2012 ret = generic_remap_check_len(inode_in, inode_out, pos_out, len,
2013 remap_flags);
2014 if (ret)
2015 return ret;
2016
2017
2018 if (!(remap_flags & REMAP_FILE_DEDUP))
2019 ret = file_modified(file_out);
2020
2021 return ret;
2022}
2023EXPORT_SYMBOL(generic_remap_file_range_prep);
2024
2025loff_t do_clone_file_range(struct file *file_in, loff_t pos_in,
2026 struct file *file_out, loff_t pos_out,
2027 loff_t len, unsigned int remap_flags)
2028{
2029 loff_t ret;
2030
2031 WARN_ON_ONCE(remap_flags & REMAP_FILE_DEDUP);
2032
2033
2034
2035
2036
2037
2038 if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb)
2039 return -EXDEV;
2040
2041 ret = generic_file_rw_checks(file_in, file_out);
2042 if (ret < 0)
2043 return ret;
2044
2045 if (!file_in->f_op->remap_file_range)
2046 return -EOPNOTSUPP;
2047
2048 ret = remap_verify_area(file_in, pos_in, len, false);
2049 if (ret)
2050 return ret;
2051
2052 ret = remap_verify_area(file_out, pos_out, len, true);
2053 if (ret)
2054 return ret;
2055
2056 ret = file_in->f_op->remap_file_range(file_in, pos_in,
2057 file_out, pos_out, len, remap_flags);
2058 if (ret < 0)
2059 return ret;
2060
2061 fsnotify_access(file_in);
2062 fsnotify_modify(file_out);
2063 return ret;
2064}
2065EXPORT_SYMBOL(do_clone_file_range);
2066
2067loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in,
2068 struct file *file_out, loff_t pos_out,
2069 loff_t len, unsigned int remap_flags)
2070{
2071 loff_t ret;
2072
2073 file_start_write(file_out);
2074 ret = do_clone_file_range(file_in, pos_in, file_out, pos_out, len,
2075 remap_flags);
2076 file_end_write(file_out);
2077
2078 return ret;
2079}
2080EXPORT_SYMBOL(vfs_clone_file_range);
2081
2082loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
2083 struct file *dst_file, loff_t dst_pos,
2084 loff_t len, unsigned int remap_flags)
2085{
2086 loff_t ret;
2087
2088 WARN_ON_ONCE(remap_flags & ~(REMAP_FILE_DEDUP |
2089 REMAP_FILE_CAN_SHORTEN));
2090
2091 ret = mnt_want_write_file(dst_file);
2092 if (ret)
2093 return ret;
2094
2095 ret = remap_verify_area(dst_file, dst_pos, len, true);
2096 if (ret < 0)
2097 goto out_drop_write;
2098
2099 ret = -EINVAL;
2100 if (!(capable(CAP_SYS_ADMIN) || (dst_file->f_mode & FMODE_WRITE)))
2101 goto out_drop_write;
2102
2103 ret = -EXDEV;
2104 if (src_file->f_path.mnt != dst_file->f_path.mnt)
2105 goto out_drop_write;
2106
2107 ret = -EISDIR;
2108 if (S_ISDIR(file_inode(dst_file)->i_mode))
2109 goto out_drop_write;
2110
2111 ret = -EINVAL;
2112 if (!dst_file->f_op->remap_file_range)
2113 goto out_drop_write;
2114
2115 if (len == 0) {
2116 ret = 0;
2117 goto out_drop_write;
2118 }
2119
2120 ret = dst_file->f_op->remap_file_range(src_file, src_pos, dst_file,
2121 dst_pos, len, remap_flags | REMAP_FILE_DEDUP);
2122out_drop_write:
2123 mnt_drop_write_file(dst_file);
2124
2125 return ret;
2126}
2127EXPORT_SYMBOL(vfs_dedupe_file_range_one);
2128
2129int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
2130{
2131 struct file_dedupe_range_info *info;
2132 struct inode *src = file_inode(file);
2133 u64 off;
2134 u64 len;
2135 int i;
2136 int ret;
2137 u16 count = same->dest_count;
2138 loff_t deduped;
2139
2140 if (!(file->f_mode & FMODE_READ))
2141 return -EINVAL;
2142
2143 if (same->reserved1 || same->reserved2)
2144 return -EINVAL;
2145
2146 off = same->src_offset;
2147 len = same->src_length;
2148
2149 if (S_ISDIR(src->i_mode))
2150 return -EISDIR;
2151
2152 if (!S_ISREG(src->i_mode))
2153 return -EINVAL;
2154
2155 if (!file->f_op->remap_file_range)
2156 return -EOPNOTSUPP;
2157
2158 ret = remap_verify_area(file, off, len, false);
2159 if (ret < 0)
2160 return ret;
2161 ret = 0;
2162
2163 if (off + len > i_size_read(src))
2164 return -EINVAL;
2165
2166
2167 len = min_t(u64, len, 1 << 30);
2168
2169
2170 for (i = 0; i < count; i++) {
2171 same->info[i].bytes_deduped = 0ULL;
2172 same->info[i].status = FILE_DEDUPE_RANGE_SAME;
2173 }
2174
2175 for (i = 0, info = same->info; i < count; i++, info++) {
2176 struct fd dst_fd = fdget(info->dest_fd);
2177 struct file *dst_file = dst_fd.file;
2178
2179 if (!dst_file) {
2180 info->status = -EBADF;
2181 goto next_loop;
2182 }
2183
2184 if (info->reserved) {
2185 info->status = -EINVAL;
2186 goto next_fdput;
2187 }
2188
2189 deduped = vfs_dedupe_file_range_one(file, off, dst_file,
2190 info->dest_offset, len,
2191 REMAP_FILE_CAN_SHORTEN);
2192 if (deduped == -EBADE)
2193 info->status = FILE_DEDUPE_RANGE_DIFFERS;
2194 else if (deduped < 0)
2195 info->status = deduped;
2196 else
2197 info->bytes_deduped = len;
2198
2199next_fdput:
2200 fdput(dst_fd);
2201next_loop:
2202 if (fatal_signal_pending(current))
2203 break;
2204 }
2205 return ret;
2206}
2207EXPORT_SYMBOL(vfs_dedupe_file_range);
2208