1
2
3
4
5
6
7
8#include <linux/slab.h>
9#include <linux/stat.h>
10#include <linux/sched/xacct.h>
11#include <linux/fcntl.h>
12#include <linux/file.h>
13#include <linux/uio.h>
14#include <linux/fsnotify.h>
15#include <linux/security.h>
16#include <linux/export.h>
17#include <linux/syscalls.h>
18#include <linux/pagemap.h>
19#include <linux/splice.h>
20#include <linux/compat.h>
21#include <linux/mount.h>
22#include <linux/fs.h>
23#include "internal.h"
24
25#include <linux/uaccess.h>
26#include <asm/unistd.h>
27
28const struct file_operations generic_ro_fops = {
29 .llseek = generic_file_llseek,
30 .read_iter = generic_file_read_iter,
31 .mmap = generic_file_readonly_mmap,
32 .splice_read = generic_file_splice_read,
33};
34
35EXPORT_SYMBOL(generic_ro_fops);
36
37static inline bool unsigned_offsets(struct file *file)
38{
39 return file->f_mode & FMODE_UNSIGNED_OFFSET;
40}
41
42
43
44
45
46
47
48
49
50
51
52
53
54loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize)
55{
56 if (offset < 0 && !unsigned_offsets(file))
57 return -EINVAL;
58 if (offset > maxsize)
59 return -EINVAL;
60
61 if (offset != file->f_pos) {
62 file->f_pos = offset;
63 file->f_version = 0;
64 }
65 return offset;
66}
67EXPORT_SYMBOL(vfs_setpos);
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85loff_t
86generic_file_llseek_size(struct file *file, loff_t offset, int whence,
87 loff_t maxsize, loff_t eof)
88{
89 switch (whence) {
90 case SEEK_END:
91 offset += eof;
92 break;
93 case SEEK_CUR:
94
95
96
97
98
99
100 if (offset == 0)
101 return file->f_pos;
102
103
104
105
106
107 spin_lock(&file->f_lock);
108 offset = vfs_setpos(file, file->f_pos + offset, maxsize);
109 spin_unlock(&file->f_lock);
110 return offset;
111 case SEEK_DATA:
112
113
114
115
116 if ((unsigned long long)offset >= eof)
117 return -ENXIO;
118 break;
119 case SEEK_HOLE:
120
121
122
123
124 if ((unsigned long long)offset >= eof)
125 return -ENXIO;
126 offset = eof;
127 break;
128 }
129
130 return vfs_setpos(file, offset, maxsize);
131}
132EXPORT_SYMBOL(generic_file_llseek_size);
133
134
135
136
137
138
139
140
141
142
143
144loff_t generic_file_llseek(struct file *file, loff_t offset, int whence)
145{
146 struct inode *inode = file->f_mapping->host;
147
148 return generic_file_llseek_size(file, offset, whence,
149 inode->i_sb->s_maxbytes,
150 i_size_read(inode));
151}
152EXPORT_SYMBOL(generic_file_llseek);
153
154
155
156
157
158
159
160
161
162loff_t fixed_size_llseek(struct file *file, loff_t offset, int whence, loff_t size)
163{
164 switch (whence) {
165 case SEEK_SET: case SEEK_CUR: case SEEK_END:
166 return generic_file_llseek_size(file, offset, whence,
167 size, size);
168 default:
169 return -EINVAL;
170 }
171}
172EXPORT_SYMBOL(fixed_size_llseek);
173
174
175
176
177
178
179
180
181loff_t no_seek_end_llseek(struct file *file, loff_t offset, int whence)
182{
183 switch (whence) {
184 case SEEK_SET: case SEEK_CUR:
185 return generic_file_llseek_size(file, offset, whence,
186 OFFSET_MAX, 0);
187 default:
188 return -EINVAL;
189 }
190}
191EXPORT_SYMBOL(no_seek_end_llseek);
192
193
194
195
196
197
198
199
200
201loff_t no_seek_end_llseek_size(struct file *file, loff_t offset, int whence, loff_t size)
202{
203 switch (whence) {
204 case SEEK_SET: case SEEK_CUR:
205 return generic_file_llseek_size(file, offset, whence,
206 size, 0);
207 default:
208 return -EINVAL;
209 }
210}
211EXPORT_SYMBOL(no_seek_end_llseek_size);
212
213
214
215
216
217
218
219
220
221
222
223
224loff_t noop_llseek(struct file *file, loff_t offset, int whence)
225{
226 return file->f_pos;
227}
228EXPORT_SYMBOL(noop_llseek);
229
230loff_t no_llseek(struct file *file, loff_t offset, int whence)
231{
232 return -ESPIPE;
233}
234EXPORT_SYMBOL(no_llseek);
235
236loff_t default_llseek(struct file *file, loff_t offset, int whence)
237{
238 struct inode *inode = file_inode(file);
239 loff_t retval;
240
241 inode_lock(inode);
242 switch (whence) {
243 case SEEK_END:
244 offset += i_size_read(inode);
245 break;
246 case SEEK_CUR:
247 if (offset == 0) {
248 retval = file->f_pos;
249 goto out;
250 }
251 offset += file->f_pos;
252 break;
253 case SEEK_DATA:
254
255
256
257
258
259 if (offset >= inode->i_size) {
260 retval = -ENXIO;
261 goto out;
262 }
263 break;
264 case SEEK_HOLE:
265
266
267
268
269
270 if (offset >= inode->i_size) {
271 retval = -ENXIO;
272 goto out;
273 }
274 offset = inode->i_size;
275 break;
276 }
277 retval = -EINVAL;
278 if (offset >= 0 || unsigned_offsets(file)) {
279 if (offset != file->f_pos) {
280 file->f_pos = offset;
281 file->f_version = 0;
282 }
283 retval = offset;
284 }
285out:
286 inode_unlock(inode);
287 return retval;
288}
289EXPORT_SYMBOL(default_llseek);
290
291loff_t vfs_llseek(struct file *file, loff_t offset, int whence)
292{
293 loff_t (*fn)(struct file *, loff_t, int);
294
295 fn = no_llseek;
296 if (file->f_mode & FMODE_LSEEK) {
297 if (file->f_op->llseek)
298 fn = file->f_op->llseek;
299 }
300 return fn(file, offset, whence);
301}
302EXPORT_SYMBOL(vfs_llseek);
303
304off_t ksys_lseek(unsigned int fd, off_t offset, unsigned int whence)
305{
306 off_t retval;
307 struct fd f = fdget_pos(fd);
308 if (!f.file)
309 return -EBADF;
310
311 retval = -EINVAL;
312 if (whence <= SEEK_MAX) {
313 loff_t res = vfs_llseek(f.file, offset, whence);
314 retval = res;
315 if (res != (loff_t)retval)
316 retval = -EOVERFLOW;
317 }
318 fdput_pos(f);
319 return retval;
320}
321
322SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence)
323{
324 return ksys_lseek(fd, offset, whence);
325}
326
#ifdef CONFIG_COMPAT
/* Compat lseek(2) entry point: takes a compat_off_t and defers to ksys_lseek(). */
COMPAT_SYSCALL_DEFINE3(lseek, unsigned int, fd, compat_off_t, offset,
		       unsigned int, whence)
{
	return ksys_lseek(fd, offset, whence);
}
#endif
333
#ifdef __ARCH_WANT_SYS_LLSEEK
/*
 * llseek entry point: the offset arrives as two unsigned longs and the new
 * position is written to @result instead of being returned.
 *
 * Returns 0 on success, -EBADF for an invalid descriptor, -EINVAL when
 * @whence exceeds SEEK_MAX, the (truncated to int) error from vfs_llseek(),
 * or -EFAULT when @result cannot be written.
 */
SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high,
		unsigned long, offset_low, loff_t __user *, result,
		unsigned int, whence)
{
	struct fd f = fdget_pos(fd);
	loff_t requested;
	loff_t pos;
	int err;

	if (!f.file)
		return -EBADF;

	if (whence > SEEK_MAX) {
		err = -EINVAL;
		goto out_putf;
	}

	requested = ((loff_t) offset_high << 32) | offset_low;
	pos = vfs_llseek(f.file, requested, whence);

	if (pos < 0)
		err = (int)pos;
	else if (copy_to_user(result, &pos, sizeof(pos)))
		err = -EFAULT;
	else
		err = 0;
out_putf:
	fdput_pos(f);
	return err;
}
#endif
364
365int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t count)
366{
367 struct inode *inode;
368 loff_t pos;
369 int retval = -EINVAL;
370
371 inode = file_inode(file);
372 if (unlikely((ssize_t) count < 0))
373 return retval;
374 pos = *ppos;
375 if (unlikely(pos < 0)) {
376 if (!unsigned_offsets(file))
377 return retval;
378 if (count >= -pos)
379 return -EOVERFLOW;
380 } else if (unlikely((loff_t) (pos + count) < 0)) {
381 if (!unsigned_offsets(file))
382 return retval;
383 }
384
385 if (unlikely(inode->i_flctx && mandatory_lock(inode))) {
386 retval = locks_mandatory_area(inode, file, pos, pos + count - 1,
387 read_write == READ ? F_RDLCK : F_WRLCK);
388 if (retval < 0)
389 return retval;
390 }
391 return security_file_permission(file,
392 read_write == READ ? MAY_READ : MAY_WRITE);
393}
394
395static ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
396{
397 struct iovec iov = { .iov_base = buf, .iov_len = len };
398 struct kiocb kiocb;
399 struct iov_iter iter;
400 ssize_t ret;
401
402 init_sync_kiocb(&kiocb, filp);
403 kiocb.ki_pos = *ppos;
404 iov_iter_init(&iter, READ, &iov, 1, len);
405
406 ret = call_read_iter(filp, &kiocb, &iter);
407 BUG_ON(ret == -EIOCBQUEUED);
408 *ppos = kiocb.ki_pos;
409 return ret;
410}
411
412ssize_t __vfs_read(struct file *file, char __user *buf, size_t count,
413 loff_t *pos)
414{
415 if (file->f_op->read)
416 return file->f_op->read(file, buf, count, pos);
417 else if (file->f_op->read_iter)
418 return new_sync_read(file, buf, count, pos);
419 else
420 return -EINVAL;
421}
422
423ssize_t kernel_read(struct file *file, void *buf, size_t count, loff_t *pos)
424{
425 mm_segment_t old_fs;
426 ssize_t result;
427
428 old_fs = get_fs();
429 set_fs(get_ds());
430
431 result = vfs_read(file, (void __user *)buf, count, pos);
432 set_fs(old_fs);
433 return result;
434}
435EXPORT_SYMBOL(kernel_read);
436
437ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
438{
439 ssize_t ret;
440
441 if (!(file->f_mode & FMODE_READ))
442 return -EBADF;
443 if (!(file->f_mode & FMODE_CAN_READ))
444 return -EINVAL;
445 if (unlikely(!access_ok(VERIFY_WRITE, buf, count)))
446 return -EFAULT;
447
448 ret = rw_verify_area(READ, file, pos, count);
449 if (!ret) {
450 if (count > MAX_RW_COUNT)
451 count = MAX_RW_COUNT;
452 ret = __vfs_read(file, buf, count, pos);
453 if (ret > 0) {
454 fsnotify_access(file);
455 add_rchar(current, ret);
456 }
457 inc_syscr(current);
458 }
459
460 return ret;
461}
462
463static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
464{
465 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len };
466 struct kiocb kiocb;
467 struct iov_iter iter;
468 ssize_t ret;
469
470 init_sync_kiocb(&kiocb, filp);
471 kiocb.ki_pos = *ppos;
472 iov_iter_init(&iter, WRITE, &iov, 1, len);
473
474 ret = call_write_iter(filp, &kiocb, &iter);
475 BUG_ON(ret == -EIOCBQUEUED);
476 if (ret > 0)
477 *ppos = kiocb.ki_pos;
478 return ret;
479}
480
481ssize_t __vfs_write(struct file *file, const char __user *p, size_t count,
482 loff_t *pos)
483{
484 if (file->f_op->write)
485 return file->f_op->write(file, p, count, pos);
486 else if (file->f_op->write_iter)
487 return new_sync_write(file, p, count, pos);
488 else
489 return -EINVAL;
490}
491
492ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t *pos)
493{
494 mm_segment_t old_fs;
495 const char __user *p;
496 ssize_t ret;
497
498 if (!(file->f_mode & FMODE_CAN_WRITE))
499 return -EINVAL;
500
501 old_fs = get_fs();
502 set_fs(get_ds());
503 p = (__force const char __user *)buf;
504 if (count > MAX_RW_COUNT)
505 count = MAX_RW_COUNT;
506 ret = __vfs_write(file, p, count, pos);
507 set_fs(old_fs);
508 if (ret > 0) {
509 fsnotify_modify(file);
510 add_wchar(current, ret);
511 }
512 inc_syscw(current);
513 return ret;
514}
515EXPORT_SYMBOL(__kernel_write);
516
517ssize_t kernel_write(struct file *file, const void *buf, size_t count,
518 loff_t *pos)
519{
520 mm_segment_t old_fs;
521 ssize_t res;
522
523 old_fs = get_fs();
524 set_fs(get_ds());
525
526 res = vfs_write(file, (__force const char __user *)buf, count, pos);
527 set_fs(old_fs);
528
529 return res;
530}
531EXPORT_SYMBOL(kernel_write);
532
533ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
534{
535 ssize_t ret;
536
537 if (!(file->f_mode & FMODE_WRITE))
538 return -EBADF;
539 if (!(file->f_mode & FMODE_CAN_WRITE))
540 return -EINVAL;
541 if (unlikely(!access_ok(VERIFY_READ, buf, count)))
542 return -EFAULT;
543
544 ret = rw_verify_area(WRITE, file, pos, count);
545 if (!ret) {
546 if (count > MAX_RW_COUNT)
547 count = MAX_RW_COUNT;
548 file_start_write(file);
549 ret = __vfs_write(file, buf, count, pos);
550 if (ret > 0) {
551 fsnotify_modify(file);
552 add_wchar(current, ret);
553 }
554 inc_syscw(current);
555 file_end_write(file);
556 }
557
558 return ret;
559}
560
561static inline loff_t file_pos_read(struct file *file)
562{
563 return file->f_pos;
564}
565
566static inline void file_pos_write(struct file *file, loff_t pos)
567{
568 file->f_pos = pos;
569}
570
571ssize_t ksys_read(unsigned int fd, char __user *buf, size_t count)
572{
573 struct fd f = fdget_pos(fd);
574 ssize_t ret = -EBADF;
575
576 if (f.file) {
577 loff_t pos = file_pos_read(f.file);
578 ret = vfs_read(f.file, buf, count, &pos);
579 if (ret >= 0)
580 file_pos_write(f.file, pos);
581 fdput_pos(f);
582 }
583 return ret;
584}
585
586SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
587{
588 return ksys_read(fd, buf, count);
589}
590
591ssize_t ksys_write(unsigned int fd, const char __user *buf, size_t count)
592{
593 struct fd f = fdget_pos(fd);
594 ssize_t ret = -EBADF;
595
596 if (f.file) {
597 loff_t pos = file_pos_read(f.file);
598 ret = vfs_write(f.file, buf, count, &pos);
599 if (ret >= 0)
600 file_pos_write(f.file, pos);
601 fdput_pos(f);
602 }
603
604 return ret;
605}
606
607SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
608 size_t, count)
609{
610 return ksys_write(fd, buf, count);
611}
612
613ssize_t ksys_pread64(unsigned int fd, char __user *buf, size_t count,
614 loff_t pos)
615{
616 struct fd f;
617 ssize_t ret = -EBADF;
618
619 if (pos < 0)
620 return -EINVAL;
621
622 f = fdget(fd);
623 if (f.file) {
624 ret = -ESPIPE;
625 if (f.file->f_mode & FMODE_PREAD)
626 ret = vfs_read(f.file, buf, count, &pos);
627 fdput(f);
628 }
629
630 return ret;
631}
632
633SYSCALL_DEFINE4(pread64, unsigned int, fd, char __user *, buf,
634 size_t, count, loff_t, pos)
635{
636 return ksys_pread64(fd, buf, count, pos);
637}
638
639ssize_t ksys_pwrite64(unsigned int fd, const char __user *buf,
640 size_t count, loff_t pos)
641{
642 struct fd f;
643 ssize_t ret = -EBADF;
644
645 if (pos < 0)
646 return -EINVAL;
647
648 f = fdget(fd);
649 if (f.file) {
650 ret = -ESPIPE;
651 if (f.file->f_mode & FMODE_PWRITE)
652 ret = vfs_write(f.file, buf, count, &pos);
653 fdput(f);
654 }
655
656 return ret;
657}
658
659SYSCALL_DEFINE4(pwrite64, unsigned int, fd, const char __user *, buf,
660 size_t, count, loff_t, pos)
661{
662 return ksys_pwrite64(fd, buf, count, pos);
663}
664
665static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
666 loff_t *ppos, int type, rwf_t flags)
667{
668 struct kiocb kiocb;
669 ssize_t ret;
670
671 init_sync_kiocb(&kiocb, filp);
672 ret = kiocb_set_rw_flags(&kiocb, flags);
673 if (ret)
674 return ret;
675 kiocb.ki_pos = *ppos;
676
677 if (type == READ)
678 ret = call_read_iter(filp, &kiocb, iter);
679 else
680 ret = call_write_iter(filp, &kiocb, iter);
681 BUG_ON(ret == -EIOCBQUEUED);
682 *ppos = kiocb.ki_pos;
683 return ret;
684}
685
686
687static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter,
688 loff_t *ppos, int type, rwf_t flags)
689{
690 ssize_t ret = 0;
691
692 if (flags & ~RWF_HIPRI)
693 return -EOPNOTSUPP;
694
695 while (iov_iter_count(iter)) {
696 struct iovec iovec = iov_iter_iovec(iter);
697 ssize_t nr;
698
699 if (type == READ) {
700 nr = filp->f_op->read(filp, iovec.iov_base,
701 iovec.iov_len, ppos);
702 } else {
703 nr = filp->f_op->write(filp, iovec.iov_base,
704 iovec.iov_len, ppos);
705 }
706
707 if (nr < 0) {
708 if (!ret)
709 ret = nr;
710 break;
711 }
712 ret += nr;
713 if (nr != iovec.iov_len)
714 break;
715 iov_iter_advance(iter, nr);
716 }
717
718 return ret;
719}
720
721
/* access_ok() direction for an I/O type: a READ stores into the user buffer. */
#define vrfy_dir(type) ((type) != READ ? VERIFY_READ : VERIFY_WRITE)
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
754 unsigned long nr_segs, unsigned long fast_segs,
755 struct iovec *fast_pointer,
756 struct iovec **ret_pointer)
757{
758 unsigned long seg;
759 ssize_t ret;
760 struct iovec *iov = fast_pointer;
761
762
763
764
765
766
767 if (nr_segs == 0) {
768 ret = 0;
769 goto out;
770 }
771
772
773
774
775
776 if (nr_segs > UIO_MAXIOV) {
777 ret = -EINVAL;
778 goto out;
779 }
780 if (nr_segs > fast_segs) {
781 iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL);
782 if (iov == NULL) {
783 ret = -ENOMEM;
784 goto out;
785 }
786 }
787 if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) {
788 ret = -EFAULT;
789 goto out;
790 }
791
792
793
794
795
796
797
798
799
800
801 ret = 0;
802 for (seg = 0; seg < nr_segs; seg++) {
803 void __user *buf = iov[seg].iov_base;
804 ssize_t len = (ssize_t)iov[seg].iov_len;
805
806
807
808 if (len < 0) {
809 ret = -EINVAL;
810 goto out;
811 }
812 if (type >= 0
813 && unlikely(!access_ok(vrfy_dir(type), buf, len))) {
814 ret = -EFAULT;
815 goto out;
816 }
817 if (len > MAX_RW_COUNT - ret) {
818 len = MAX_RW_COUNT - ret;
819 iov[seg].iov_len = len;
820 }
821 ret += len;
822 }
823out:
824 *ret_pointer = iov;
825 return ret;
826}
827
#ifdef CONFIG_COMPAT
/**
 * compat_rw_copy_check_uvector - compat counterpart of rw_copy_check_uvector()
 * @type:	READ or WRITE selects the access_ok() direction; a negative
 *		value skips the per-segment access_ok() check
 * @uvector:	user-space compat_iovec array
 * @nr_segs:	number of elements in @uvector
 * @fast_segs:	number of elements available in @fast_pointer
 * @fast_pointer: caller-provided array used when @nr_segs fits in it
 * @ret_pointer: receives the array that was actually used
 *
 * Each compat_iovec is fetched individually and widened into a native
 * struct iovec.  *@ret_pointer is set to @fast_pointer on entry and replaced
 * by the allocation when one is made; this function never frees it, so the
 * caller presumably has to kfree() it when it differs from @fast_pointer.
 *
 * Segment lengths are trimmed so the running total never exceeds
 * MAX_RW_COUNT.
 *
 * Return: total byte count of the (possibly trimmed) vector, 0 for an empty
 * vector, -EINVAL when @nr_segs exceeds UIO_MAXIOV or a length is negative,
 * -ENOMEM on allocation failure, -EFAULT on any user-access failure.
 */
ssize_t compat_rw_copy_check_uvector(int type,
		const struct compat_iovec __user *uvector, unsigned long nr_segs,
		unsigned long fast_segs, struct iovec *fast_pointer,
		struct iovec **ret_pointer)
{
	compat_ssize_t tot_len;
	struct iovec *iov = *ret_pointer = fast_pointer;
	ssize_t ret = 0;
	unsigned long seg;	/* same type as nr_segs, as in the native variant */

	/* An empty vector is valid and transfers nothing. */
	if (nr_segs == 0)
		goto out;

	ret = -EINVAL;
	if (nr_segs > UIO_MAXIOV)
		goto out;
	if (nr_segs > fast_segs) {
		ret = -ENOMEM;
		/* kmalloc_array() checks the count * size product itself. */
		iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL);
		if (iov == NULL)
			goto out;
	}
	*ret_pointer = iov;

	/* One range check up front lets the loop use __get_user(). */
	ret = -EFAULT;
	if (!access_ok(VERIFY_READ, uvector, nr_segs*sizeof(*uvector)))
		goto out;

	tot_len = 0;
	ret = -EINVAL;
	for (seg = 0; seg < nr_segs; seg++) {
		compat_uptr_t buf;
		compat_ssize_t len;

		if (__get_user(len, &uvector->iov_len) ||
		    __get_user(buf, &uvector->iov_base)) {
			ret = -EFAULT;
			goto out;
		}
		if (len < 0)	/* ret is still -EINVAL here */
			goto out;
		if (type >= 0 &&
		    !access_ok(vrfy_dir(type), compat_ptr(buf), len)) {
			ret = -EFAULT;
			goto out;
		}
		/* Trim this segment so the total stays within MAX_RW_COUNT. */
		if (len > MAX_RW_COUNT - tot_len)
			len = MAX_RW_COUNT - tot_len;
		tot_len += len;
		iov->iov_base = compat_ptr(buf);
		iov->iov_len = (compat_size_t) len;
		uvector++;
		iov++;
	}
	ret = tot_len;

out:
	return ret;
}
#endif
902
903static ssize_t do_iter_read(struct file *file, struct iov_iter *iter,
904 loff_t *pos, rwf_t flags)
905{
906 size_t tot_len;
907 ssize_t ret = 0;
908
909 if (!(file->f_mode & FMODE_READ))
910 return -EBADF;
911 if (!(file->f_mode & FMODE_CAN_READ))
912 return -EINVAL;
913
914 tot_len = iov_iter_count(iter);
915 if (!tot_len)
916 goto out;
917 ret = rw_verify_area(READ, file, pos, tot_len);
918 if (ret < 0)
919 return ret;
920
921 if (file->f_op->read_iter)
922 ret = do_iter_readv_writev(file, iter, pos, READ, flags);
923 else
924 ret = do_loop_readv_writev(file, iter, pos, READ, flags);
925out:
926 if (ret >= 0)
927 fsnotify_access(file);
928 return ret;
929}
930
931ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos,
932 rwf_t flags)
933{
934 if (!file->f_op->read_iter)
935 return -EINVAL;
936 return do_iter_read(file, iter, ppos, flags);
937}
938EXPORT_SYMBOL(vfs_iter_read);
939
940static ssize_t do_iter_write(struct file *file, struct iov_iter *iter,
941 loff_t *pos, rwf_t flags)
942{
943 size_t tot_len;
944 ssize_t ret = 0;
945
946 if (!(file->f_mode & FMODE_WRITE))
947 return -EBADF;
948 if (!(file->f_mode & FMODE_CAN_WRITE))
949 return -EINVAL;
950
951 tot_len = iov_iter_count(iter);
952 if (!tot_len)
953 return 0;
954 ret = rw_verify_area(WRITE, file, pos, tot_len);
955 if (ret < 0)
956 return ret;
957
958 if (file->f_op->write_iter)
959 ret = do_iter_readv_writev(file, iter, pos, WRITE, flags);
960 else
961 ret = do_loop_readv_writev(file, iter, pos, WRITE, flags);
962 if (ret > 0)
963 fsnotify_modify(file);
964 return ret;
965}
966
967ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos,
968 rwf_t flags)
969{
970 if (!file->f_op->write_iter)
971 return -EINVAL;
972 return do_iter_write(file, iter, ppos, flags);
973}
974EXPORT_SYMBOL(vfs_iter_write);
975
976ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
977 unsigned long vlen, loff_t *pos, rwf_t flags)
978{
979 struct iovec iovstack[UIO_FASTIOV];
980 struct iovec *iov = iovstack;
981 struct iov_iter iter;
982 ssize_t ret;
983
984 ret = import_iovec(READ, vec, vlen, ARRAY_SIZE(iovstack), &iov, &iter);
985 if (ret >= 0) {
986 ret = do_iter_read(file, &iter, pos, flags);
987 kfree(iov);
988 }
989
990 return ret;
991}
992
993static ssize_t vfs_writev(struct file *file, const struct iovec __user *vec,
994 unsigned long vlen, loff_t *pos, rwf_t flags)
995{
996 struct iovec iovstack[UIO_FASTIOV];
997 struct iovec *iov = iovstack;
998 struct iov_iter iter;
999 ssize_t ret;
1000
1001 ret = import_iovec(WRITE, vec, vlen, ARRAY_SIZE(iovstack), &iov, &iter);
1002 if (ret >= 0) {
1003 file_start_write(file);
1004 ret = do_iter_write(file, &iter, pos, flags);
1005 file_end_write(file);
1006 kfree(iov);
1007 }
1008 return ret;
1009}
1010
1011static ssize_t do_readv(unsigned long fd, const struct iovec __user *vec,
1012 unsigned long vlen, rwf_t flags)
1013{
1014 struct fd f = fdget_pos(fd);
1015 ssize_t ret = -EBADF;
1016
1017 if (f.file) {
1018 loff_t pos = file_pos_read(f.file);
1019 ret = vfs_readv(f.file, vec, vlen, &pos, flags);
1020 if (ret >= 0)
1021 file_pos_write(f.file, pos);
1022 fdput_pos(f);
1023 }
1024
1025 if (ret > 0)
1026 add_rchar(current, ret);
1027 inc_syscr(current);
1028 return ret;
1029}
1030
1031static ssize_t do_writev(unsigned long fd, const struct iovec __user *vec,
1032 unsigned long vlen, rwf_t flags)
1033{
1034 struct fd f = fdget_pos(fd);
1035 ssize_t ret = -EBADF;
1036
1037 if (f.file) {
1038 loff_t pos = file_pos_read(f.file);
1039 ret = vfs_writev(f.file, vec, vlen, &pos, flags);
1040 if (ret >= 0)
1041 file_pos_write(f.file, pos);
1042 fdput_pos(f);
1043 }
1044
1045 if (ret > 0)
1046 add_wchar(current, ret);
1047 inc_syscw(current);
1048 return ret;
1049}
1050
1051static inline loff_t pos_from_hilo(unsigned long high, unsigned long low)
1052{
1053#define HALF_LONG_BITS (BITS_PER_LONG / 2)
1054 return (((loff_t)high << HALF_LONG_BITS) << HALF_LONG_BITS) | low;
1055}
1056
1057static ssize_t do_preadv(unsigned long fd, const struct iovec __user *vec,
1058 unsigned long vlen, loff_t pos, rwf_t flags)
1059{
1060 struct fd f;
1061 ssize_t ret = -EBADF;
1062
1063 if (pos < 0)
1064 return -EINVAL;
1065
1066 f = fdget(fd);
1067 if (f.file) {
1068 ret = -ESPIPE;
1069 if (f.file->f_mode & FMODE_PREAD)
1070 ret = vfs_readv(f.file, vec, vlen, &pos, flags);
1071 fdput(f);
1072 }
1073
1074 if (ret > 0)
1075 add_rchar(current, ret);
1076 inc_syscr(current);
1077 return ret;
1078}
1079
1080static ssize_t do_pwritev(unsigned long fd, const struct iovec __user *vec,
1081 unsigned long vlen, loff_t pos, rwf_t flags)
1082{
1083 struct fd f;
1084 ssize_t ret = -EBADF;
1085
1086 if (pos < 0)
1087 return -EINVAL;
1088
1089 f = fdget(fd);
1090 if (f.file) {
1091 ret = -ESPIPE;
1092 if (f.file->f_mode & FMODE_PWRITE)
1093 ret = vfs_writev(f.file, vec, vlen, &pos, flags);
1094 fdput(f);
1095 }
1096
1097 if (ret > 0)
1098 add_wchar(current, ret);
1099 inc_syscw(current);
1100 return ret;
1101}
1102
1103SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
1104 unsigned long, vlen)
1105{
1106 return do_readv(fd, vec, vlen, 0);
1107}
1108
1109SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
1110 unsigned long, vlen)
1111{
1112 return do_writev(fd, vec, vlen, 0);
1113}
1114
1115SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec,
1116 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
1117{
1118 loff_t pos = pos_from_hilo(pos_h, pos_l);
1119
1120 return do_preadv(fd, vec, vlen, pos, 0);
1121}
1122
1123SYSCALL_DEFINE6(preadv2, unsigned long, fd, const struct iovec __user *, vec,
1124 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h,
1125 rwf_t, flags)
1126{
1127 loff_t pos = pos_from_hilo(pos_h, pos_l);
1128
1129 if (pos == -1)
1130 return do_readv(fd, vec, vlen, flags);
1131
1132 return do_preadv(fd, vec, vlen, pos, flags);
1133}
1134
1135SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
1136 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
1137{
1138 loff_t pos = pos_from_hilo(pos_h, pos_l);
1139
1140 return do_pwritev(fd, vec, vlen, pos, 0);
1141}
1142
1143SYSCALL_DEFINE6(pwritev2, unsigned long, fd, const struct iovec __user *, vec,
1144 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h,
1145 rwf_t, flags)
1146{
1147 loff_t pos = pos_from_hilo(pos_h, pos_l);
1148
1149 if (pos == -1)
1150 return do_writev(fd, vec, vlen, flags);
1151
1152 return do_pwritev(fd, vec, vlen, pos, flags);
1153}
1154
1155#ifdef CONFIG_COMPAT
1156static size_t compat_readv(struct file *file,
1157 const struct compat_iovec __user *vec,
1158 unsigned long vlen, loff_t *pos, rwf_t flags)
1159{
1160 struct iovec iovstack[UIO_FASTIOV];
1161 struct iovec *iov = iovstack;
1162 struct iov_iter iter;
1163 ssize_t ret;
1164
1165 ret = compat_import_iovec(READ, vec, vlen, UIO_FASTIOV, &iov, &iter);
1166 if (ret >= 0) {
1167 ret = do_iter_read(file, &iter, pos, flags);
1168 kfree(iov);
1169 }
1170 if (ret > 0)
1171 add_rchar(current, ret);
1172 inc_syscr(current);
1173 return ret;
1174}
1175
1176static size_t do_compat_readv(compat_ulong_t fd,
1177 const struct compat_iovec __user *vec,
1178 compat_ulong_t vlen, rwf_t flags)
1179{
1180 struct fd f = fdget_pos(fd);
1181 ssize_t ret;
1182 loff_t pos;
1183
1184 if (!f.file)
1185 return -EBADF;
1186 pos = f.file->f_pos;
1187 ret = compat_readv(f.file, vec, vlen, &pos, flags);
1188 if (ret >= 0)
1189 f.file->f_pos = pos;
1190 fdput_pos(f);
1191 return ret;
1192
1193}
1194
1195COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd,
1196 const struct compat_iovec __user *,vec,
1197 compat_ulong_t, vlen)
1198{
1199 return do_compat_readv(fd, vec, vlen, 0);
1200}
1201
1202static long do_compat_preadv64(unsigned long fd,
1203 const struct compat_iovec __user *vec,
1204 unsigned long vlen, loff_t pos, rwf_t flags)
1205{
1206 struct fd f;
1207 ssize_t ret;
1208
1209 if (pos < 0)
1210 return -EINVAL;
1211 f = fdget(fd);
1212 if (!f.file)
1213 return -EBADF;
1214 ret = -ESPIPE;
1215 if (f.file->f_mode & FMODE_PREAD)
1216 ret = compat_readv(f.file, vec, vlen, &pos, flags);
1217 fdput(f);
1218 return ret;
1219}
1220
#ifdef __ARCH_WANT_COMPAT_SYS_PREADV64
/* Compat preadv64 entry point: position passed as one loff_t, no flags. */
COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd,
		       const struct compat_iovec __user *, vec,
		       unsigned long, vlen, loff_t, pos)
{
	const rwf_t no_flags = 0;

	return do_compat_preadv64(fd, vec, vlen, pos, no_flags);
}
#endif
1229
1230COMPAT_SYSCALL_DEFINE5(preadv, compat_ulong_t, fd,
1231 const struct compat_iovec __user *,vec,
1232 compat_ulong_t, vlen, u32, pos_low, u32, pos_high)
1233{
1234 loff_t pos = ((loff_t)pos_high << 32) | pos_low;
1235
1236 return do_compat_preadv64(fd, vec, vlen, pos, 0);
1237}
1238
#ifdef __ARCH_WANT_COMPAT_SYS_PREADV64V2
/* Compat preadv64v2 entry point: position passed as one loff_t, plus flags. */
COMPAT_SYSCALL_DEFINE5(preadv64v2, unsigned long, fd,
		       const struct compat_iovec __user *, vec,
		       unsigned long, vlen, loff_t, pos, rwf_t, flags)
{
	return do_compat_preadv64(fd, vec, vlen, pos, flags);
}
#endif
1247
1248COMPAT_SYSCALL_DEFINE6(preadv2, compat_ulong_t, fd,
1249 const struct compat_iovec __user *,vec,
1250 compat_ulong_t, vlen, u32, pos_low, u32, pos_high,
1251 rwf_t, flags)
1252{
1253 loff_t pos = ((loff_t)pos_high << 32) | pos_low;
1254
1255 if (pos == -1)
1256 return do_compat_readv(fd, vec, vlen, flags);
1257
1258 return do_compat_preadv64(fd, vec, vlen, pos, flags);
1259}
1260
1261static size_t compat_writev(struct file *file,
1262 const struct compat_iovec __user *vec,
1263 unsigned long vlen, loff_t *pos, rwf_t flags)
1264{
1265 struct iovec iovstack[UIO_FASTIOV];
1266 struct iovec *iov = iovstack;
1267 struct iov_iter iter;
1268 ssize_t ret;
1269
1270 ret = compat_import_iovec(WRITE, vec, vlen, UIO_FASTIOV, &iov, &iter);
1271 if (ret >= 0) {
1272 file_start_write(file);
1273 ret = do_iter_write(file, &iter, pos, flags);
1274 file_end_write(file);
1275 kfree(iov);
1276 }
1277 if (ret > 0)
1278 add_wchar(current, ret);
1279 inc_syscw(current);
1280 return ret;
1281}
1282
1283static size_t do_compat_writev(compat_ulong_t fd,
1284 const struct compat_iovec __user* vec,
1285 compat_ulong_t vlen, rwf_t flags)
1286{
1287 struct fd f = fdget_pos(fd);
1288 ssize_t ret;
1289 loff_t pos;
1290
1291 if (!f.file)
1292 return -EBADF;
1293 pos = f.file->f_pos;
1294 ret = compat_writev(f.file, vec, vlen, &pos, flags);
1295 if (ret >= 0)
1296 f.file->f_pos = pos;
1297 fdput_pos(f);
1298 return ret;
1299}
1300
1301COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd,
1302 const struct compat_iovec __user *, vec,
1303 compat_ulong_t, vlen)
1304{
1305 return do_compat_writev(fd, vec, vlen, 0);
1306}
1307
1308static long do_compat_pwritev64(unsigned long fd,
1309 const struct compat_iovec __user *vec,
1310 unsigned long vlen, loff_t pos, rwf_t flags)
1311{
1312 struct fd f;
1313 ssize_t ret;
1314
1315 if (pos < 0)
1316 return -EINVAL;
1317 f = fdget(fd);
1318 if (!f.file)
1319 return -EBADF;
1320 ret = -ESPIPE;
1321 if (f.file->f_mode & FMODE_PWRITE)
1322 ret = compat_writev(f.file, vec, vlen, &pos, flags);
1323 fdput(f);
1324 return ret;
1325}
1326
#ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64
/* Compat pwritev64 entry point: position passed as one loff_t, no flags. */
COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd,
		       const struct compat_iovec __user *, vec,
		       unsigned long, vlen, loff_t, pos)
{
	const rwf_t no_flags = 0;

	return do_compat_pwritev64(fd, vec, vlen, pos, no_flags);
}
#endif
1335
1336COMPAT_SYSCALL_DEFINE5(pwritev, compat_ulong_t, fd,
1337 const struct compat_iovec __user *,vec,
1338 compat_ulong_t, vlen, u32, pos_low, u32, pos_high)
1339{
1340 loff_t pos = ((loff_t)pos_high << 32) | pos_low;
1341
1342 return do_compat_pwritev64(fd, vec, vlen, pos, 0);
1343}
1344
#ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64V2
/* Compat pwritev64v2 entry point: position passed as one loff_t, plus flags. */
COMPAT_SYSCALL_DEFINE5(pwritev64v2, unsigned long, fd,
		       const struct compat_iovec __user *, vec,
		       unsigned long, vlen, loff_t, pos, rwf_t, flags)
{
	return do_compat_pwritev64(fd, vec, vlen, pos, flags);
}
#endif
1353
1354COMPAT_SYSCALL_DEFINE6(pwritev2, compat_ulong_t, fd,
1355 const struct compat_iovec __user *,vec,
1356 compat_ulong_t, vlen, u32, pos_low, u32, pos_high, rwf_t, flags)
1357{
1358 loff_t pos = ((loff_t)pos_high << 32) | pos_low;
1359
1360 if (pos == -1)
1361 return do_compat_writev(fd, vec, vlen, flags);
1362
1363 return do_compat_pwritev64(fd, vec, vlen, pos, flags);
1364}
1365
1366#endif
1367
1368static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
1369 size_t count, loff_t max)
1370{
1371 struct fd in, out;
1372 struct inode *in_inode, *out_inode;
1373 loff_t pos;
1374 loff_t out_pos;
1375 ssize_t retval;
1376 int fl;
1377
1378
1379
1380
1381 retval = -EBADF;
1382 in = fdget(in_fd);
1383 if (!in.file)
1384 goto out;
1385 if (!(in.file->f_mode & FMODE_READ))
1386 goto fput_in;
1387 retval = -ESPIPE;
1388 if (!ppos) {
1389 pos = in.file->f_pos;
1390 } else {
1391 pos = *ppos;
1392 if (!(in.file->f_mode & FMODE_PREAD))
1393 goto fput_in;
1394 }
1395 retval = rw_verify_area(READ, in.file, &pos, count);
1396 if (retval < 0)
1397 goto fput_in;
1398 if (count > MAX_RW_COUNT)
1399 count = MAX_RW_COUNT;
1400
1401
1402
1403
1404 retval = -EBADF;
1405 out = fdget(out_fd);
1406 if (!out.file)
1407 goto fput_in;
1408 if (!(out.file->f_mode & FMODE_WRITE))
1409 goto fput_out;
1410 retval = -EINVAL;
1411 in_inode = file_inode(in.file);
1412 out_inode = file_inode(out.file);
1413 out_pos = out.file->f_pos;
1414 retval = rw_verify_area(WRITE, out.file, &out_pos, count);
1415 if (retval < 0)
1416 goto fput_out;
1417
1418 if (!max)
1419 max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes);
1420
1421 if (unlikely(pos + count > max)) {
1422 retval = -EOVERFLOW;
1423 if (pos >= max)
1424 goto fput_out;
1425 count = max - pos;
1426 }
1427
1428 fl = 0;
1429#if 0
1430
1431
1432
1433
1434
1435
1436 if (in.file->f_flags & O_NONBLOCK)
1437 fl = SPLICE_F_NONBLOCK;
1438#endif
1439 file_start_write(out.file);
1440 retval = do_splice_direct(in.file, &pos, out.file, &out_pos, count, fl);
1441 file_end_write(out.file);
1442
1443 if (retval > 0) {
1444 add_rchar(current, retval);
1445 add_wchar(current, retval);
1446 fsnotify_access(in.file);
1447 fsnotify_modify(out.file);
1448 out.file->f_pos = out_pos;
1449 if (ppos)
1450 *ppos = pos;
1451 else
1452 in.file->f_pos = pos;
1453 }
1454
1455 inc_syscr(current);
1456 inc_syscw(current);
1457 if (pos > max)
1458 retval = -EOVERFLOW;
1459
1460fput_out:
1461 fdput(out);
1462fput_in:
1463 fdput(in);
1464out:
1465 return retval;
1466}
1467
1468SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd, off_t __user *, offset, size_t, count)
1469{
1470 loff_t pos;
1471 off_t off;
1472 ssize_t ret;
1473
1474 if (offset) {
1475 if (unlikely(get_user(off, offset)))
1476 return -EFAULT;
1477 pos = off;
1478 ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS);
1479 if (unlikely(put_user(pos, offset)))
1480 return -EFAULT;
1481 return ret;
1482 }
1483
1484 return do_sendfile(out_fd, in_fd, NULL, count, 0);
1485}
1486
1487SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, loff_t __user *, offset, size_t, count)
1488{
1489 loff_t pos;
1490 ssize_t ret;
1491
1492 if (offset) {
1493 if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t))))
1494 return -EFAULT;
1495 ret = do_sendfile(out_fd, in_fd, &pos, count, 0);
1496 if (unlikely(put_user(pos, offset)))
1497 return -EFAULT;
1498 return ret;
1499 }
1500
1501 return do_sendfile(out_fd, in_fd, NULL, count, 0);
1502}
1503
1504#ifdef CONFIG_COMPAT
1505COMPAT_SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd,
1506 compat_off_t __user *, offset, compat_size_t, count)
1507{
1508 loff_t pos;
1509 off_t off;
1510 ssize_t ret;
1511
1512 if (offset) {
1513 if (unlikely(get_user(off, offset)))
1514 return -EFAULT;
1515 pos = off;
1516 ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS);
1517 if (unlikely(put_user(pos, offset)))
1518 return -EFAULT;
1519 return ret;
1520 }
1521
1522 return do_sendfile(out_fd, in_fd, NULL, count, 0);
1523}
1524
1525COMPAT_SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd,
1526 compat_loff_t __user *, offset, compat_size_t, count)
1527{
1528 loff_t pos;
1529 ssize_t ret;
1530
1531 if (offset) {
1532 if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t))))
1533 return -EFAULT;
1534 ret = do_sendfile(out_fd, in_fd, &pos, count, 0);
1535 if (unlikely(put_user(pos, offset)))
1536 return -EFAULT;
1537 return ret;
1538 }
1539
1540 return do_sendfile(out_fd, in_fd, NULL, count, 0);
1541}
1542#endif
1543
1544
1545
1546
1547
1548
1549ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
1550 struct file *file_out, loff_t pos_out,
1551 size_t len, unsigned int flags)
1552{
1553 struct inode *inode_in = file_inode(file_in);
1554 struct inode *inode_out = file_inode(file_out);
1555 ssize_t ret;
1556
1557 if (flags != 0)
1558 return -EINVAL;
1559
1560 if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
1561 return -EISDIR;
1562 if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
1563 return -EINVAL;
1564
1565 ret = rw_verify_area(READ, file_in, &pos_in, len);
1566 if (unlikely(ret))
1567 return ret;
1568
1569 ret = rw_verify_area(WRITE, file_out, &pos_out, len);
1570 if (unlikely(ret))
1571 return ret;
1572
1573 if (!(file_in->f_mode & FMODE_READ) ||
1574 !(file_out->f_mode & FMODE_WRITE) ||
1575 (file_out->f_flags & O_APPEND))
1576 return -EBADF;
1577
1578
1579 if (inode_in->i_sb != inode_out->i_sb)
1580 return -EXDEV;
1581
1582 if (len == 0)
1583 return 0;
1584
1585 file_start_write(file_out);
1586
1587
1588
1589
1590
1591 if (file_in->f_op->clone_file_range) {
1592 ret = file_in->f_op->clone_file_range(file_in, pos_in,
1593 file_out, pos_out, len);
1594 if (ret == 0) {
1595 ret = len;
1596 goto done;
1597 }
1598 }
1599
1600 if (file_out->f_op->copy_file_range) {
1601 ret = file_out->f_op->copy_file_range(file_in, pos_in, file_out,
1602 pos_out, len, flags);
1603 if (ret != -EOPNOTSUPP)
1604 goto done;
1605 }
1606
1607 ret = do_splice_direct(file_in, &pos_in, file_out, &pos_out,
1608 len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0);
1609
1610done:
1611 if (ret > 0) {
1612 fsnotify_access(file_in);
1613 add_rchar(current, ret);
1614 fsnotify_modify(file_out);
1615 add_wchar(current, ret);
1616 }
1617
1618 inc_syscr(current);
1619 inc_syscw(current);
1620
1621 file_end_write(file_out);
1622
1623 return ret;
1624}
1625EXPORT_SYMBOL(vfs_copy_file_range);
1626
1627SYSCALL_DEFINE6(copy_file_range, int, fd_in, loff_t __user *, off_in,
1628 int, fd_out, loff_t __user *, off_out,
1629 size_t, len, unsigned int, flags)
1630{
1631 loff_t pos_in;
1632 loff_t pos_out;
1633 struct fd f_in;
1634 struct fd f_out;
1635 ssize_t ret = -EBADF;
1636
1637 f_in = fdget(fd_in);
1638 if (!f_in.file)
1639 goto out2;
1640
1641 f_out = fdget(fd_out);
1642 if (!f_out.file)
1643 goto out1;
1644
1645 ret = -EFAULT;
1646 if (off_in) {
1647 if (copy_from_user(&pos_in, off_in, sizeof(loff_t)))
1648 goto out;
1649 } else {
1650 pos_in = f_in.file->f_pos;
1651 }
1652
1653 if (off_out) {
1654 if (copy_from_user(&pos_out, off_out, sizeof(loff_t)))
1655 goto out;
1656 } else {
1657 pos_out = f_out.file->f_pos;
1658 }
1659
1660 ret = vfs_copy_file_range(f_in.file, pos_in, f_out.file, pos_out, len,
1661 flags);
1662 if (ret > 0) {
1663 pos_in += ret;
1664 pos_out += ret;
1665
1666 if (off_in) {
1667 if (copy_to_user(off_in, &pos_in, sizeof(loff_t)))
1668 ret = -EFAULT;
1669 } else {
1670 f_in.file->f_pos = pos_in;
1671 }
1672
1673 if (off_out) {
1674 if (copy_to_user(off_out, &pos_out, sizeof(loff_t)))
1675 ret = -EFAULT;
1676 } else {
1677 f_out.file->f_pos = pos_out;
1678 }
1679 }
1680
1681out:
1682 fdput(f_out);
1683out1:
1684 fdput(f_in);
1685out2:
1686 return ret;
1687}
1688
1689static int clone_verify_area(struct file *file, loff_t pos, u64 len, bool write)
1690{
1691 struct inode *inode = file_inode(file);
1692
1693 if (unlikely(pos < 0))
1694 return -EINVAL;
1695
1696 if (unlikely((loff_t) (pos + len) < 0))
1697 return -EINVAL;
1698
1699 if (unlikely(inode->i_flctx && mandatory_lock(inode))) {
1700 loff_t end = len ? pos + len - 1 : OFFSET_MAX;
1701 int retval;
1702
1703 retval = locks_mandatory_area(inode, file, pos, end,
1704 write ? F_WRLCK : F_RDLCK);
1705 if (retval < 0)
1706 return retval;
1707 }
1708
1709 return security_file_permission(file, write ? MAY_WRITE : MAY_READ);
1710}
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in,
1721 struct inode *inode_out, loff_t pos_out,
1722 u64 *len, bool is_dedupe)
1723{
1724 loff_t bs = inode_out->i_sb->s_blocksize;
1725 loff_t blen;
1726 loff_t isize;
1727 bool same_inode = (inode_in == inode_out);
1728 int ret;
1729
1730
1731 if (IS_IMMUTABLE(inode_out))
1732 return -EPERM;
1733
1734 if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out))
1735 return -ETXTBSY;
1736
1737
1738 if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
1739 return -EISDIR;
1740 if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
1741 return -EINVAL;
1742
1743
1744 isize = i_size_read(inode_in);
1745 if (isize == 0)
1746 return 0;
1747
1748
1749 if (*len == 0) {
1750 if (is_dedupe || pos_in == isize)
1751 return 0;
1752 if (pos_in > isize)
1753 return -EINVAL;
1754 *len = isize - pos_in;
1755 }
1756
1757
1758 if (pos_in + *len < pos_in || pos_out + *len < pos_out ||
1759 pos_in + *len > isize)
1760 return -EINVAL;
1761
1762
1763 if (is_dedupe) {
1764 loff_t disize;
1765
1766 disize = i_size_read(inode_out);
1767 if (pos_out >= disize || pos_out + *len > disize)
1768 return -EINVAL;
1769 }
1770
1771
1772 if (pos_in + *len == isize)
1773 blen = ALIGN(isize, bs) - pos_in;
1774 else
1775 blen = *len;
1776
1777
1778 if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_in + blen, bs) ||
1779 !IS_ALIGNED(pos_out, bs) || !IS_ALIGNED(pos_out + blen, bs))
1780 return -EINVAL;
1781
1782
1783 if (same_inode) {
1784 if (pos_out + blen > pos_in && pos_out < pos_in + blen)
1785 return -EINVAL;
1786 }
1787
1788
1789 inode_dio_wait(inode_in);
1790 if (!same_inode)
1791 inode_dio_wait(inode_out);
1792
1793 ret = filemap_write_and_wait_range(inode_in->i_mapping,
1794 pos_in, pos_in + *len - 1);
1795 if (ret)
1796 return ret;
1797
1798 ret = filemap_write_and_wait_range(inode_out->i_mapping,
1799 pos_out, pos_out + *len - 1);
1800 if (ret)
1801 return ret;
1802
1803
1804
1805
1806 if (is_dedupe) {
1807 bool is_same = false;
1808
1809 ret = vfs_dedupe_file_range_compare(inode_in, pos_in,
1810 inode_out, pos_out, *len, &is_same);
1811 if (ret)
1812 return ret;
1813 if (!is_same)
1814 return -EBADE;
1815 }
1816
1817 return 1;
1818}
1819EXPORT_SYMBOL(vfs_clone_file_prep_inodes);
1820
1821int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
1822 struct file *file_out, loff_t pos_out, u64 len)
1823{
1824 struct inode *inode_in = file_inode(file_in);
1825 struct inode *inode_out = file_inode(file_out);
1826 int ret;
1827
1828 if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
1829 return -EISDIR;
1830 if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
1831 return -EINVAL;
1832
1833
1834
1835
1836
1837
1838 if (inode_in->i_sb != inode_out->i_sb)
1839 return -EXDEV;
1840
1841 if (!(file_in->f_mode & FMODE_READ) ||
1842 !(file_out->f_mode & FMODE_WRITE) ||
1843 (file_out->f_flags & O_APPEND))
1844 return -EBADF;
1845
1846 if (!file_in->f_op->clone_file_range)
1847 return -EOPNOTSUPP;
1848
1849 ret = clone_verify_area(file_in, pos_in, len, false);
1850 if (ret)
1851 return ret;
1852
1853 ret = clone_verify_area(file_out, pos_out, len, true);
1854 if (ret)
1855 return ret;
1856
1857 if (pos_in + len > i_size_read(inode_in))
1858 return -EINVAL;
1859
1860 ret = file_in->f_op->clone_file_range(file_in, pos_in,
1861 file_out, pos_out, len);
1862 if (!ret) {
1863 fsnotify_access(file_in);
1864 fsnotify_modify(file_out);
1865 }
1866
1867 return ret;
1868}
1869EXPORT_SYMBOL(vfs_clone_file_range);
1870
1871
1872
1873
1874
1875static struct page *vfs_dedupe_get_page(struct inode *inode, loff_t offset)
1876{
1877 struct address_space *mapping;
1878 struct page *page;
1879 pgoff_t n;
1880
1881 n = offset >> PAGE_SHIFT;
1882 mapping = inode->i_mapping;
1883 page = read_mapping_page(mapping, n, NULL);
1884 if (IS_ERR(page))
1885 return page;
1886 if (!PageUptodate(page)) {
1887 put_page(page);
1888 return ERR_PTR(-EIO);
1889 }
1890 lock_page(page);
1891 return page;
1892}
1893
1894
1895
1896
1897
1898int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
1899 struct inode *dest, loff_t destoff,
1900 loff_t len, bool *is_same)
1901{
1902 loff_t src_poff;
1903 loff_t dest_poff;
1904 void *src_addr;
1905 void *dest_addr;
1906 struct page *src_page;
1907 struct page *dest_page;
1908 loff_t cmp_len;
1909 bool same;
1910 int error;
1911
1912 error = -EINVAL;
1913 same = true;
1914 while (len) {
1915 src_poff = srcoff & (PAGE_SIZE - 1);
1916 dest_poff = destoff & (PAGE_SIZE - 1);
1917 cmp_len = min(PAGE_SIZE - src_poff,
1918 PAGE_SIZE - dest_poff);
1919 cmp_len = min(cmp_len, len);
1920 if (cmp_len <= 0)
1921 goto out_error;
1922
1923 src_page = vfs_dedupe_get_page(src, srcoff);
1924 if (IS_ERR(src_page)) {
1925 error = PTR_ERR(src_page);
1926 goto out_error;
1927 }
1928 dest_page = vfs_dedupe_get_page(dest, destoff);
1929 if (IS_ERR(dest_page)) {
1930 error = PTR_ERR(dest_page);
1931 unlock_page(src_page);
1932 put_page(src_page);
1933 goto out_error;
1934 }
1935 src_addr = kmap_atomic(src_page);
1936 dest_addr = kmap_atomic(dest_page);
1937
1938 flush_dcache_page(src_page);
1939 flush_dcache_page(dest_page);
1940
1941 if (memcmp(src_addr + src_poff, dest_addr + dest_poff, cmp_len))
1942 same = false;
1943
1944 kunmap_atomic(dest_addr);
1945 kunmap_atomic(src_addr);
1946 unlock_page(dest_page);
1947 unlock_page(src_page);
1948 put_page(dest_page);
1949 put_page(src_page);
1950
1951 if (!same)
1952 break;
1953
1954 srcoff += cmp_len;
1955 destoff += cmp_len;
1956 len -= cmp_len;
1957 }
1958
1959 *is_same = same;
1960 return 0;
1961
1962out_error:
1963 return error;
1964}
1965EXPORT_SYMBOL(vfs_dedupe_file_range_compare);
1966
1967int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
1968{
1969 struct file_dedupe_range_info *info;
1970 struct inode *src = file_inode(file);
1971 u64 off;
1972 u64 len;
1973 int i;
1974 int ret;
1975 bool is_admin = capable(CAP_SYS_ADMIN);
1976 u16 count = same->dest_count;
1977 struct file *dst_file;
1978 loff_t dst_off;
1979 ssize_t deduped;
1980
1981 if (!(file->f_mode & FMODE_READ))
1982 return -EINVAL;
1983
1984 if (same->reserved1 || same->reserved2)
1985 return -EINVAL;
1986
1987 off = same->src_offset;
1988 len = same->src_length;
1989
1990 ret = -EISDIR;
1991 if (S_ISDIR(src->i_mode))
1992 goto out;
1993
1994 ret = -EINVAL;
1995 if (!S_ISREG(src->i_mode))
1996 goto out;
1997
1998 ret = clone_verify_area(file, off, len, false);
1999 if (ret < 0)
2000 goto out;
2001 ret = 0;
2002
2003 if (off + len > i_size_read(src))
2004 return -EINVAL;
2005
2006
2007 for (i = 0; i < count; i++) {
2008 same->info[i].bytes_deduped = 0ULL;
2009 same->info[i].status = FILE_DEDUPE_RANGE_SAME;
2010 }
2011
2012 for (i = 0, info = same->info; i < count; i++, info++) {
2013 struct inode *dst;
2014 struct fd dst_fd = fdget(info->dest_fd);
2015
2016 dst_file = dst_fd.file;
2017 if (!dst_file) {
2018 info->status = -EBADF;
2019 goto next_loop;
2020 }
2021 dst = file_inode(dst_file);
2022
2023 ret = mnt_want_write_file(dst_file);
2024 if (ret) {
2025 info->status = ret;
2026 goto next_loop;
2027 }
2028
2029 dst_off = info->dest_offset;
2030 ret = clone_verify_area(dst_file, dst_off, len, true);
2031 if (ret < 0) {
2032 info->status = ret;
2033 goto next_file;
2034 }
2035 ret = 0;
2036
2037 if (info->reserved) {
2038 info->status = -EINVAL;
2039 } else if (!(is_admin || (dst_file->f_mode & FMODE_WRITE))) {
2040 info->status = -EINVAL;
2041 } else if (file->f_path.mnt != dst_file->f_path.mnt) {
2042 info->status = -EXDEV;
2043 } else if (S_ISDIR(dst->i_mode)) {
2044 info->status = -EISDIR;
2045 } else if (dst_file->f_op->dedupe_file_range == NULL) {
2046 info->status = -EINVAL;
2047 } else {
2048 deduped = dst_file->f_op->dedupe_file_range(file, off,
2049 len, dst_file,
2050 info->dest_offset);
2051 if (deduped == -EBADE)
2052 info->status = FILE_DEDUPE_RANGE_DIFFERS;
2053 else if (deduped < 0)
2054 info->status = deduped;
2055 else
2056 info->bytes_deduped += deduped;
2057 }
2058
2059next_file:
2060 mnt_drop_write_file(dst_file);
2061next_loop:
2062 fdput(dst_fd);
2063
2064 if (fatal_signal_pending(current))
2065 goto out;
2066 }
2067
2068out:
2069 return ret;
2070}
2071EXPORT_SYMBOL(vfs_dedupe_file_range);
2072