1
2
3
4
5
6
7
8#include <linux/slab.h>
9#include <linux/stat.h>
10#include <linux/sched/xacct.h>
11#include <linux/fcntl.h>
12#include <linux/file.h>
13#include <linux/uio.h>
14#include <linux/fsnotify.h>
15#include <linux/security.h>
16#include <linux/export.h>
17#include <linux/syscalls.h>
18#include <linux/pagemap.h>
19#include <linux/splice.h>
20#include <linux/compat.h>
21#include <linux/mount.h>
22#include <linux/fs.h>
23#include "internal.h"
24
25#include <linux/uaccess.h>
26#include <asm/unistd.h>
27
28const struct file_operations generic_ro_fops = {
29 .llseek = generic_file_llseek,
30 .read_iter = generic_file_read_iter,
31 .mmap = generic_file_readonly_mmap,
32 .splice_read = generic_file_splice_read,
33};
34
35EXPORT_SYMBOL(generic_ro_fops);
36
37static inline bool unsigned_offsets(struct file *file)
38{
39 return file->f_mode & FMODE_UNSIGNED_OFFSET;
40}
41
42
43
44
45
46
47
48
49
50
51
52
53
54loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize)
55{
56 if (offset < 0 && !unsigned_offsets(file))
57 return -EINVAL;
58 if (offset > maxsize)
59 return -EINVAL;
60
61 if (offset != file->f_pos) {
62 file->f_pos = offset;
63 file->f_version = 0;
64 }
65 return offset;
66}
67EXPORT_SYMBOL(vfs_setpos);
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85loff_t
86generic_file_llseek_size(struct file *file, loff_t offset, int whence,
87 loff_t maxsize, loff_t eof)
88{
89 switch (whence) {
90 case SEEK_END:
91 offset += eof;
92 break;
93 case SEEK_CUR:
94
95
96
97
98
99
100 if (offset == 0)
101 return file->f_pos;
102
103
104
105
106
107 spin_lock(&file->f_lock);
108 offset = vfs_setpos(file, file->f_pos + offset, maxsize);
109 spin_unlock(&file->f_lock);
110 return offset;
111 case SEEK_DATA:
112
113
114
115
116 if ((unsigned long long)offset >= eof)
117 return -ENXIO;
118 break;
119 case SEEK_HOLE:
120
121
122
123
124 if ((unsigned long long)offset >= eof)
125 return -ENXIO;
126 offset = eof;
127 break;
128 }
129
130 return vfs_setpos(file, offset, maxsize);
131}
132EXPORT_SYMBOL(generic_file_llseek_size);
133
134
135
136
137
138
139
140
141
142
143
144loff_t generic_file_llseek(struct file *file, loff_t offset, int whence)
145{
146 struct inode *inode = file->f_mapping->host;
147
148 return generic_file_llseek_size(file, offset, whence,
149 inode->i_sb->s_maxbytes,
150 i_size_read(inode));
151}
152EXPORT_SYMBOL(generic_file_llseek);
153
154
155
156
157
158
159
160
161
162loff_t fixed_size_llseek(struct file *file, loff_t offset, int whence, loff_t size)
163{
164 switch (whence) {
165 case SEEK_SET: case SEEK_CUR: case SEEK_END:
166 return generic_file_llseek_size(file, offset, whence,
167 size, size);
168 default:
169 return -EINVAL;
170 }
171}
172EXPORT_SYMBOL(fixed_size_llseek);
173
174
175
176
177
178
179
180
181loff_t no_seek_end_llseek(struct file *file, loff_t offset, int whence)
182{
183 switch (whence) {
184 case SEEK_SET: case SEEK_CUR:
185 return generic_file_llseek_size(file, offset, whence,
186 OFFSET_MAX, 0);
187 default:
188 return -EINVAL;
189 }
190}
191EXPORT_SYMBOL(no_seek_end_llseek);
192
193
194
195
196
197
198
199
200
201loff_t no_seek_end_llseek_size(struct file *file, loff_t offset, int whence, loff_t size)
202{
203 switch (whence) {
204 case SEEK_SET: case SEEK_CUR:
205 return generic_file_llseek_size(file, offset, whence,
206 size, 0);
207 default:
208 return -EINVAL;
209 }
210}
211EXPORT_SYMBOL(no_seek_end_llseek_size);
212
213
214
215
216
217
218
219
220
221
222
223
224loff_t noop_llseek(struct file *file, loff_t offset, int whence)
225{
226 return file->f_pos;
227}
228EXPORT_SYMBOL(noop_llseek);
229
230loff_t no_llseek(struct file *file, loff_t offset, int whence)
231{
232 return -ESPIPE;
233}
234EXPORT_SYMBOL(no_llseek);
235
236loff_t default_llseek(struct file *file, loff_t offset, int whence)
237{
238 struct inode *inode = file_inode(file);
239 loff_t retval;
240
241 inode_lock(inode);
242 switch (whence) {
243 case SEEK_END:
244 offset += i_size_read(inode);
245 break;
246 case SEEK_CUR:
247 if (offset == 0) {
248 retval = file->f_pos;
249 goto out;
250 }
251 offset += file->f_pos;
252 break;
253 case SEEK_DATA:
254
255
256
257
258
259 if (offset >= inode->i_size) {
260 retval = -ENXIO;
261 goto out;
262 }
263 break;
264 case SEEK_HOLE:
265
266
267
268
269
270 if (offset >= inode->i_size) {
271 retval = -ENXIO;
272 goto out;
273 }
274 offset = inode->i_size;
275 break;
276 }
277 retval = -EINVAL;
278 if (offset >= 0 || unsigned_offsets(file)) {
279 if (offset != file->f_pos) {
280 file->f_pos = offset;
281 file->f_version = 0;
282 }
283 retval = offset;
284 }
285out:
286 inode_unlock(inode);
287 return retval;
288}
289EXPORT_SYMBOL(default_llseek);
290
291loff_t vfs_llseek(struct file *file, loff_t offset, int whence)
292{
293 loff_t (*fn)(struct file *, loff_t, int);
294
295 fn = no_llseek;
296 if (file->f_mode & FMODE_LSEEK) {
297 if (file->f_op->llseek)
298 fn = file->f_op->llseek;
299 }
300 return fn(file, offset, whence);
301}
302EXPORT_SYMBOL(vfs_llseek);
303
304SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence)
305{
306 off_t retval;
307 struct fd f = fdget_pos(fd);
308 if (!f.file)
309 return -EBADF;
310
311 retval = -EINVAL;
312 if (whence <= SEEK_MAX) {
313 loff_t res = vfs_llseek(f.file, offset, whence);
314 retval = res;
315 if (res != (loff_t)retval)
316 retval = -EOVERFLOW;
317 }
318 fdput_pos(f);
319 return retval;
320}
321
322#ifdef CONFIG_COMPAT
323COMPAT_SYSCALL_DEFINE3(lseek, unsigned int, fd, compat_off_t, offset, unsigned int, whence)
324{
325 return sys_lseek(fd, offset, whence);
326}
327#endif
328
329#ifdef __ARCH_WANT_SYS_LLSEEK
330SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high,
331 unsigned long, offset_low, loff_t __user *, result,
332 unsigned int, whence)
333{
334 int retval;
335 struct fd f = fdget_pos(fd);
336 loff_t offset;
337
338 if (!f.file)
339 return -EBADF;
340
341 retval = -EINVAL;
342 if (whence > SEEK_MAX)
343 goto out_putf;
344
345 offset = vfs_llseek(f.file, ((loff_t) offset_high << 32) | offset_low,
346 whence);
347
348 retval = (int)offset;
349 if (offset >= 0) {
350 retval = -EFAULT;
351 if (!copy_to_user(result, &offset, sizeof(offset)))
352 retval = 0;
353 }
354out_putf:
355 fdput_pos(f);
356 return retval;
357}
358#endif
359
360int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t count)
361{
362 struct inode *inode;
363 loff_t pos;
364 int retval = -EINVAL;
365
366 inode = file_inode(file);
367 if (unlikely((ssize_t) count < 0))
368 return retval;
369 pos = *ppos;
370 if (unlikely(pos < 0)) {
371 if (!unsigned_offsets(file))
372 return retval;
373 if (count >= -pos)
374 return -EOVERFLOW;
375 } else if (unlikely((loff_t) (pos + count) < 0)) {
376 if (!unsigned_offsets(file))
377 return retval;
378 }
379
380 if (unlikely(inode->i_flctx && mandatory_lock(inode))) {
381 retval = locks_mandatory_area(inode, file, pos, pos + count - 1,
382 read_write == READ ? F_RDLCK : F_WRLCK);
383 if (retval < 0)
384 return retval;
385 }
386 return security_file_permission(file,
387 read_write == READ ? MAY_READ : MAY_WRITE);
388}
389
390static ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
391{
392 struct iovec iov = { .iov_base = buf, .iov_len = len };
393 struct kiocb kiocb;
394 struct iov_iter iter;
395 ssize_t ret;
396
397 init_sync_kiocb(&kiocb, filp);
398 kiocb.ki_pos = *ppos;
399 iov_iter_init(&iter, READ, &iov, 1, len);
400
401 ret = call_read_iter(filp, &kiocb, &iter);
402 BUG_ON(ret == -EIOCBQUEUED);
403 *ppos = kiocb.ki_pos;
404 return ret;
405}
406
407ssize_t __vfs_read(struct file *file, char __user *buf, size_t count,
408 loff_t *pos)
409{
410 if (file->f_op->read)
411 return file->f_op->read(file, buf, count, pos);
412 else if (file->f_op->read_iter)
413 return new_sync_read(file, buf, count, pos);
414 else
415 return -EINVAL;
416}
417
418ssize_t kernel_read(struct file *file, void *buf, size_t count, loff_t *pos)
419{
420 mm_segment_t old_fs;
421 ssize_t result;
422
423 old_fs = get_fs();
424 set_fs(get_ds());
425
426 result = vfs_read(file, (void __user *)buf, count, pos);
427 set_fs(old_fs);
428 return result;
429}
430EXPORT_SYMBOL(kernel_read);
431
432ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
433{
434 ssize_t ret;
435
436 if (!(file->f_mode & FMODE_READ))
437 return -EBADF;
438 if (!(file->f_mode & FMODE_CAN_READ))
439 return -EINVAL;
440 if (unlikely(!access_ok(VERIFY_WRITE, buf, count)))
441 return -EFAULT;
442
443 ret = rw_verify_area(READ, file, pos, count);
444 if (!ret) {
445 if (count > MAX_RW_COUNT)
446 count = MAX_RW_COUNT;
447 ret = __vfs_read(file, buf, count, pos);
448 if (ret > 0) {
449 fsnotify_access(file);
450 add_rchar(current, ret);
451 }
452 inc_syscr(current);
453 }
454
455 return ret;
456}
457
458static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
459{
460 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len };
461 struct kiocb kiocb;
462 struct iov_iter iter;
463 ssize_t ret;
464
465 init_sync_kiocb(&kiocb, filp);
466 kiocb.ki_pos = *ppos;
467 iov_iter_init(&iter, WRITE, &iov, 1, len);
468
469 ret = call_write_iter(filp, &kiocb, &iter);
470 BUG_ON(ret == -EIOCBQUEUED);
471 if (ret > 0)
472 *ppos = kiocb.ki_pos;
473 return ret;
474}
475
476ssize_t __vfs_write(struct file *file, const char __user *p, size_t count,
477 loff_t *pos)
478{
479 if (file->f_op->write)
480 return file->f_op->write(file, p, count, pos);
481 else if (file->f_op->write_iter)
482 return new_sync_write(file, p, count, pos);
483 else
484 return -EINVAL;
485}
486
487ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t *pos)
488{
489 mm_segment_t old_fs;
490 const char __user *p;
491 ssize_t ret;
492
493 if (!(file->f_mode & FMODE_CAN_WRITE))
494 return -EINVAL;
495
496 old_fs = get_fs();
497 set_fs(get_ds());
498 p = (__force const char __user *)buf;
499 if (count > MAX_RW_COUNT)
500 count = MAX_RW_COUNT;
501 ret = __vfs_write(file, p, count, pos);
502 set_fs(old_fs);
503 if (ret > 0) {
504 fsnotify_modify(file);
505 add_wchar(current, ret);
506 }
507 inc_syscw(current);
508 return ret;
509}
510EXPORT_SYMBOL(__kernel_write);
511
512ssize_t kernel_write(struct file *file, const void *buf, size_t count,
513 loff_t *pos)
514{
515 mm_segment_t old_fs;
516 ssize_t res;
517
518 old_fs = get_fs();
519 set_fs(get_ds());
520
521 res = vfs_write(file, (__force const char __user *)buf, count, pos);
522 set_fs(old_fs);
523
524 return res;
525}
526EXPORT_SYMBOL(kernel_write);
527
528ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
529{
530 ssize_t ret;
531
532 if (!(file->f_mode & FMODE_WRITE))
533 return -EBADF;
534 if (!(file->f_mode & FMODE_CAN_WRITE))
535 return -EINVAL;
536 if (unlikely(!access_ok(VERIFY_READ, buf, count)))
537 return -EFAULT;
538
539 ret = rw_verify_area(WRITE, file, pos, count);
540 if (!ret) {
541 if (count > MAX_RW_COUNT)
542 count = MAX_RW_COUNT;
543 file_start_write(file);
544 ret = __vfs_write(file, buf, count, pos);
545 if (ret > 0) {
546 fsnotify_modify(file);
547 add_wchar(current, ret);
548 }
549 inc_syscw(current);
550 file_end_write(file);
551 }
552
553 return ret;
554}
555
556static inline loff_t file_pos_read(struct file *file)
557{
558 return file->f_pos;
559}
560
561static inline void file_pos_write(struct file *file, loff_t pos)
562{
563 file->f_pos = pos;
564}
565
566SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
567{
568 struct fd f = fdget_pos(fd);
569 ssize_t ret = -EBADF;
570
571 if (f.file) {
572 loff_t pos = file_pos_read(f.file);
573 ret = vfs_read(f.file, buf, count, &pos);
574 if (ret >= 0)
575 file_pos_write(f.file, pos);
576 fdput_pos(f);
577 }
578 return ret;
579}
580
581SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
582 size_t, count)
583{
584 struct fd f = fdget_pos(fd);
585 ssize_t ret = -EBADF;
586
587 if (f.file) {
588 loff_t pos = file_pos_read(f.file);
589 ret = vfs_write(f.file, buf, count, &pos);
590 if (ret >= 0)
591 file_pos_write(f.file, pos);
592 fdput_pos(f);
593 }
594
595 return ret;
596}
597
598SYSCALL_DEFINE4(pread64, unsigned int, fd, char __user *, buf,
599 size_t, count, loff_t, pos)
600{
601 struct fd f;
602 ssize_t ret = -EBADF;
603
604 if (pos < 0)
605 return -EINVAL;
606
607 f = fdget(fd);
608 if (f.file) {
609 ret = -ESPIPE;
610 if (f.file->f_mode & FMODE_PREAD)
611 ret = vfs_read(f.file, buf, count, &pos);
612 fdput(f);
613 }
614
615 return ret;
616}
617
618SYSCALL_DEFINE4(pwrite64, unsigned int, fd, const char __user *, buf,
619 size_t, count, loff_t, pos)
620{
621 struct fd f;
622 ssize_t ret = -EBADF;
623
624 if (pos < 0)
625 return -EINVAL;
626
627 f = fdget(fd);
628 if (f.file) {
629 ret = -ESPIPE;
630 if (f.file->f_mode & FMODE_PWRITE)
631 ret = vfs_write(f.file, buf, count, &pos);
632 fdput(f);
633 }
634
635 return ret;
636}
637
638
639
640
/*
 * Trim an iovec array in place so that the segments counted in the return
 * value describe at most @to bytes.  The segment in which the running total
 * reaches @to has its length cut down to the remainder; later segments are
 * left untouched.
 *
 * Returns the number of segments that are still in use.  If the array holds
 * fewer than @to bytes, nothing is modified and @nr_segs is returned.
 */
unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to)
{
	size_t consumed = 0;
	unsigned long i;

	for (i = 0; i < nr_segs; i++) {
		struct iovec *cur = &iov[i];

		if (consumed + cur->iov_len >= to) {
			cur->iov_len = to - consumed;
			return i + 1;
		}
		consumed += cur->iov_len;
	}

	return nr_segs;
}
657EXPORT_SYMBOL(iov_shorten);
658
659static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
660 loff_t *ppos, int type, rwf_t flags)
661{
662 struct kiocb kiocb;
663 ssize_t ret;
664
665 init_sync_kiocb(&kiocb, filp);
666 ret = kiocb_set_rw_flags(&kiocb, flags);
667 if (ret)
668 return ret;
669 kiocb.ki_pos = *ppos;
670
671 if (type == READ)
672 ret = call_read_iter(filp, &kiocb, iter);
673 else
674 ret = call_write_iter(filp, &kiocb, iter);
675 BUG_ON(ret == -EIOCBQUEUED);
676 *ppos = kiocb.ki_pos;
677 return ret;
678}
679
680
681static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter,
682 loff_t *ppos, int type, rwf_t flags)
683{
684 ssize_t ret = 0;
685
686 if (flags & ~RWF_HIPRI)
687 return -EOPNOTSUPP;
688
689 while (iov_iter_count(iter)) {
690 struct iovec iovec = iov_iter_iovec(iter);
691 ssize_t nr;
692
693 if (type == READ) {
694 nr = filp->f_op->read(filp, iovec.iov_base,
695 iovec.iov_len, ppos);
696 } else {
697 nr = filp->f_op->write(filp, iovec.iov_base,
698 iovec.iov_len, ppos);
699 }
700
701 if (nr < 0) {
702 if (!ret)
703 ret = nr;
704 break;
705 }
706 ret += nr;
707 if (nr != iovec.iov_len)
708 break;
709 iov_iter_advance(iter, nr);
710 }
711
712 return ret;
713}
714
715
/* access_ok() direction for a transfer: a READ stores into the buffer. */
#define vrfy_dir(type) ((type) != READ ? VERIFY_READ : VERIFY_WRITE)
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
748 unsigned long nr_segs, unsigned long fast_segs,
749 struct iovec *fast_pointer,
750 struct iovec **ret_pointer)
751{
752 unsigned long seg;
753 ssize_t ret;
754 struct iovec *iov = fast_pointer;
755
756
757
758
759
760
761 if (nr_segs == 0) {
762 ret = 0;
763 goto out;
764 }
765
766
767
768
769
770 if (nr_segs > UIO_MAXIOV) {
771 ret = -EINVAL;
772 goto out;
773 }
774 if (nr_segs > fast_segs) {
775 iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL);
776 if (iov == NULL) {
777 ret = -ENOMEM;
778 goto out;
779 }
780 }
781 if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) {
782 ret = -EFAULT;
783 goto out;
784 }
785
786
787
788
789
790
791
792
793
794
795 ret = 0;
796 for (seg = 0; seg < nr_segs; seg++) {
797 void __user *buf = iov[seg].iov_base;
798 ssize_t len = (ssize_t)iov[seg].iov_len;
799
800
801
802 if (len < 0) {
803 ret = -EINVAL;
804 goto out;
805 }
806 if (type >= 0
807 && unlikely(!access_ok(vrfy_dir(type), buf, len))) {
808 ret = -EFAULT;
809 goto out;
810 }
811 if (len > MAX_RW_COUNT - ret) {
812 len = MAX_RW_COUNT - ret;
813 iov[seg].iov_len = len;
814 }
815 ret += len;
816 }
817out:
818 *ret_pointer = iov;
819 return ret;
820}
821
822#ifdef CONFIG_COMPAT
823ssize_t compat_rw_copy_check_uvector(int type,
824 const struct compat_iovec __user *uvector, unsigned long nr_segs,
825 unsigned long fast_segs, struct iovec *fast_pointer,
826 struct iovec **ret_pointer)
827{
828 compat_ssize_t tot_len;
829 struct iovec *iov = *ret_pointer = fast_pointer;
830 ssize_t ret = 0;
831 int seg;
832
833
834
835
836
837
838 if (nr_segs == 0)
839 goto out;
840
841 ret = -EINVAL;
842 if (nr_segs > UIO_MAXIOV)
843 goto out;
844 if (nr_segs > fast_segs) {
845 ret = -ENOMEM;
846 iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL);
847 if (iov == NULL)
848 goto out;
849 }
850 *ret_pointer = iov;
851
852 ret = -EFAULT;
853 if (!access_ok(VERIFY_READ, uvector, nr_segs*sizeof(*uvector)))
854 goto out;
855
856
857
858
859
860
861
862
863
864 tot_len = 0;
865 ret = -EINVAL;
866 for (seg = 0; seg < nr_segs; seg++) {
867 compat_uptr_t buf;
868 compat_ssize_t len;
869
870 if (__get_user(len, &uvector->iov_len) ||
871 __get_user(buf, &uvector->iov_base)) {
872 ret = -EFAULT;
873 goto out;
874 }
875 if (len < 0)
876 goto out;
877 if (type >= 0 &&
878 !access_ok(vrfy_dir(type), compat_ptr(buf), len)) {
879 ret = -EFAULT;
880 goto out;
881 }
882 if (len > MAX_RW_COUNT - tot_len)
883 len = MAX_RW_COUNT - tot_len;
884 tot_len += len;
885 iov->iov_base = compat_ptr(buf);
886 iov->iov_len = (compat_size_t) len;
887 uvector++;
888 iov++;
889 }
890 ret = tot_len;
891
892out:
893 return ret;
894}
895#endif
896
897static ssize_t do_iter_read(struct file *file, struct iov_iter *iter,
898 loff_t *pos, rwf_t flags)
899{
900 size_t tot_len;
901 ssize_t ret = 0;
902
903 if (!(file->f_mode & FMODE_READ))
904 return -EBADF;
905 if (!(file->f_mode & FMODE_CAN_READ))
906 return -EINVAL;
907
908 tot_len = iov_iter_count(iter);
909 if (!tot_len)
910 goto out;
911 ret = rw_verify_area(READ, file, pos, tot_len);
912 if (ret < 0)
913 return ret;
914
915 if (file->f_op->read_iter)
916 ret = do_iter_readv_writev(file, iter, pos, READ, flags);
917 else
918 ret = do_loop_readv_writev(file, iter, pos, READ, flags);
919out:
920 if (ret >= 0)
921 fsnotify_access(file);
922 return ret;
923}
924
925ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos,
926 rwf_t flags)
927{
928 if (!file->f_op->read_iter)
929 return -EINVAL;
930 return do_iter_read(file, iter, ppos, flags);
931}
932EXPORT_SYMBOL(vfs_iter_read);
933
934static ssize_t do_iter_write(struct file *file, struct iov_iter *iter,
935 loff_t *pos, rwf_t flags)
936{
937 size_t tot_len;
938 ssize_t ret = 0;
939
940 if (!(file->f_mode & FMODE_WRITE))
941 return -EBADF;
942 if (!(file->f_mode & FMODE_CAN_WRITE))
943 return -EINVAL;
944
945 tot_len = iov_iter_count(iter);
946 if (!tot_len)
947 return 0;
948 ret = rw_verify_area(WRITE, file, pos, tot_len);
949 if (ret < 0)
950 return ret;
951
952 if (file->f_op->write_iter)
953 ret = do_iter_readv_writev(file, iter, pos, WRITE, flags);
954 else
955 ret = do_loop_readv_writev(file, iter, pos, WRITE, flags);
956 if (ret > 0)
957 fsnotify_modify(file);
958 return ret;
959}
960
961ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos,
962 rwf_t flags)
963{
964 if (!file->f_op->write_iter)
965 return -EINVAL;
966 return do_iter_write(file, iter, ppos, flags);
967}
968EXPORT_SYMBOL(vfs_iter_write);
969
970ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
971 unsigned long vlen, loff_t *pos, rwf_t flags)
972{
973 struct iovec iovstack[UIO_FASTIOV];
974 struct iovec *iov = iovstack;
975 struct iov_iter iter;
976 ssize_t ret;
977
978 ret = import_iovec(READ, vec, vlen, ARRAY_SIZE(iovstack), &iov, &iter);
979 if (ret >= 0) {
980 ret = do_iter_read(file, &iter, pos, flags);
981 kfree(iov);
982 }
983
984 return ret;
985}
986
987static ssize_t vfs_writev(struct file *file, const struct iovec __user *vec,
988 unsigned long vlen, loff_t *pos, rwf_t flags)
989{
990 struct iovec iovstack[UIO_FASTIOV];
991 struct iovec *iov = iovstack;
992 struct iov_iter iter;
993 ssize_t ret;
994
995 ret = import_iovec(WRITE, vec, vlen, ARRAY_SIZE(iovstack), &iov, &iter);
996 if (ret >= 0) {
997 file_start_write(file);
998 ret = do_iter_write(file, &iter, pos, flags);
999 file_end_write(file);
1000 kfree(iov);
1001 }
1002 return ret;
1003}
1004
1005static ssize_t do_readv(unsigned long fd, const struct iovec __user *vec,
1006 unsigned long vlen, rwf_t flags)
1007{
1008 struct fd f = fdget_pos(fd);
1009 ssize_t ret = -EBADF;
1010
1011 if (f.file) {
1012 loff_t pos = file_pos_read(f.file);
1013 ret = vfs_readv(f.file, vec, vlen, &pos, flags);
1014 if (ret >= 0)
1015 file_pos_write(f.file, pos);
1016 fdput_pos(f);
1017 }
1018
1019 if (ret > 0)
1020 add_rchar(current, ret);
1021 inc_syscr(current);
1022 return ret;
1023}
1024
1025static ssize_t do_writev(unsigned long fd, const struct iovec __user *vec,
1026 unsigned long vlen, rwf_t flags)
1027{
1028 struct fd f = fdget_pos(fd);
1029 ssize_t ret = -EBADF;
1030
1031 if (f.file) {
1032 loff_t pos = file_pos_read(f.file);
1033 ret = vfs_writev(f.file, vec, vlen, &pos, flags);
1034 if (ret >= 0)
1035 file_pos_write(f.file, pos);
1036 fdput_pos(f);
1037 }
1038
1039 if (ret > 0)
1040 add_wchar(current, ret);
1041 inc_syscw(current);
1042 return ret;
1043}
1044
1045static inline loff_t pos_from_hilo(unsigned long high, unsigned long low)
1046{
1047#define HALF_LONG_BITS (BITS_PER_LONG / 2)
1048 return (((loff_t)high << HALF_LONG_BITS) << HALF_LONG_BITS) | low;
1049}
1050
1051static ssize_t do_preadv(unsigned long fd, const struct iovec __user *vec,
1052 unsigned long vlen, loff_t pos, rwf_t flags)
1053{
1054 struct fd f;
1055 ssize_t ret = -EBADF;
1056
1057 if (pos < 0)
1058 return -EINVAL;
1059
1060 f = fdget(fd);
1061 if (f.file) {
1062 ret = -ESPIPE;
1063 if (f.file->f_mode & FMODE_PREAD)
1064 ret = vfs_readv(f.file, vec, vlen, &pos, flags);
1065 fdput(f);
1066 }
1067
1068 if (ret > 0)
1069 add_rchar(current, ret);
1070 inc_syscr(current);
1071 return ret;
1072}
1073
1074static ssize_t do_pwritev(unsigned long fd, const struct iovec __user *vec,
1075 unsigned long vlen, loff_t pos, rwf_t flags)
1076{
1077 struct fd f;
1078 ssize_t ret = -EBADF;
1079
1080 if (pos < 0)
1081 return -EINVAL;
1082
1083 f = fdget(fd);
1084 if (f.file) {
1085 ret = -ESPIPE;
1086 if (f.file->f_mode & FMODE_PWRITE)
1087 ret = vfs_writev(f.file, vec, vlen, &pos, flags);
1088 fdput(f);
1089 }
1090
1091 if (ret > 0)
1092 add_wchar(current, ret);
1093 inc_syscw(current);
1094 return ret;
1095}
1096
1097SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
1098 unsigned long, vlen)
1099{
1100 return do_readv(fd, vec, vlen, 0);
1101}
1102
1103SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
1104 unsigned long, vlen)
1105{
1106 return do_writev(fd, vec, vlen, 0);
1107}
1108
1109SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec,
1110 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
1111{
1112 loff_t pos = pos_from_hilo(pos_h, pos_l);
1113
1114 return do_preadv(fd, vec, vlen, pos, 0);
1115}
1116
1117SYSCALL_DEFINE6(preadv2, unsigned long, fd, const struct iovec __user *, vec,
1118 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h,
1119 rwf_t, flags)
1120{
1121 loff_t pos = pos_from_hilo(pos_h, pos_l);
1122
1123 if (pos == -1)
1124 return do_readv(fd, vec, vlen, flags);
1125
1126 return do_preadv(fd, vec, vlen, pos, flags);
1127}
1128
1129SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
1130 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
1131{
1132 loff_t pos = pos_from_hilo(pos_h, pos_l);
1133
1134 return do_pwritev(fd, vec, vlen, pos, 0);
1135}
1136
1137SYSCALL_DEFINE6(pwritev2, unsigned long, fd, const struct iovec __user *, vec,
1138 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h,
1139 rwf_t, flags)
1140{
1141 loff_t pos = pos_from_hilo(pos_h, pos_l);
1142
1143 if (pos == -1)
1144 return do_writev(fd, vec, vlen, flags);
1145
1146 return do_pwritev(fd, vec, vlen, pos, flags);
1147}
1148
1149#ifdef CONFIG_COMPAT
1150static size_t compat_readv(struct file *file,
1151 const struct compat_iovec __user *vec,
1152 unsigned long vlen, loff_t *pos, rwf_t flags)
1153{
1154 struct iovec iovstack[UIO_FASTIOV];
1155 struct iovec *iov = iovstack;
1156 struct iov_iter iter;
1157 ssize_t ret;
1158
1159 ret = compat_import_iovec(READ, vec, vlen, UIO_FASTIOV, &iov, &iter);
1160 if (ret >= 0) {
1161 ret = do_iter_read(file, &iter, pos, flags);
1162 kfree(iov);
1163 }
1164 if (ret > 0)
1165 add_rchar(current, ret);
1166 inc_syscr(current);
1167 return ret;
1168}
1169
1170static size_t do_compat_readv(compat_ulong_t fd,
1171 const struct compat_iovec __user *vec,
1172 compat_ulong_t vlen, rwf_t flags)
1173{
1174 struct fd f = fdget_pos(fd);
1175 ssize_t ret;
1176 loff_t pos;
1177
1178 if (!f.file)
1179 return -EBADF;
1180 pos = f.file->f_pos;
1181 ret = compat_readv(f.file, vec, vlen, &pos, flags);
1182 if (ret >= 0)
1183 f.file->f_pos = pos;
1184 fdput_pos(f);
1185 return ret;
1186
1187}
1188
1189COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd,
1190 const struct compat_iovec __user *,vec,
1191 compat_ulong_t, vlen)
1192{
1193 return do_compat_readv(fd, vec, vlen, 0);
1194}
1195
1196static long do_compat_preadv64(unsigned long fd,
1197 const struct compat_iovec __user *vec,
1198 unsigned long vlen, loff_t pos, rwf_t flags)
1199{
1200 struct fd f;
1201 ssize_t ret;
1202
1203 if (pos < 0)
1204 return -EINVAL;
1205 f = fdget(fd);
1206 if (!f.file)
1207 return -EBADF;
1208 ret = -ESPIPE;
1209 if (f.file->f_mode & FMODE_PREAD)
1210 ret = compat_readv(f.file, vec, vlen, &pos, flags);
1211 fdput(f);
1212 return ret;
1213}
1214
1215#ifdef __ARCH_WANT_COMPAT_SYS_PREADV64
1216COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd,
1217 const struct compat_iovec __user *,vec,
1218 unsigned long, vlen, loff_t, pos)
1219{
1220 return do_compat_preadv64(fd, vec, vlen, pos, 0);
1221}
1222#endif
1223
1224COMPAT_SYSCALL_DEFINE5(preadv, compat_ulong_t, fd,
1225 const struct compat_iovec __user *,vec,
1226 compat_ulong_t, vlen, u32, pos_low, u32, pos_high)
1227{
1228 loff_t pos = ((loff_t)pos_high << 32) | pos_low;
1229
1230 return do_compat_preadv64(fd, vec, vlen, pos, 0);
1231}
1232
1233#ifdef __ARCH_WANT_COMPAT_SYS_PREADV64V2
1234COMPAT_SYSCALL_DEFINE5(preadv64v2, unsigned long, fd,
1235 const struct compat_iovec __user *,vec,
1236 unsigned long, vlen, loff_t, pos, rwf_t, flags)
1237{
1238 return do_compat_preadv64(fd, vec, vlen, pos, flags);
1239}
1240#endif
1241
1242COMPAT_SYSCALL_DEFINE6(preadv2, compat_ulong_t, fd,
1243 const struct compat_iovec __user *,vec,
1244 compat_ulong_t, vlen, u32, pos_low, u32, pos_high,
1245 rwf_t, flags)
1246{
1247 loff_t pos = ((loff_t)pos_high << 32) | pos_low;
1248
1249 if (pos == -1)
1250 return do_compat_readv(fd, vec, vlen, flags);
1251
1252 return do_compat_preadv64(fd, vec, vlen, pos, flags);
1253}
1254
1255static size_t compat_writev(struct file *file,
1256 const struct compat_iovec __user *vec,
1257 unsigned long vlen, loff_t *pos, rwf_t flags)
1258{
1259 struct iovec iovstack[UIO_FASTIOV];
1260 struct iovec *iov = iovstack;
1261 struct iov_iter iter;
1262 ssize_t ret;
1263
1264 ret = compat_import_iovec(WRITE, vec, vlen, UIO_FASTIOV, &iov, &iter);
1265 if (ret >= 0) {
1266 file_start_write(file);
1267 ret = do_iter_write(file, &iter, pos, flags);
1268 file_end_write(file);
1269 kfree(iov);
1270 }
1271 if (ret > 0)
1272 add_wchar(current, ret);
1273 inc_syscw(current);
1274 return ret;
1275}
1276
1277static size_t do_compat_writev(compat_ulong_t fd,
1278 const struct compat_iovec __user* vec,
1279 compat_ulong_t vlen, rwf_t flags)
1280{
1281 struct fd f = fdget_pos(fd);
1282 ssize_t ret;
1283 loff_t pos;
1284
1285 if (!f.file)
1286 return -EBADF;
1287 pos = f.file->f_pos;
1288 ret = compat_writev(f.file, vec, vlen, &pos, flags);
1289 if (ret >= 0)
1290 f.file->f_pos = pos;
1291 fdput_pos(f);
1292 return ret;
1293}
1294
1295COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd,
1296 const struct compat_iovec __user *, vec,
1297 compat_ulong_t, vlen)
1298{
1299 return do_compat_writev(fd, vec, vlen, 0);
1300}
1301
1302static long do_compat_pwritev64(unsigned long fd,
1303 const struct compat_iovec __user *vec,
1304 unsigned long vlen, loff_t pos, rwf_t flags)
1305{
1306 struct fd f;
1307 ssize_t ret;
1308
1309 if (pos < 0)
1310 return -EINVAL;
1311 f = fdget(fd);
1312 if (!f.file)
1313 return -EBADF;
1314 ret = -ESPIPE;
1315 if (f.file->f_mode & FMODE_PWRITE)
1316 ret = compat_writev(f.file, vec, vlen, &pos, flags);
1317 fdput(f);
1318 return ret;
1319}
1320
1321#ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64
1322COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd,
1323 const struct compat_iovec __user *,vec,
1324 unsigned long, vlen, loff_t, pos)
1325{
1326 return do_compat_pwritev64(fd, vec, vlen, pos, 0);
1327}
1328#endif
1329
1330COMPAT_SYSCALL_DEFINE5(pwritev, compat_ulong_t, fd,
1331 const struct compat_iovec __user *,vec,
1332 compat_ulong_t, vlen, u32, pos_low, u32, pos_high)
1333{
1334 loff_t pos = ((loff_t)pos_high << 32) | pos_low;
1335
1336 return do_compat_pwritev64(fd, vec, vlen, pos, 0);
1337}
1338
1339#ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64V2
1340COMPAT_SYSCALL_DEFINE5(pwritev64v2, unsigned long, fd,
1341 const struct compat_iovec __user *,vec,
1342 unsigned long, vlen, loff_t, pos, rwf_t, flags)
1343{
1344 return do_compat_pwritev64(fd, vec, vlen, pos, flags);
1345}
1346#endif
1347
1348COMPAT_SYSCALL_DEFINE6(pwritev2, compat_ulong_t, fd,
1349 const struct compat_iovec __user *,vec,
1350 compat_ulong_t, vlen, u32, pos_low, u32, pos_high, rwf_t, flags)
1351{
1352 loff_t pos = ((loff_t)pos_high << 32) | pos_low;
1353
1354 if (pos == -1)
1355 return do_compat_writev(fd, vec, vlen, flags);
1356
1357 return do_compat_pwritev64(fd, vec, vlen, pos, flags);
1358}
1359
1360#endif
1361
1362static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
1363 size_t count, loff_t max)
1364{
1365 struct fd in, out;
1366 struct inode *in_inode, *out_inode;
1367 loff_t pos;
1368 loff_t out_pos;
1369 ssize_t retval;
1370 int fl;
1371
1372
1373
1374
1375 retval = -EBADF;
1376 in = fdget(in_fd);
1377 if (!in.file)
1378 goto out;
1379 if (!(in.file->f_mode & FMODE_READ))
1380 goto fput_in;
1381 retval = -ESPIPE;
1382 if (!ppos) {
1383 pos = in.file->f_pos;
1384 } else {
1385 pos = *ppos;
1386 if (!(in.file->f_mode & FMODE_PREAD))
1387 goto fput_in;
1388 }
1389 retval = rw_verify_area(READ, in.file, &pos, count);
1390 if (retval < 0)
1391 goto fput_in;
1392 if (count > MAX_RW_COUNT)
1393 count = MAX_RW_COUNT;
1394
1395
1396
1397
1398 retval = -EBADF;
1399 out = fdget(out_fd);
1400 if (!out.file)
1401 goto fput_in;
1402 if (!(out.file->f_mode & FMODE_WRITE))
1403 goto fput_out;
1404 retval = -EINVAL;
1405 in_inode = file_inode(in.file);
1406 out_inode = file_inode(out.file);
1407 out_pos = out.file->f_pos;
1408 retval = rw_verify_area(WRITE, out.file, &out_pos, count);
1409 if (retval < 0)
1410 goto fput_out;
1411
1412 if (!max)
1413 max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes);
1414
1415 if (unlikely(pos + count > max)) {
1416 retval = -EOVERFLOW;
1417 if (pos >= max)
1418 goto fput_out;
1419 count = max - pos;
1420 }
1421
1422 fl = 0;
1423#if 0
1424
1425
1426
1427
1428
1429
1430 if (in.file->f_flags & O_NONBLOCK)
1431 fl = SPLICE_F_NONBLOCK;
1432#endif
1433 file_start_write(out.file);
1434 retval = do_splice_direct(in.file, &pos, out.file, &out_pos, count, fl);
1435 file_end_write(out.file);
1436
1437 if (retval > 0) {
1438 add_rchar(current, retval);
1439 add_wchar(current, retval);
1440 fsnotify_access(in.file);
1441 fsnotify_modify(out.file);
1442 out.file->f_pos = out_pos;
1443 if (ppos)
1444 *ppos = pos;
1445 else
1446 in.file->f_pos = pos;
1447 }
1448
1449 inc_syscr(current);
1450 inc_syscw(current);
1451 if (pos > max)
1452 retval = -EOVERFLOW;
1453
1454fput_out:
1455 fdput(out);
1456fput_in:
1457 fdput(in);
1458out:
1459 return retval;
1460}
1461
1462SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd, off_t __user *, offset, size_t, count)
1463{
1464 loff_t pos;
1465 off_t off;
1466 ssize_t ret;
1467
1468 if (offset) {
1469 if (unlikely(get_user(off, offset)))
1470 return -EFAULT;
1471 pos = off;
1472 ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS);
1473 if (unlikely(put_user(pos, offset)))
1474 return -EFAULT;
1475 return ret;
1476 }
1477
1478 return do_sendfile(out_fd, in_fd, NULL, count, 0);
1479}
1480
1481SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, loff_t __user *, offset, size_t, count)
1482{
1483 loff_t pos;
1484 ssize_t ret;
1485
1486 if (offset) {
1487 if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t))))
1488 return -EFAULT;
1489 ret = do_sendfile(out_fd, in_fd, &pos, count, 0);
1490 if (unlikely(put_user(pos, offset)))
1491 return -EFAULT;
1492 return ret;
1493 }
1494
1495 return do_sendfile(out_fd, in_fd, NULL, count, 0);
1496}
1497
1498#ifdef CONFIG_COMPAT
1499COMPAT_SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd,
1500 compat_off_t __user *, offset, compat_size_t, count)
1501{
1502 loff_t pos;
1503 off_t off;
1504 ssize_t ret;
1505
1506 if (offset) {
1507 if (unlikely(get_user(off, offset)))
1508 return -EFAULT;
1509 pos = off;
1510 ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS);
1511 if (unlikely(put_user(pos, offset)))
1512 return -EFAULT;
1513 return ret;
1514 }
1515
1516 return do_sendfile(out_fd, in_fd, NULL, count, 0);
1517}
1518
1519COMPAT_SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd,
1520 compat_loff_t __user *, offset, compat_size_t, count)
1521{
1522 loff_t pos;
1523 ssize_t ret;
1524
1525 if (offset) {
1526 if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t))))
1527 return -EFAULT;
1528 ret = do_sendfile(out_fd, in_fd, &pos, count, 0);
1529 if (unlikely(put_user(pos, offset)))
1530 return -EFAULT;
1531 return ret;
1532 }
1533
1534 return do_sendfile(out_fd, in_fd, NULL, count, 0);
1535}
1536#endif
1537
1538
1539
1540
1541
1542
1543ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
1544 struct file *file_out, loff_t pos_out,
1545 size_t len, unsigned int flags)
1546{
1547 struct inode *inode_in = file_inode(file_in);
1548 struct inode *inode_out = file_inode(file_out);
1549 ssize_t ret;
1550
1551 if (flags != 0)
1552 return -EINVAL;
1553
1554 if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
1555 return -EISDIR;
1556 if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
1557 return -EINVAL;
1558
1559 ret = rw_verify_area(READ, file_in, &pos_in, len);
1560 if (unlikely(ret))
1561 return ret;
1562
1563 ret = rw_verify_area(WRITE, file_out, &pos_out, len);
1564 if (unlikely(ret))
1565 return ret;
1566
1567 if (!(file_in->f_mode & FMODE_READ) ||
1568 !(file_out->f_mode & FMODE_WRITE) ||
1569 (file_out->f_flags & O_APPEND))
1570 return -EBADF;
1571
1572
1573 if (inode_in->i_sb != inode_out->i_sb)
1574 return -EXDEV;
1575
1576 if (len == 0)
1577 return 0;
1578
1579 file_start_write(file_out);
1580
1581
1582
1583
1584
1585 if (file_in->f_op->clone_file_range) {
1586 ret = file_in->f_op->clone_file_range(file_in, pos_in,
1587 file_out, pos_out, len);
1588 if (ret == 0) {
1589 ret = len;
1590 goto done;
1591 }
1592 }
1593
1594 if (file_out->f_op->copy_file_range) {
1595 ret = file_out->f_op->copy_file_range(file_in, pos_in, file_out,
1596 pos_out, len, flags);
1597 if (ret != -EOPNOTSUPP)
1598 goto done;
1599 }
1600
1601 ret = do_splice_direct(file_in, &pos_in, file_out, &pos_out,
1602 len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0);
1603
1604done:
1605 if (ret > 0) {
1606 fsnotify_access(file_in);
1607 add_rchar(current, ret);
1608 fsnotify_modify(file_out);
1609 add_wchar(current, ret);
1610 }
1611
1612 inc_syscr(current);
1613 inc_syscw(current);
1614
1615 file_end_write(file_out);
1616
1617 return ret;
1618}
1619EXPORT_SYMBOL(vfs_copy_file_range);
1620
1621SYSCALL_DEFINE6(copy_file_range, int, fd_in, loff_t __user *, off_in,
1622 int, fd_out, loff_t __user *, off_out,
1623 size_t, len, unsigned int, flags)
1624{
1625 loff_t pos_in;
1626 loff_t pos_out;
1627 struct fd f_in;
1628 struct fd f_out;
1629 ssize_t ret = -EBADF;
1630
1631 f_in = fdget(fd_in);
1632 if (!f_in.file)
1633 goto out2;
1634
1635 f_out = fdget(fd_out);
1636 if (!f_out.file)
1637 goto out1;
1638
1639 ret = -EFAULT;
1640 if (off_in) {
1641 if (copy_from_user(&pos_in, off_in, sizeof(loff_t)))
1642 goto out;
1643 } else {
1644 pos_in = f_in.file->f_pos;
1645 }
1646
1647 if (off_out) {
1648 if (copy_from_user(&pos_out, off_out, sizeof(loff_t)))
1649 goto out;
1650 } else {
1651 pos_out = f_out.file->f_pos;
1652 }
1653
1654 ret = vfs_copy_file_range(f_in.file, pos_in, f_out.file, pos_out, len,
1655 flags);
1656 if (ret > 0) {
1657 pos_in += ret;
1658 pos_out += ret;
1659
1660 if (off_in) {
1661 if (copy_to_user(off_in, &pos_in, sizeof(loff_t)))
1662 ret = -EFAULT;
1663 } else {
1664 f_in.file->f_pos = pos_in;
1665 }
1666
1667 if (off_out) {
1668 if (copy_to_user(off_out, &pos_out, sizeof(loff_t)))
1669 ret = -EFAULT;
1670 } else {
1671 f_out.file->f_pos = pos_out;
1672 }
1673 }
1674
1675out:
1676 fdput(f_out);
1677out1:
1678 fdput(f_in);
1679out2:
1680 return ret;
1681}
1682
1683static int clone_verify_area(struct file *file, loff_t pos, u64 len, bool write)
1684{
1685 struct inode *inode = file_inode(file);
1686
1687 if (unlikely(pos < 0))
1688 return -EINVAL;
1689
1690 if (unlikely((loff_t) (pos + len) < 0))
1691 return -EINVAL;
1692
1693 if (unlikely(inode->i_flctx && mandatory_lock(inode))) {
1694 loff_t end = len ? pos + len - 1 : OFFSET_MAX;
1695 int retval;
1696
1697 retval = locks_mandatory_area(inode, file, pos, end,
1698 write ? F_WRLCK : F_RDLCK);
1699 if (retval < 0)
1700 return retval;
1701 }
1702
1703 return security_file_permission(file, write ? MAY_WRITE : MAY_READ);
1704}
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in,
1715 struct inode *inode_out, loff_t pos_out,
1716 u64 *len, bool is_dedupe)
1717{
1718 loff_t bs = inode_out->i_sb->s_blocksize;
1719 loff_t blen;
1720 loff_t isize;
1721 bool same_inode = (inode_in == inode_out);
1722 int ret;
1723
1724
1725 if (IS_IMMUTABLE(inode_out))
1726 return -EPERM;
1727
1728 if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out))
1729 return -ETXTBSY;
1730
1731
1732 if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
1733 return -EISDIR;
1734 if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
1735 return -EINVAL;
1736
1737
1738 isize = i_size_read(inode_in);
1739 if (isize == 0)
1740 return 0;
1741
1742
1743 if (*len == 0) {
1744 if (is_dedupe || pos_in == isize)
1745 return 0;
1746 if (pos_in > isize)
1747 return -EINVAL;
1748 *len = isize - pos_in;
1749 }
1750
1751
1752 if (pos_in + *len < pos_in || pos_out + *len < pos_out ||
1753 pos_in + *len > isize)
1754 return -EINVAL;
1755
1756
1757 if (is_dedupe) {
1758 loff_t disize;
1759
1760 disize = i_size_read(inode_out);
1761 if (pos_out >= disize || pos_out + *len > disize)
1762 return -EINVAL;
1763 }
1764
1765
1766 if (pos_in + *len == isize)
1767 blen = ALIGN(isize, bs) - pos_in;
1768 else
1769 blen = *len;
1770
1771
1772 if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_in + blen, bs) ||
1773 !IS_ALIGNED(pos_out, bs) || !IS_ALIGNED(pos_out + blen, bs))
1774 return -EINVAL;
1775
1776
1777 if (same_inode) {
1778 if (pos_out + blen > pos_in && pos_out < pos_in + blen)
1779 return -EINVAL;
1780 }
1781
1782
1783 inode_dio_wait(inode_in);
1784 if (!same_inode)
1785 inode_dio_wait(inode_out);
1786
1787 ret = filemap_write_and_wait_range(inode_in->i_mapping,
1788 pos_in, pos_in + *len - 1);
1789 if (ret)
1790 return ret;
1791
1792 ret = filemap_write_and_wait_range(inode_out->i_mapping,
1793 pos_out, pos_out + *len - 1);
1794 if (ret)
1795 return ret;
1796
1797
1798
1799
1800 if (is_dedupe) {
1801 bool is_same = false;
1802
1803 ret = vfs_dedupe_file_range_compare(inode_in, pos_in,
1804 inode_out, pos_out, *len, &is_same);
1805 if (ret)
1806 return ret;
1807 if (!is_same)
1808 return -EBADE;
1809 }
1810
1811 return 1;
1812}
1813EXPORT_SYMBOL(vfs_clone_file_prep_inodes);
1814
1815int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
1816 struct file *file_out, loff_t pos_out, u64 len)
1817{
1818 struct inode *inode_in = file_inode(file_in);
1819 struct inode *inode_out = file_inode(file_out);
1820 int ret;
1821
1822 if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
1823 return -EISDIR;
1824 if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
1825 return -EINVAL;
1826
1827
1828
1829
1830
1831
1832 if (inode_in->i_sb != inode_out->i_sb)
1833 return -EXDEV;
1834
1835 if (!(file_in->f_mode & FMODE_READ) ||
1836 !(file_out->f_mode & FMODE_WRITE) ||
1837 (file_out->f_flags & O_APPEND))
1838 return -EBADF;
1839
1840 if (!file_in->f_op->clone_file_range)
1841 return -EOPNOTSUPP;
1842
1843 ret = clone_verify_area(file_in, pos_in, len, false);
1844 if (ret)
1845 return ret;
1846
1847 ret = clone_verify_area(file_out, pos_out, len, true);
1848 if (ret)
1849 return ret;
1850
1851 if (pos_in + len > i_size_read(inode_in))
1852 return -EINVAL;
1853
1854 ret = file_in->f_op->clone_file_range(file_in, pos_in,
1855 file_out, pos_out, len);
1856 if (!ret) {
1857 fsnotify_access(file_in);
1858 fsnotify_modify(file_out);
1859 }
1860
1861 return ret;
1862}
1863EXPORT_SYMBOL(vfs_clone_file_range);
1864
1865
1866
1867
1868
1869static struct page *vfs_dedupe_get_page(struct inode *inode, loff_t offset)
1870{
1871 struct address_space *mapping;
1872 struct page *page;
1873 pgoff_t n;
1874
1875 n = offset >> PAGE_SHIFT;
1876 mapping = inode->i_mapping;
1877 page = read_mapping_page(mapping, n, NULL);
1878 if (IS_ERR(page))
1879 return page;
1880 if (!PageUptodate(page)) {
1881 put_page(page);
1882 return ERR_PTR(-EIO);
1883 }
1884 lock_page(page);
1885 return page;
1886}
1887
1888
1889
1890
1891
1892int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
1893 struct inode *dest, loff_t destoff,
1894 loff_t len, bool *is_same)
1895{
1896 loff_t src_poff;
1897 loff_t dest_poff;
1898 void *src_addr;
1899 void *dest_addr;
1900 struct page *src_page;
1901 struct page *dest_page;
1902 loff_t cmp_len;
1903 bool same;
1904 int error;
1905
1906 error = -EINVAL;
1907 same = true;
1908 while (len) {
1909 src_poff = srcoff & (PAGE_SIZE - 1);
1910 dest_poff = destoff & (PAGE_SIZE - 1);
1911 cmp_len = min(PAGE_SIZE - src_poff,
1912 PAGE_SIZE - dest_poff);
1913 cmp_len = min(cmp_len, len);
1914 if (cmp_len <= 0)
1915 goto out_error;
1916
1917 src_page = vfs_dedupe_get_page(src, srcoff);
1918 if (IS_ERR(src_page)) {
1919 error = PTR_ERR(src_page);
1920 goto out_error;
1921 }
1922 dest_page = vfs_dedupe_get_page(dest, destoff);
1923 if (IS_ERR(dest_page)) {
1924 error = PTR_ERR(dest_page);
1925 unlock_page(src_page);
1926 put_page(src_page);
1927 goto out_error;
1928 }
1929 src_addr = kmap_atomic(src_page);
1930 dest_addr = kmap_atomic(dest_page);
1931
1932 flush_dcache_page(src_page);
1933 flush_dcache_page(dest_page);
1934
1935 if (memcmp(src_addr + src_poff, dest_addr + dest_poff, cmp_len))
1936 same = false;
1937
1938 kunmap_atomic(dest_addr);
1939 kunmap_atomic(src_addr);
1940 unlock_page(dest_page);
1941 unlock_page(src_page);
1942 put_page(dest_page);
1943 put_page(src_page);
1944
1945 if (!same)
1946 break;
1947
1948 srcoff += cmp_len;
1949 destoff += cmp_len;
1950 len -= cmp_len;
1951 }
1952
1953 *is_same = same;
1954 return 0;
1955
1956out_error:
1957 return error;
1958}
1959EXPORT_SYMBOL(vfs_dedupe_file_range_compare);
1960
1961int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
1962{
1963 struct file_dedupe_range_info *info;
1964 struct inode *src = file_inode(file);
1965 u64 off;
1966 u64 len;
1967 int i;
1968 int ret;
1969 bool is_admin = capable(CAP_SYS_ADMIN);
1970 u16 count = same->dest_count;
1971 struct file *dst_file;
1972 loff_t dst_off;
1973 ssize_t deduped;
1974
1975 if (!(file->f_mode & FMODE_READ))
1976 return -EINVAL;
1977
1978 if (same->reserved1 || same->reserved2)
1979 return -EINVAL;
1980
1981 off = same->src_offset;
1982 len = same->src_length;
1983
1984 ret = -EISDIR;
1985 if (S_ISDIR(src->i_mode))
1986 goto out;
1987
1988 ret = -EINVAL;
1989 if (!S_ISREG(src->i_mode))
1990 goto out;
1991
1992 ret = clone_verify_area(file, off, len, false);
1993 if (ret < 0)
1994 goto out;
1995 ret = 0;
1996
1997 if (off + len > i_size_read(src))
1998 return -EINVAL;
1999
2000
2001 for (i = 0; i < count; i++) {
2002 same->info[i].bytes_deduped = 0ULL;
2003 same->info[i].status = FILE_DEDUPE_RANGE_SAME;
2004 }
2005
2006 for (i = 0, info = same->info; i < count; i++, info++) {
2007 struct inode *dst;
2008 struct fd dst_fd = fdget(info->dest_fd);
2009
2010 dst_file = dst_fd.file;
2011 if (!dst_file) {
2012 info->status = -EBADF;
2013 goto next_loop;
2014 }
2015 dst = file_inode(dst_file);
2016
2017 ret = mnt_want_write_file(dst_file);
2018 if (ret) {
2019 info->status = ret;
2020 goto next_loop;
2021 }
2022
2023 dst_off = info->dest_offset;
2024 ret = clone_verify_area(dst_file, dst_off, len, true);
2025 if (ret < 0) {
2026 info->status = ret;
2027 goto next_file;
2028 }
2029 ret = 0;
2030
2031 if (info->reserved) {
2032 info->status = -EINVAL;
2033 } else if (!(is_admin || (dst_file->f_mode & FMODE_WRITE))) {
2034 info->status = -EINVAL;
2035 } else if (file->f_path.mnt != dst_file->f_path.mnt) {
2036 info->status = -EXDEV;
2037 } else if (S_ISDIR(dst->i_mode)) {
2038 info->status = -EISDIR;
2039 } else if (dst_file->f_op->dedupe_file_range == NULL) {
2040 info->status = -EINVAL;
2041 } else {
2042 deduped = dst_file->f_op->dedupe_file_range(file, off,
2043 len, dst_file,
2044 info->dest_offset);
2045 if (deduped == -EBADE)
2046 info->status = FILE_DEDUPE_RANGE_DIFFERS;
2047 else if (deduped < 0)
2048 info->status = deduped;
2049 else
2050 info->bytes_deduped += deduped;
2051 }
2052
2053next_file:
2054 mnt_drop_write_file(dst_file);
2055next_loop:
2056 fdput(dst_fd);
2057
2058 if (fatal_signal_pending(current))
2059 goto out;
2060 }
2061
2062out:
2063 return ret;
2064}
2065EXPORT_SYMBOL(vfs_dedupe_file_range);
2066