1
2
3
4
5
6
7
8#include <linux/slab.h>
9#include <linux/stat.h>
10#include <linux/sched/xacct.h>
11#include <linux/fcntl.h>
12#include <linux/file.h>
13#include <linux/uio.h>
14#include <linux/fsnotify.h>
15#include <linux/security.h>
16#include <linux/export.h>
17#include <linux/syscalls.h>
18#include <linux/pagemap.h>
19#include <linux/splice.h>
20#include <linux/compat.h>
21#include <linux/mount.h>
22#include <linux/fs.h>
23#include "internal.h"
24
25#include <linux/uaccess.h>
26#include <asm/unistd.h>
27
/*
 * Ready-made file_operations table for read-only files: seeking goes through
 * generic_file_llseek(), reads through generic_file_read_iter(), mappings
 * through generic_file_readonly_mmap() and splice reads through
 * generic_file_splice_read().  No write method is provided.
 */
const struct file_operations generic_ro_fops = {
	.llseek = generic_file_llseek,
	.read_iter = generic_file_read_iter,
	.mmap = generic_file_readonly_mmap,
	.splice_read = generic_file_splice_read,
};

EXPORT_SYMBOL(generic_ro_fops);
36
37static inline bool unsigned_offsets(struct file *file)
38{
39 return file->f_mode & FMODE_UNSIGNED_OFFSET;
40}
41
42
43
44
45
46
47
48
49
50
51
52
53
54loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize)
55{
56 if (offset < 0 && !unsigned_offsets(file))
57 return -EINVAL;
58 if (offset > maxsize)
59 return -EINVAL;
60
61 if (offset != file->f_pos) {
62 file->f_pos = offset;
63 file->f_version = 0;
64 }
65 return offset;
66}
67EXPORT_SYMBOL(vfs_setpos);
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
/**
 * generic_file_llseek_size - llseek helper with caller-supplied limits
 * @file:	file structure to seek on
 * @offset:	file offset to seek to
 * @whence:	type of seek
 * @maxsize:	largest offset vfs_setpos() will accept
 * @eof:	offset used as end-of-file for SEEK_END, SEEK_DATA and SEEK_HOLE
 *
 * Returns the new position on success, -EINVAL when vfs_setpos() rejects the
 * resulting offset, or -ENXIO for SEEK_DATA/SEEK_HOLE at or beyond @eof.
 * A @whence value without a case below (such as SEEK_SET) uses @offset as is.
 */
loff_t
generic_file_llseek_size(struct file *file, loff_t offset, int whence,
		loff_t maxsize, loff_t eof)
{
	switch (whence) {
	case SEEK_END:
		offset += eof;
		break;
	case SEEK_CUR:
		/*
		 * A zero-offset SEEK_CUR is a pure position query: report
		 * f_pos without taking f_lock and without writing it back.
		 */
		if (offset == 0)
			return file->f_pos;
		/*
		 * The relative update reads f_pos and then stores the new
		 * value, so both steps are done under f_lock.
		 */
		spin_lock(&file->f_lock);
		offset = vfs_setpos(file, file->f_pos + offset, maxsize);
		spin_unlock(&file->f_lock);
		return offset;
	case SEEK_DATA:
		/*
		 * Everything before @eof is treated as data, so any offset
		 * below @eof is kept unchanged.  The unsigned comparison also
		 * sends negative offsets to -ENXIO.
		 */
		if ((unsigned long long)offset >= eof)
			return -ENXIO;
		break;
	case SEEK_HOLE:
		/*
		 * The only hole reported is the one at @eof, so any offset
		 * below @eof moves to @eof.
		 */
		if ((unsigned long long)offset >= eof)
			return -ENXIO;
		offset = eof;
		break;
	}

	return vfs_setpos(file, offset, maxsize);
}
EXPORT_SYMBOL(generic_file_llseek_size);
133
134
135
136
137
138
139
140
141
142
143
144loff_t generic_file_llseek(struct file *file, loff_t offset, int whence)
145{
146 struct inode *inode = file->f_mapping->host;
147
148 return generic_file_llseek_size(file, offset, whence,
149 inode->i_sb->s_maxbytes,
150 i_size_read(inode));
151}
152EXPORT_SYMBOL(generic_file_llseek);
153
154
155
156
157
158
159
160
161
162loff_t fixed_size_llseek(struct file *file, loff_t offset, int whence, loff_t size)
163{
164 switch (whence) {
165 case SEEK_SET: case SEEK_CUR: case SEEK_END:
166 return generic_file_llseek_size(file, offset, whence,
167 size, size);
168 default:
169 return -EINVAL;
170 }
171}
172EXPORT_SYMBOL(fixed_size_llseek);
173
174
175
176
177
178
179
180
181loff_t no_seek_end_llseek(struct file *file, loff_t offset, int whence)
182{
183 switch (whence) {
184 case SEEK_SET: case SEEK_CUR:
185 return generic_file_llseek_size(file, offset, whence,
186 OFFSET_MAX, 0);
187 default:
188 return -EINVAL;
189 }
190}
191EXPORT_SYMBOL(no_seek_end_llseek);
192
193
194
195
196
197
198
199
200
201loff_t no_seek_end_llseek_size(struct file *file, loff_t offset, int whence, loff_t size)
202{
203 switch (whence) {
204 case SEEK_SET: case SEEK_CUR:
205 return generic_file_llseek_size(file, offset, whence,
206 size, 0);
207 default:
208 return -EINVAL;
209 }
210}
211EXPORT_SYMBOL(no_seek_end_llseek_size);
212
213
214
215
216
217
218
219
220
221
222
223
224loff_t noop_llseek(struct file *file, loff_t offset, int whence)
225{
226 return file->f_pos;
227}
228EXPORT_SYMBOL(noop_llseek);
229
/*
 * llseek method for files that cannot seek: every request fails with -ESPIPE.
 * vfs_llseek() also falls back to this when the file lacks FMODE_LSEEK or has
 * no ->llseek method.
 */
loff_t no_llseek(struct file *file, loff_t offset, int whence)
{
	return -ESPIPE;
}
EXPORT_SYMBOL(no_llseek);
235
/*
 * llseek implementation that serializes on the inode lock.
 *
 * Computes the target offset for @whence, then stores it in f_pos (resetting
 * f_version) if it differs from the current position.  Returns the new
 * position, -ENXIO for SEEK_DATA/SEEK_HOLE at or beyond i_size, or -EINVAL
 * when the result is negative and the file does not use unsigned offsets.
 * No upper bound is applied to the resulting offset.
 */
loff_t default_llseek(struct file *file, loff_t offset, int whence)
{
	struct inode *inode = file_inode(file);
	loff_t retval;

	inode_lock(inode);
	switch (whence) {
		case SEEK_END:
			offset += i_size_read(inode);
			break;
		case SEEK_CUR:
			/* Zero-offset SEEK_CUR only reports the position. */
			if (offset == 0) {
				retval = file->f_pos;
				goto out;
			}
			offset += file->f_pos;
			break;
		case SEEK_DATA:
			/*
			 * Everything before i_size is treated as data, so an
			 * offset below i_size is kept unchanged.
			 */
			if (offset >= inode->i_size) {
				retval = -ENXIO;
				goto out;
			}
			break;
		case SEEK_HOLE:
			/*
			 * The only hole reported is the one at i_size, so an
			 * offset below i_size moves to i_size.
			 */
			if (offset >= inode->i_size) {
				retval = -ENXIO;
				goto out;
			}
			offset = inode->i_size;
			break;
	}
	/* -EINVAL unless the offset is acceptable for this file. */
	retval = -EINVAL;
	if (offset >= 0 || unsigned_offsets(file)) {
		if (offset != file->f_pos) {
			file->f_pos = offset;
			file->f_version = 0;
		}
		retval = offset;
	}
out:
	inode_unlock(inode);
	return retval;
}
EXPORT_SYMBOL(default_llseek);
290
291loff_t vfs_llseek(struct file *file, loff_t offset, int whence)
292{
293 loff_t (*fn)(struct file *, loff_t, int);
294
295 fn = no_llseek;
296 if (file->f_mode & FMODE_LSEEK) {
297 if (file->f_op->llseek)
298 fn = file->f_op->llseek;
299 }
300 return fn(file, offset, whence);
301}
302EXPORT_SYMBOL(vfs_llseek);
303
304off_t ksys_lseek(unsigned int fd, off_t offset, unsigned int whence)
305{
306 off_t retval;
307 struct fd f = fdget_pos(fd);
308 if (!f.file)
309 return -EBADF;
310
311 retval = -EINVAL;
312 if (whence <= SEEK_MAX) {
313 loff_t res = vfs_llseek(f.file, offset, whence);
314 retval = res;
315 if (res != (loff_t)retval)
316 retval = -EOVERFLOW;
317 }
318 fdput_pos(f);
319 return retval;
320}
321
/* lseek(2) entry point; all work is done by ksys_lseek(). */
SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence)
{
	return ksys_lseek(fd, offset, whence);
}
326
#ifdef CONFIG_COMPAT
/*
 * Compat lseek(2) entry point: takes the offset as compat_off_t and forwards
 * to ksys_lseek().
 */
COMPAT_SYSCALL_DEFINE3(lseek, unsigned int, fd, compat_off_t, offset, unsigned int, whence)
{
	return ksys_lseek(fd, offset, whence);
}
#endif
333
334#if !defined(CONFIG_64BIT) || defined(CONFIG_COMPAT)
335SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high,
336 unsigned long, offset_low, loff_t __user *, result,
337 unsigned int, whence)
338{
339 int retval;
340 struct fd f = fdget_pos(fd);
341 loff_t offset;
342
343 if (!f.file)
344 return -EBADF;
345
346 retval = -EINVAL;
347 if (whence > SEEK_MAX)
348 goto out_putf;
349
350 offset = vfs_llseek(f.file, ((loff_t) offset_high << 32) | offset_low,
351 whence);
352
353 retval = (int)offset;
354 if (offset >= 0) {
355 retval = -EFAULT;
356 if (!copy_to_user(result, &offset, sizeof(offset)))
357 retval = 0;
358 }
359out_putf:
360 fdput_pos(f);
361 return retval;
362}
363#endif
364
/*
 * Validate a read or write of @count bytes at *@ppos before it is issued.
 *
 * @read_write selects the lock type and permission mask (READ or anything
 * else for write).  @ppos may be NULL, in which case the offset and
 * mandatory-lock checks are skipped.
 *
 * Returns -EINVAL when @count is negative as an ssize_t, or when the range
 * is negative or wraps and the file does not use unsigned offsets;
 * -EOVERFLOW when an unsigned-offset range would wrap past zero; an error
 * from locks_mandatory_area(); otherwise the result of
 * security_file_permission().
 */
int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t count)
{
	struct inode *inode;
	int retval = -EINVAL;

	inode = file_inode(file);
	if (unlikely((ssize_t) count < 0))
		return retval;

	/*
	 * The range checks below need a position, so they are only done
	 * when the caller supplied one.
	 */
	if (ppos) {
		loff_t pos = *ppos;

		if (unlikely(pos < 0)) {
			if (!unsigned_offsets(file))
				return retval;
			/* count is within 0..SSIZE_MAX here and -pos is positive */
			if (count >= -pos)
				return -EOVERFLOW;
		} else if (unlikely((loff_t) (pos + count) < 0)) {
			/* pos + count does not fit in a signed loff_t */
			if (!unsigned_offsets(file))
				return retval;
		}

		if (unlikely(inode->i_flctx && mandatory_lock(inode))) {
			retval = locks_mandatory_area(inode, file, pos, pos + count - 1,
					read_write == READ ? F_RDLCK : F_WRLCK);
			if (retval < 0)
				return retval;
		}
	}

	return security_file_permission(file,
				read_write == READ ? MAY_READ : MAY_WRITE);
}
402
403static ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
404{
405 struct iovec iov = { .iov_base = buf, .iov_len = len };
406 struct kiocb kiocb;
407 struct iov_iter iter;
408 ssize_t ret;
409
410 init_sync_kiocb(&kiocb, filp);
411 kiocb.ki_pos = (ppos ? *ppos : 0);
412 iov_iter_init(&iter, READ, &iov, 1, len);
413
414 ret = call_read_iter(filp, &kiocb, &iter);
415 BUG_ON(ret == -EIOCBQUEUED);
416 if (ppos)
417 *ppos = kiocb.ki_pos;
418 return ret;
419}
420
421ssize_t __vfs_read(struct file *file, char __user *buf, size_t count,
422 loff_t *pos)
423{
424 if (file->f_op->read)
425 return file->f_op->read(file, buf, count, pos);
426 else if (file->f_op->read_iter)
427 return new_sync_read(file, buf, count, pos);
428 else
429 return -EINVAL;
430}
431
432ssize_t kernel_read(struct file *file, void *buf, size_t count, loff_t *pos)
433{
434 mm_segment_t old_fs;
435 ssize_t result;
436
437 old_fs = get_fs();
438 set_fs(KERNEL_DS);
439
440 result = vfs_read(file, (void __user *)buf, count, pos);
441 set_fs(old_fs);
442 return result;
443}
444EXPORT_SYMBOL(kernel_read);
445
446ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
447{
448 ssize_t ret;
449
450 if (!(file->f_mode & FMODE_READ))
451 return -EBADF;
452 if (!(file->f_mode & FMODE_CAN_READ))
453 return -EINVAL;
454 if (unlikely(!access_ok(buf, count)))
455 return -EFAULT;
456
457 ret = rw_verify_area(READ, file, pos, count);
458 if (!ret) {
459 if (count > MAX_RW_COUNT)
460 count = MAX_RW_COUNT;
461 ret = __vfs_read(file, buf, count, pos);
462 if (ret > 0) {
463 fsnotify_access(file);
464 add_rchar(current, ret);
465 }
466 inc_syscr(current);
467 }
468
469 return ret;
470}
471
472static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
473{
474 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len };
475 struct kiocb kiocb;
476 struct iov_iter iter;
477 ssize_t ret;
478
479 init_sync_kiocb(&kiocb, filp);
480 kiocb.ki_pos = (ppos ? *ppos : 0);
481 iov_iter_init(&iter, WRITE, &iov, 1, len);
482
483 ret = call_write_iter(filp, &kiocb, &iter);
484 BUG_ON(ret == -EIOCBQUEUED);
485 if (ret > 0 && ppos)
486 *ppos = kiocb.ki_pos;
487 return ret;
488}
489
490static ssize_t __vfs_write(struct file *file, const char __user *p,
491 size_t count, loff_t *pos)
492{
493 if (file->f_op->write)
494 return file->f_op->write(file, p, count, pos);
495 else if (file->f_op->write_iter)
496 return new_sync_write(file, p, count, pos);
497 else
498 return -EINVAL;
499}
500
501ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t *pos)
502{
503 mm_segment_t old_fs;
504 const char __user *p;
505 ssize_t ret;
506
507 if (!(file->f_mode & FMODE_CAN_WRITE))
508 return -EINVAL;
509
510 old_fs = get_fs();
511 set_fs(KERNEL_DS);
512 p = (__force const char __user *)buf;
513 if (count > MAX_RW_COUNT)
514 count = MAX_RW_COUNT;
515 ret = __vfs_write(file, p, count, pos);
516 set_fs(old_fs);
517 if (ret > 0) {
518 fsnotify_modify(file);
519 add_wchar(current, ret);
520 }
521 inc_syscw(current);
522 return ret;
523}
524EXPORT_SYMBOL(__kernel_write);
525
526ssize_t kernel_write(struct file *file, const void *buf, size_t count,
527 loff_t *pos)
528{
529 mm_segment_t old_fs;
530 ssize_t res;
531
532 old_fs = get_fs();
533 set_fs(KERNEL_DS);
534
535 res = vfs_write(file, (__force const char __user *)buf, count, pos);
536 set_fs(old_fs);
537
538 return res;
539}
540EXPORT_SYMBOL(kernel_write);
541
542ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
543{
544 ssize_t ret;
545
546 if (!(file->f_mode & FMODE_WRITE))
547 return -EBADF;
548 if (!(file->f_mode & FMODE_CAN_WRITE))
549 return -EINVAL;
550 if (unlikely(!access_ok(buf, count)))
551 return -EFAULT;
552
553 ret = rw_verify_area(WRITE, file, pos, count);
554 if (!ret) {
555 if (count > MAX_RW_COUNT)
556 count = MAX_RW_COUNT;
557 file_start_write(file);
558 ret = __vfs_write(file, buf, count, pos);
559 if (ret > 0) {
560 fsnotify_modify(file);
561 add_wchar(current, ret);
562 }
563 inc_syscw(current);
564 file_end_write(file);
565 }
566
567 return ret;
568}
569
570
571static inline loff_t *file_ppos(struct file *file)
572{
573 return file->f_mode & FMODE_STREAM ? NULL : &file->f_pos;
574}
575
576ssize_t ksys_read(unsigned int fd, char __user *buf, size_t count)
577{
578 struct fd f = fdget_pos(fd);
579 ssize_t ret = -EBADF;
580
581 if (f.file) {
582 loff_t pos, *ppos = file_ppos(f.file);
583 if (ppos) {
584 pos = *ppos;
585 ppos = &pos;
586 }
587 ret = vfs_read(f.file, buf, count, ppos);
588 if (ret >= 0 && ppos)
589 f.file->f_pos = pos;
590 fdput_pos(f);
591 }
592 return ret;
593}
594
/* read(2) entry point; all work is done by ksys_read(). */
SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
{
	return ksys_read(fd, buf, count);
}
599
600ssize_t ksys_write(unsigned int fd, const char __user *buf, size_t count)
601{
602 struct fd f = fdget_pos(fd);
603 ssize_t ret = -EBADF;
604
605 if (f.file) {
606 loff_t pos, *ppos = file_ppos(f.file);
607 if (ppos) {
608 pos = *ppos;
609 ppos = &pos;
610 }
611 ret = vfs_write(f.file, buf, count, ppos);
612 if (ret >= 0 && ppos)
613 f.file->f_pos = pos;
614 fdput_pos(f);
615 }
616
617 return ret;
618}
619
/* write(2) entry point; all work is done by ksys_write(). */
SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
		size_t, count)
{
	return ksys_write(fd, buf, count);
}
625
626ssize_t ksys_pread64(unsigned int fd, char __user *buf, size_t count,
627 loff_t pos)
628{
629 struct fd f;
630 ssize_t ret = -EBADF;
631
632 if (pos < 0)
633 return -EINVAL;
634
635 f = fdget(fd);
636 if (f.file) {
637 ret = -ESPIPE;
638 if (f.file->f_mode & FMODE_PREAD)
639 ret = vfs_read(f.file, buf, count, &pos);
640 fdput(f);
641 }
642
643 return ret;
644}
645
/* pread64(2) entry point; all work is done by ksys_pread64(). */
SYSCALL_DEFINE4(pread64, unsigned int, fd, char __user *, buf,
			size_t, count, loff_t, pos)
{
	return ksys_pread64(fd, buf, count, pos);
}
651
652ssize_t ksys_pwrite64(unsigned int fd, const char __user *buf,
653 size_t count, loff_t pos)
654{
655 struct fd f;
656 ssize_t ret = -EBADF;
657
658 if (pos < 0)
659 return -EINVAL;
660
661 f = fdget(fd);
662 if (f.file) {
663 ret = -ESPIPE;
664 if (f.file->f_mode & FMODE_PWRITE)
665 ret = vfs_write(f.file, buf, count, &pos);
666 fdput(f);
667 }
668
669 return ret;
670}
671
/* pwrite64(2) entry point; all work is done by ksys_pwrite64(). */
SYSCALL_DEFINE4(pwrite64, unsigned int, fd, const char __user *, buf,
			 size_t, count, loff_t, pos)
{
	return ksys_pwrite64(fd, buf, count, pos);
}
677
678static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
679 loff_t *ppos, int type, rwf_t flags)
680{
681 struct kiocb kiocb;
682 ssize_t ret;
683
684 init_sync_kiocb(&kiocb, filp);
685 ret = kiocb_set_rw_flags(&kiocb, flags);
686 if (ret)
687 return ret;
688 kiocb.ki_pos = (ppos ? *ppos : 0);
689
690 if (type == READ)
691 ret = call_read_iter(filp, &kiocb, iter);
692 else
693 ret = call_write_iter(filp, &kiocb, iter);
694 BUG_ON(ret == -EIOCBQUEUED);
695 if (ppos)
696 *ppos = kiocb.ki_pos;
697 return ret;
698}
699
700
/*
 * Emulate a vectored transfer by calling the file's ->read or ->write once
 * per segment of @iter.
 *
 * Any flag other than RWF_HIPRI is rejected with -EOPNOTSUPP.  The loop
 * stops at the first error or short transfer.  Returns the total number of
 * bytes transferred, or the error from the first call if nothing had been
 * transferred before it failed.
 */
static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter,
		loff_t *ppos, int type, rwf_t flags)
{
	ssize_t ret = 0;

	if (flags & ~RWF_HIPRI)
		return -EOPNOTSUPP;

	while (iov_iter_count(iter)) {
		struct iovec iovec = iov_iter_iovec(iter);
		ssize_t nr;

		if (type == READ) {
			nr = filp->f_op->read(filp, iovec.iov_base,
					      iovec.iov_len, ppos);
		} else {
			nr = filp->f_op->write(filp, iovec.iov_base,
					       iovec.iov_len, ppos);
		}

		if (nr < 0) {
			/* Report the error only if no data moved so far. */
			if (!ret)
				ret = nr;
			break;
		}
		ret += nr;
		/* Short transfer: stop without advancing the iterator. */
		if (nr != iovec.iov_len)
			break;
		iov_iter_advance(iter, nr);
	}

	return ret;
}
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
/**
 * rw_copy_check_uvector - copy an iovec array from user space and check it
 * @type:	when negative, the per-segment access_ok() check is skipped
 * @uvector:	user-space iovec array
 * @nr_segs:	number of elements in @uvector
 * @fast_segs:	number of elements available in @fast_pointer
 * @fast_pointer: caller-provided array used when @nr_segs fits in it
 * @ret_pointer: receives the array that was actually used
 *
 * Returns the total length of all segments, capped at MAX_RW_COUNT, or a
 * negative error: -EINVAL when @nr_segs exceeds UIO_MAXIOV or a segment
 * length is negative as an ssize_t, -ENOMEM when the array cannot be
 * allocated, -EFAULT when the copy or an access_ok() check fails.
 *
 * *@ret_pointer is written on every path, including errors: it is either
 * @fast_pointer or the kmalloc_array() result (NULL if that allocation
 * failed).
 */
ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
			      unsigned long nr_segs, unsigned long fast_segs,
			      struct iovec *fast_pointer,
			      struct iovec **ret_pointer)
{
	unsigned long seg;
	ssize_t ret;
	struct iovec *iov = fast_pointer;

	/* An empty vector is valid and has a total length of zero. */
	if (nr_segs == 0) {
		ret = 0;
		goto out;
	}

	/* Refuse vectors with more than UIO_MAXIOV segments. */
	if (nr_segs > UIO_MAXIOV) {
		ret = -EINVAL;
		goto out;
	}
	/* Only allocate when the caller's array is too small. */
	if (nr_segs > fast_segs) {
		iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL);
		if (iov == NULL) {
			ret = -ENOMEM;
			goto out;
		}
	}
	if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) {
		ret = -EFAULT;
		goto out;
	}

	/*
	 * Validate each segment and accumulate the total in ret.  The total
	 * is kept at or below MAX_RW_COUNT by shortening the segment that
	 * would exceed it; segments after that one end up with length 0.
	 */
	ret = 0;
	for (seg = 0; seg < nr_segs; seg++) {
		void __user *buf = iov[seg].iov_base;
		ssize_t len = (ssize_t)iov[seg].iov_len;

		/* A length that is negative as ssize_t is rejected. */
		if (len < 0) {
			ret = -EINVAL;
			goto out;
		}
		if (type >= 0
		    && unlikely(!access_ok(buf, len))) {
			ret = -EFAULT;
			goto out;
		}
		if (len > MAX_RW_COUNT - ret) {
			len = MAX_RW_COUNT - ret;
			iov[seg].iov_len = len;
		}
		ret += len;
	}
out:
	*ret_pointer = iov;
	return ret;
}
838
#ifdef CONFIG_COMPAT
/*
 * Compat counterpart of rw_copy_check_uvector(): reads an array of
 * struct compat_iovec from user space and converts it to struct iovec.
 *
 * Returns the total length of all segments, capped at MAX_RW_COUNT, or a
 * negative error: -EINVAL when @nr_segs exceeds UIO_MAXIOV or a segment
 * length is negative, -ENOMEM when the array cannot be allocated, -EFAULT
 * when a user access or access_ok() check fails.  A negative @type skips the
 * per-segment access_ok() check.
 *
 * *@ret_pointer starts out as @fast_pointer and is switched to the allocated
 * array only after a successful allocation, so it is never NULL here unless
 * @fast_pointer is.
 */
ssize_t compat_rw_copy_check_uvector(int type,
		const struct compat_iovec __user *uvector, unsigned long nr_segs,
		unsigned long fast_segs, struct iovec *fast_pointer,
		struct iovec **ret_pointer)
{
	compat_ssize_t tot_len;
	struct iovec *iov = *ret_pointer = fast_pointer;
	ssize_t ret = 0;
	int seg;

	/* An empty vector is valid and has a total length of zero. */
	if (nr_segs == 0)
		goto out;

	ret = -EINVAL;
	if (nr_segs > UIO_MAXIOV)
		goto out;
	if (nr_segs > fast_segs) {
		ret = -ENOMEM;
		iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL);
		if (iov == NULL)
			goto out;
	}
	*ret_pointer = iov;

	/* Check the whole user array once; __get_user() is used below. */
	ret = -EFAULT;
	if (!access_ok(uvector, nr_segs*sizeof(*uvector)))
		goto out;

	/*
	 * Convert and validate each segment.  The running total is kept at
	 * or below MAX_RW_COUNT by shortening the segment that would exceed
	 * it; segments after that one end up with length 0.
	 */
	tot_len = 0;
	ret = -EINVAL;
	for (seg = 0; seg < nr_segs; seg++) {
		compat_uptr_t buf;
		compat_ssize_t len;

		if (__get_user(len, &uvector->iov_len) ||
		   __get_user(buf, &uvector->iov_base)) {
			ret = -EFAULT;
			goto out;
		}
		if (len < 0)	/* negative length: ret is still -EINVAL */
			goto out;
		if (type >= 0 &&
		    !access_ok(compat_ptr(buf), len)) {
			ret = -EFAULT;
			goto out;
		}
		if (len > MAX_RW_COUNT - tot_len)
			len = MAX_RW_COUNT - tot_len;
		tot_len += len;
		iov->iov_base = compat_ptr(buf);
		iov->iov_len = (compat_size_t) len;
		uvector++;
		iov++;
	}
	ret = tot_len;

out:
	return ret;
}
#endif
913
914static ssize_t do_iter_read(struct file *file, struct iov_iter *iter,
915 loff_t *pos, rwf_t flags)
916{
917 size_t tot_len;
918 ssize_t ret = 0;
919
920 if (!(file->f_mode & FMODE_READ))
921 return -EBADF;
922 if (!(file->f_mode & FMODE_CAN_READ))
923 return -EINVAL;
924
925 tot_len = iov_iter_count(iter);
926 if (!tot_len)
927 goto out;
928 ret = rw_verify_area(READ, file, pos, tot_len);
929 if (ret < 0)
930 return ret;
931
932 if (file->f_op->read_iter)
933 ret = do_iter_readv_writev(file, iter, pos, READ, flags);
934 else
935 ret = do_loop_readv_writev(file, iter, pos, READ, flags);
936out:
937 if (ret >= 0)
938 fsnotify_access(file);
939 return ret;
940}
941
942ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos,
943 rwf_t flags)
944{
945 if (!file->f_op->read_iter)
946 return -EINVAL;
947 return do_iter_read(file, iter, ppos, flags);
948}
949EXPORT_SYMBOL(vfs_iter_read);
950
951static ssize_t do_iter_write(struct file *file, struct iov_iter *iter,
952 loff_t *pos, rwf_t flags)
953{
954 size_t tot_len;
955 ssize_t ret = 0;
956
957 if (!(file->f_mode & FMODE_WRITE))
958 return -EBADF;
959 if (!(file->f_mode & FMODE_CAN_WRITE))
960 return -EINVAL;
961
962 tot_len = iov_iter_count(iter);
963 if (!tot_len)
964 return 0;
965 ret = rw_verify_area(WRITE, file, pos, tot_len);
966 if (ret < 0)
967 return ret;
968
969 if (file->f_op->write_iter)
970 ret = do_iter_readv_writev(file, iter, pos, WRITE, flags);
971 else
972 ret = do_loop_readv_writev(file, iter, pos, WRITE, flags);
973 if (ret > 0)
974 fsnotify_modify(file);
975 return ret;
976}
977
978ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos,
979 rwf_t flags)
980{
981 if (!file->f_op->write_iter)
982 return -EINVAL;
983 return do_iter_write(file, iter, ppos, flags);
984}
985EXPORT_SYMBOL(vfs_iter_write);
986
987ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
988 unsigned long vlen, loff_t *pos, rwf_t flags)
989{
990 struct iovec iovstack[UIO_FASTIOV];
991 struct iovec *iov = iovstack;
992 struct iov_iter iter;
993 ssize_t ret;
994
995 ret = import_iovec(READ, vec, vlen, ARRAY_SIZE(iovstack), &iov, &iter);
996 if (ret >= 0) {
997 ret = do_iter_read(file, &iter, pos, flags);
998 kfree(iov);
999 }
1000
1001 return ret;
1002}
1003
1004static ssize_t vfs_writev(struct file *file, const struct iovec __user *vec,
1005 unsigned long vlen, loff_t *pos, rwf_t flags)
1006{
1007 struct iovec iovstack[UIO_FASTIOV];
1008 struct iovec *iov = iovstack;
1009 struct iov_iter iter;
1010 ssize_t ret;
1011
1012 ret = import_iovec(WRITE, vec, vlen, ARRAY_SIZE(iovstack), &iov, &iter);
1013 if (ret >= 0) {
1014 file_start_write(file);
1015 ret = do_iter_write(file, &iter, pos, flags);
1016 file_end_write(file);
1017 kfree(iov);
1018 }
1019 return ret;
1020}
1021
1022static ssize_t do_readv(unsigned long fd, const struct iovec __user *vec,
1023 unsigned long vlen, rwf_t flags)
1024{
1025 struct fd f = fdget_pos(fd);
1026 ssize_t ret = -EBADF;
1027
1028 if (f.file) {
1029 loff_t pos, *ppos = file_ppos(f.file);
1030 if (ppos) {
1031 pos = *ppos;
1032 ppos = &pos;
1033 }
1034 ret = vfs_readv(f.file, vec, vlen, ppos, flags);
1035 if (ret >= 0 && ppos)
1036 f.file->f_pos = pos;
1037 fdput_pos(f);
1038 }
1039
1040 if (ret > 0)
1041 add_rchar(current, ret);
1042 inc_syscr(current);
1043 return ret;
1044}
1045
1046static ssize_t do_writev(unsigned long fd, const struct iovec __user *vec,
1047 unsigned long vlen, rwf_t flags)
1048{
1049 struct fd f = fdget_pos(fd);
1050 ssize_t ret = -EBADF;
1051
1052 if (f.file) {
1053 loff_t pos, *ppos = file_ppos(f.file);
1054 if (ppos) {
1055 pos = *ppos;
1056 ppos = &pos;
1057 }
1058 ret = vfs_writev(f.file, vec, vlen, ppos, flags);
1059 if (ret >= 0 && ppos)
1060 f.file->f_pos = pos;
1061 fdput_pos(f);
1062 }
1063
1064 if (ret > 0)
1065 add_wchar(current, ret);
1066 inc_syscw(current);
1067 return ret;
1068}
1069
1070static inline loff_t pos_from_hilo(unsigned long high, unsigned long low)
1071{
1072#define HALF_LONG_BITS (BITS_PER_LONG / 2)
1073 return (((loff_t)high << HALF_LONG_BITS) << HALF_LONG_BITS) | low;
1074}
1075
1076static ssize_t do_preadv(unsigned long fd, const struct iovec __user *vec,
1077 unsigned long vlen, loff_t pos, rwf_t flags)
1078{
1079 struct fd f;
1080 ssize_t ret = -EBADF;
1081
1082 if (pos < 0)
1083 return -EINVAL;
1084
1085 f = fdget(fd);
1086 if (f.file) {
1087 ret = -ESPIPE;
1088 if (f.file->f_mode & FMODE_PREAD)
1089 ret = vfs_readv(f.file, vec, vlen, &pos, flags);
1090 fdput(f);
1091 }
1092
1093 if (ret > 0)
1094 add_rchar(current, ret);
1095 inc_syscr(current);
1096 return ret;
1097}
1098
1099static ssize_t do_pwritev(unsigned long fd, const struct iovec __user *vec,
1100 unsigned long vlen, loff_t pos, rwf_t flags)
1101{
1102 struct fd f;
1103 ssize_t ret = -EBADF;
1104
1105 if (pos < 0)
1106 return -EINVAL;
1107
1108 f = fdget(fd);
1109 if (f.file) {
1110 ret = -ESPIPE;
1111 if (f.file->f_mode & FMODE_PWRITE)
1112 ret = vfs_writev(f.file, vec, vlen, &pos, flags);
1113 fdput(f);
1114 }
1115
1116 if (ret > 0)
1117 add_wchar(current, ret);
1118 inc_syscw(current);
1119 return ret;
1120}
1121
/* readv(2) entry point: do_readv() with no per-call flags. */
SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
		unsigned long, vlen)
{
	return do_readv(fd, vec, vlen, 0);
}
1127
/* writev(2) entry point: do_writev() with no per-call flags. */
SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
		unsigned long, vlen)
{
	return do_writev(fd, vec, vlen, 0);
}
1133
/*
 * preadv(2) entry point: the offset arrives as two unsigned long halves that
 * pos_from_hilo() combines; no per-call flags.
 */
SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec,
		unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
{
	loff_t pos = pos_from_hilo(pos_h, pos_l);

	return do_preadv(fd, vec, vlen, pos, 0);
}
1141
1142SYSCALL_DEFINE6(preadv2, unsigned long, fd, const struct iovec __user *, vec,
1143 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h,
1144 rwf_t, flags)
1145{
1146 loff_t pos = pos_from_hilo(pos_h, pos_l);
1147
1148 if (pos == -1)
1149 return do_readv(fd, vec, vlen, flags);
1150
1151 return do_preadv(fd, vec, vlen, pos, flags);
1152}
1153
/*
 * pwritev(2) entry point: the offset arrives as two unsigned long halves
 * that pos_from_hilo() combines; no per-call flags.
 */
SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
		unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
{
	loff_t pos = pos_from_hilo(pos_h, pos_l);

	return do_pwritev(fd, vec, vlen, pos, 0);
}
1161
1162SYSCALL_DEFINE6(pwritev2, unsigned long, fd, const struct iovec __user *, vec,
1163 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h,
1164 rwf_t, flags)
1165{
1166 loff_t pos = pos_from_hilo(pos_h, pos_l);
1167
1168 if (pos == -1)
1169 return do_writev(fd, vec, vlen, flags);
1170
1171 return do_pwritev(fd, vec, vlen, pos, flags);
1172}
1173
1174#ifdef CONFIG_COMPAT
1175static size_t compat_readv(struct file *file,
1176 const struct compat_iovec __user *vec,
1177 unsigned long vlen, loff_t *pos, rwf_t flags)
1178{
1179 struct iovec iovstack[UIO_FASTIOV];
1180 struct iovec *iov = iovstack;
1181 struct iov_iter iter;
1182 ssize_t ret;
1183
1184 ret = compat_import_iovec(READ, vec, vlen, UIO_FASTIOV, &iov, &iter);
1185 if (ret >= 0) {
1186 ret = do_iter_read(file, &iter, pos, flags);
1187 kfree(iov);
1188 }
1189 if (ret > 0)
1190 add_rchar(current, ret);
1191 inc_syscr(current);
1192 return ret;
1193}
1194
1195static size_t do_compat_readv(compat_ulong_t fd,
1196 const struct compat_iovec __user *vec,
1197 compat_ulong_t vlen, rwf_t flags)
1198{
1199 struct fd f = fdget_pos(fd);
1200 ssize_t ret;
1201 loff_t pos;
1202
1203 if (!f.file)
1204 return -EBADF;
1205 pos = f.file->f_pos;
1206 ret = compat_readv(f.file, vec, vlen, &pos, flags);
1207 if (ret >= 0)
1208 f.file->f_pos = pos;
1209 fdput_pos(f);
1210 return ret;
1211
1212}
1213
/* Compat readv(2) entry point: do_compat_readv() with no per-call flags. */
COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd,
		const struct compat_iovec __user *,vec,
		compat_ulong_t, vlen)
{
	return do_compat_readv(fd, vec, vlen, 0);
}
1220
1221static long do_compat_preadv64(unsigned long fd,
1222 const struct compat_iovec __user *vec,
1223 unsigned long vlen, loff_t pos, rwf_t flags)
1224{
1225 struct fd f;
1226 ssize_t ret;
1227
1228 if (pos < 0)
1229 return -EINVAL;
1230 f = fdget(fd);
1231 if (!f.file)
1232 return -EBADF;
1233 ret = -ESPIPE;
1234 if (f.file->f_mode & FMODE_PREAD)
1235 ret = compat_readv(f.file, vec, vlen, &pos, flags);
1236 fdput(f);
1237 return ret;
1238}
1239
#ifdef __ARCH_WANT_COMPAT_SYS_PREADV64
/*
 * Compat preadv64 entry point, built only when the architecture asks for it:
 * the offset arrives as a single loff_t; no per-call flags.
 */
COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd,
		const struct compat_iovec __user *,vec,
		unsigned long, vlen, loff_t, pos)
{
	return do_compat_preadv64(fd, vec, vlen, pos, 0);
}
#endif
1248
/*
 * Compat preadv(2) entry point: the offset arrives as two 32-bit halves that
 * are combined into a loff_t; no per-call flags.
 */
COMPAT_SYSCALL_DEFINE5(preadv, compat_ulong_t, fd,
		const struct compat_iovec __user *,vec,
		compat_ulong_t, vlen, u32, pos_low, u32, pos_high)
{
	loff_t pos = ((loff_t)pos_high << 32) | pos_low;

	return do_compat_preadv64(fd, vec, vlen, pos, 0);
}
1257
#ifdef __ARCH_WANT_COMPAT_SYS_PREADV64V2
/*
 * Compat preadv64v2 entry point, built only when the architecture asks for
 * it.  An offset of -1 reads at the file's current position; any other
 * offset is a positional read.
 */
COMPAT_SYSCALL_DEFINE5(preadv64v2, unsigned long, fd,
		const struct compat_iovec __user *,vec,
		unsigned long, vlen, loff_t, pos, rwf_t, flags)
{
	if (pos != -1)
		return do_compat_preadv64(fd, vec, vlen, pos, flags);

	return do_compat_readv(fd, vec, vlen, flags);
}
#endif
1269
1270COMPAT_SYSCALL_DEFINE6(preadv2, compat_ulong_t, fd,
1271 const struct compat_iovec __user *,vec,
1272 compat_ulong_t, vlen, u32, pos_low, u32, pos_high,
1273 rwf_t, flags)
1274{
1275 loff_t pos = ((loff_t)pos_high << 32) | pos_low;
1276
1277 if (pos == -1)
1278 return do_compat_readv(fd, vec, vlen, flags);
1279
1280 return do_compat_preadv64(fd, vec, vlen, pos, flags);
1281}
1282
1283static size_t compat_writev(struct file *file,
1284 const struct compat_iovec __user *vec,
1285 unsigned long vlen, loff_t *pos, rwf_t flags)
1286{
1287 struct iovec iovstack[UIO_FASTIOV];
1288 struct iovec *iov = iovstack;
1289 struct iov_iter iter;
1290 ssize_t ret;
1291
1292 ret = compat_import_iovec(WRITE, vec, vlen, UIO_FASTIOV, &iov, &iter);
1293 if (ret >= 0) {
1294 file_start_write(file);
1295 ret = do_iter_write(file, &iter, pos, flags);
1296 file_end_write(file);
1297 kfree(iov);
1298 }
1299 if (ret > 0)
1300 add_wchar(current, ret);
1301 inc_syscw(current);
1302 return ret;
1303}
1304
1305static size_t do_compat_writev(compat_ulong_t fd,
1306 const struct compat_iovec __user* vec,
1307 compat_ulong_t vlen, rwf_t flags)
1308{
1309 struct fd f = fdget_pos(fd);
1310 ssize_t ret;
1311 loff_t pos;
1312
1313 if (!f.file)
1314 return -EBADF;
1315 pos = f.file->f_pos;
1316 ret = compat_writev(f.file, vec, vlen, &pos, flags);
1317 if (ret >= 0)
1318 f.file->f_pos = pos;
1319 fdput_pos(f);
1320 return ret;
1321}
1322
/* Compat writev(2) entry point: do_compat_writev() with no per-call flags. */
COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd,
		const struct compat_iovec __user *, vec,
		compat_ulong_t, vlen)
{
	return do_compat_writev(fd, vec, vlen, 0);
}
1329
1330static long do_compat_pwritev64(unsigned long fd,
1331 const struct compat_iovec __user *vec,
1332 unsigned long vlen, loff_t pos, rwf_t flags)
1333{
1334 struct fd f;
1335 ssize_t ret;
1336
1337 if (pos < 0)
1338 return -EINVAL;
1339 f = fdget(fd);
1340 if (!f.file)
1341 return -EBADF;
1342 ret = -ESPIPE;
1343 if (f.file->f_mode & FMODE_PWRITE)
1344 ret = compat_writev(f.file, vec, vlen, &pos, flags);
1345 fdput(f);
1346 return ret;
1347}
1348
#ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64
/*
 * Compat pwritev64 entry point, built only when the architecture asks for
 * it: the offset arrives as a single loff_t; no per-call flags.
 */
COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd,
		const struct compat_iovec __user *,vec,
		unsigned long, vlen, loff_t, pos)
{
	return do_compat_pwritev64(fd, vec, vlen, pos, 0);
}
#endif
1357
/*
 * Compat pwritev(2) entry point: the offset arrives as two 32-bit halves
 * that are combined into a loff_t; no per-call flags.
 */
COMPAT_SYSCALL_DEFINE5(pwritev, compat_ulong_t, fd,
		const struct compat_iovec __user *,vec,
		compat_ulong_t, vlen, u32, pos_low, u32, pos_high)
{
	loff_t pos = ((loff_t)pos_high << 32) | pos_low;

	return do_compat_pwritev64(fd, vec, vlen, pos, 0);
}
1366
#ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64V2
/*
 * Compat pwritev64v2 entry point, built only when the architecture asks for
 * it.  An offset of -1 writes at the file's current position; any other
 * offset is a positional write.
 */
COMPAT_SYSCALL_DEFINE5(pwritev64v2, unsigned long, fd,
		const struct compat_iovec __user *,vec,
		unsigned long, vlen, loff_t, pos, rwf_t, flags)
{
	if (pos != -1)
		return do_compat_pwritev64(fd, vec, vlen, pos, flags);

	return do_compat_writev(fd, vec, vlen, flags);
}
#endif
1378
1379COMPAT_SYSCALL_DEFINE6(pwritev2, compat_ulong_t, fd,
1380 const struct compat_iovec __user *,vec,
1381 compat_ulong_t, vlen, u32, pos_low, u32, pos_high, rwf_t, flags)
1382{
1383 loff_t pos = ((loff_t)pos_high << 32) | pos_low;
1384
1385 if (pos == -1)
1386 return do_compat_writev(fd, vec, vlen, flags);
1387
1388 return do_compat_pwritev64(fd, vec, vlen, pos, flags);
1389}
1390
1391#endif
1392
/*
 * Common body of the sendfile(2) family: move up to @count bytes from
 * @in_fd to @out_fd with do_splice_direct().
 *
 * @ppos: when non-NULL, the input offset to use and update (the input file
 *        must have FMODE_PREAD); when NULL the input file's f_pos is used
 *        and updated instead.
 * @max:  largest allowed end offset; 0 selects the smaller s_maxbytes of
 *        the two superblocks.
 *
 * Returns the number of bytes transferred or a negative errno: -EBADF for a
 * bad descriptor or wrong open mode, -ESPIPE for an offset on a file without
 * FMODE_PREAD, errors from rw_verify_area(), -EOVERFLOW when the input
 * position is at or beyond @max, or the error from do_splice_direct().
 * Positions are only updated when at least one byte was transferred.
 */
static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
			   size_t count, loff_t max)
{
	struct fd in, out;
	struct inode *in_inode, *out_inode;
	loff_t pos;
	loff_t out_pos;
	ssize_t retval;
	int fl;

	/*
	 * Get and validate the input file.
	 */
	retval = -EBADF;
	in = fdget(in_fd);
	if (!in.file)
		goto out;
	if (!(in.file->f_mode & FMODE_READ))
		goto fput_in;
	retval = -ESPIPE;
	if (!ppos) {
		pos = in.file->f_pos;
	} else {
		pos = *ppos;
		if (!(in.file->f_mode & FMODE_PREAD))
			goto fput_in;
	}
	retval = rw_verify_area(READ, in.file, &pos, count);
	if (retval < 0)
		goto fput_in;
	if (count > MAX_RW_COUNT)
		count =  MAX_RW_COUNT;

	/*
	 * Get and validate the output file.
	 */
	retval = -EBADF;
	out = fdget(out_fd);
	if (!out.file)
		goto fput_in;
	if (!(out.file->f_mode & FMODE_WRITE))
		goto fput_out;
	in_inode = file_inode(in.file);
	out_inode = file_inode(out.file);
	out_pos = out.file->f_pos;
	retval = rw_verify_area(WRITE, out.file, &out_pos, count);
	if (retval < 0)
		goto fput_out;

	if (!max)
		max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes);

	/* Trim the request so that it ends at @max, or fail if it starts there. */
	if (unlikely(pos + count > max)) {
		retval = -EOVERFLOW;
		if (pos >= max)
			goto fput_out;
		count = max - pos;
	}

	fl = 0;
#if 0
	/*
	 * Disabled: would request a non-blocking splice when the input file
	 * has O_NONBLOCK set.
	 */
	if (in.file->f_flags & O_NONBLOCK)
		fl = SPLICE_F_NONBLOCK;
#endif
	file_start_write(out.file);
	retval = do_splice_direct(in.file, &pos, out.file, &out_pos, count, fl);
	file_end_write(out.file);

	if (retval > 0) {
		add_rchar(current, retval);
		add_wchar(current, retval);
		fsnotify_access(in.file);
		fsnotify_modify(out.file);
		out.file->f_pos = out_pos;
		if (ppos)
			*ppos = pos;
		else
			in.file->f_pos = pos;
	}

	inc_syscr(current);
	inc_syscw(current);
	/* A position past @max turns the result into -EOVERFLOW. */
	if (pos > max)
		retval = -EOVERFLOW;

fput_out:
	fdput(out);
fput_in:
	fdput(in);
out:
	return retval;
}
1491
1492SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd, off_t __user *, offset, size_t, count)
1493{
1494 loff_t pos;
1495 off_t off;
1496 ssize_t ret;
1497
1498 if (offset) {
1499 if (unlikely(get_user(off, offset)))
1500 return -EFAULT;
1501 pos = off;
1502 ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS);
1503 if (unlikely(put_user(pos, offset)))
1504 return -EFAULT;
1505 return ret;
1506 }
1507
1508 return do_sendfile(out_fd, in_fd, NULL, count, 0);
1509}
1510
1511SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, loff_t __user *, offset, size_t, count)
1512{
1513 loff_t pos;
1514 ssize_t ret;
1515
1516 if (offset) {
1517 if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t))))
1518 return -EFAULT;
1519 ret = do_sendfile(out_fd, in_fd, &pos, count, 0);
1520 if (unlikely(put_user(pos, offset)))
1521 return -EFAULT;
1522 return ret;
1523 }
1524
1525 return do_sendfile(out_fd, in_fd, NULL, count, 0);
1526}
1527
1528#ifdef CONFIG_COMPAT
1529COMPAT_SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd,
1530 compat_off_t __user *, offset, compat_size_t, count)
1531{
1532 loff_t pos;
1533 off_t off;
1534 ssize_t ret;
1535
1536 if (offset) {
1537 if (unlikely(get_user(off, offset)))
1538 return -EFAULT;
1539 pos = off;
1540 ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS);
1541 if (unlikely(put_user(pos, offset)))
1542 return -EFAULT;
1543 return ret;
1544 }
1545
1546 return do_sendfile(out_fd, in_fd, NULL, count, 0);
1547}
1548
1549COMPAT_SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd,
1550 compat_loff_t __user *, offset, compat_size_t, count)
1551{
1552 loff_t pos;
1553 ssize_t ret;
1554
1555 if (offset) {
1556 if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t))))
1557 return -EFAULT;
1558 ret = do_sendfile(out_fd, in_fd, &pos, count, 0);
1559 if (unlikely(put_user(pos, offset)))
1560 return -EFAULT;
1561 return ret;
1562 }
1563
1564 return do_sendfile(out_fd, in_fd, NULL, count, 0);
1565}
1566#endif
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in,
1590 struct file *file_out, loff_t pos_out,
1591 size_t len, unsigned int flags)
1592{
1593 return do_splice_direct(file_in, &pos_in, file_out, &pos_out,
1594 len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0);
1595}
1596EXPORT_SYMBOL(generic_copy_file_range);
1597
1598static ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in,
1599 struct file *file_out, loff_t pos_out,
1600 size_t len, unsigned int flags)
1601{
1602
1603
1604
1605
1606
1607
1608
1609
1610 if (file_out->f_op->copy_file_range &&
1611 file_out->f_op->copy_file_range == file_in->f_op->copy_file_range)
1612 return file_out->f_op->copy_file_range(file_in, pos_in,
1613 file_out, pos_out,
1614 len, flags);
1615
1616 return generic_copy_file_range(file_in, pos_in, file_out, pos_out, len,
1617 flags);
1618}
1619
1620
1621
1622
1623
1624
1625ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
1626 struct file *file_out, loff_t pos_out,
1627 size_t len, unsigned int flags)
1628{
1629 ssize_t ret;
1630
1631 if (flags != 0)
1632 return -EINVAL;
1633
1634 ret = generic_copy_file_checks(file_in, pos_in, file_out, pos_out, &len,
1635 flags);
1636 if (unlikely(ret))
1637 return ret;
1638
1639 ret = rw_verify_area(READ, file_in, &pos_in, len);
1640 if (unlikely(ret))
1641 return ret;
1642
1643 ret = rw_verify_area(WRITE, file_out, &pos_out, len);
1644 if (unlikely(ret))
1645 return ret;
1646
1647 if (len == 0)
1648 return 0;
1649
1650 file_start_write(file_out);
1651
1652
1653
1654
1655
1656 if (file_in->f_op->remap_file_range &&
1657 file_inode(file_in)->i_sb == file_inode(file_out)->i_sb) {
1658 loff_t cloned;
1659
1660 cloned = file_in->f_op->remap_file_range(file_in, pos_in,
1661 file_out, pos_out,
1662 min_t(loff_t, MAX_RW_COUNT, len),
1663 REMAP_FILE_CAN_SHORTEN);
1664 if (cloned > 0) {
1665 ret = cloned;
1666 goto done;
1667 }
1668 }
1669
1670 ret = do_copy_file_range(file_in, pos_in, file_out, pos_out, len,
1671 flags);
1672 WARN_ON_ONCE(ret == -EOPNOTSUPP);
1673done:
1674 if (ret > 0) {
1675 fsnotify_access(file_in);
1676 add_rchar(current, ret);
1677 fsnotify_modify(file_out);
1678 add_wchar(current, ret);
1679 }
1680
1681 inc_syscr(current);
1682 inc_syscw(current);
1683
1684 file_end_write(file_out);
1685
1686 return ret;
1687}
1688EXPORT_SYMBOL(vfs_copy_file_range);
1689
1690SYSCALL_DEFINE6(copy_file_range, int, fd_in, loff_t __user *, off_in,
1691 int, fd_out, loff_t __user *, off_out,
1692 size_t, len, unsigned int, flags)
1693{
1694 loff_t pos_in;
1695 loff_t pos_out;
1696 struct fd f_in;
1697 struct fd f_out;
1698 ssize_t ret = -EBADF;
1699
1700 f_in = fdget(fd_in);
1701 if (!f_in.file)
1702 goto out2;
1703
1704 f_out = fdget(fd_out);
1705 if (!f_out.file)
1706 goto out1;
1707
1708 ret = -EFAULT;
1709 if (off_in) {
1710 if (copy_from_user(&pos_in, off_in, sizeof(loff_t)))
1711 goto out;
1712 } else {
1713 pos_in = f_in.file->f_pos;
1714 }
1715
1716 if (off_out) {
1717 if (copy_from_user(&pos_out, off_out, sizeof(loff_t)))
1718 goto out;
1719 } else {
1720 pos_out = f_out.file->f_pos;
1721 }
1722
1723 ret = vfs_copy_file_range(f_in.file, pos_in, f_out.file, pos_out, len,
1724 flags);
1725 if (ret > 0) {
1726 pos_in += ret;
1727 pos_out += ret;
1728
1729 if (off_in) {
1730 if (copy_to_user(off_in, &pos_in, sizeof(loff_t)))
1731 ret = -EFAULT;
1732 } else {
1733 f_in.file->f_pos = pos_in;
1734 }
1735
1736 if (off_out) {
1737 if (copy_to_user(off_out, &pos_out, sizeof(loff_t)))
1738 ret = -EFAULT;
1739 } else {
1740 f_out.file->f_pos = pos_out;
1741 }
1742 }
1743
1744out:
1745 fdput(f_out);
1746out1:
1747 fdput(f_in);
1748out2:
1749 return ret;
1750}
1751
1752static int remap_verify_area(struct file *file, loff_t pos, loff_t len,
1753 bool write)
1754{
1755 struct inode *inode = file_inode(file);
1756
1757 if (unlikely(pos < 0 || len < 0))
1758 return -EINVAL;
1759
1760 if (unlikely((loff_t) (pos + len) < 0))
1761 return -EINVAL;
1762
1763 if (unlikely(inode->i_flctx && mandatory_lock(inode))) {
1764 loff_t end = len ? pos + len - 1 : OFFSET_MAX;
1765 int retval;
1766
1767 retval = locks_mandatory_area(inode, file, pos, end,
1768 write ? F_WRLCK : F_RDLCK);
1769 if (retval < 0)
1770 return retval;
1771 }
1772
1773 return security_file_permission(file, write ? MAY_WRITE : MAY_READ);
1774}
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787static int generic_remap_check_len(struct inode *inode_in,
1788 struct inode *inode_out,
1789 loff_t pos_out,
1790 loff_t *len,
1791 unsigned int remap_flags)
1792{
1793 u64 blkmask = i_blocksize(inode_in) - 1;
1794 loff_t new_len = *len;
1795
1796 if ((*len & blkmask) == 0)
1797 return 0;
1798
1799 if ((remap_flags & REMAP_FILE_DEDUP) ||
1800 pos_out + *len < i_size_read(inode_out))
1801 new_len &= ~blkmask;
1802
1803 if (new_len == *len)
1804 return 0;
1805
1806 if (remap_flags & REMAP_FILE_CAN_SHORTEN) {
1807 *len = new_len;
1808 return 0;
1809 }
1810
1811 return (remap_flags & REMAP_FILE_DEDUP) ? -EBADE : -EINVAL;
1812}
1813
1814
1815static struct page *vfs_dedupe_get_page(struct inode *inode, loff_t offset)
1816{
1817 struct page *page;
1818
1819 page = read_mapping_page(inode->i_mapping, offset >> PAGE_SHIFT, NULL);
1820 if (IS_ERR(page))
1821 return page;
1822 if (!PageUptodate(page)) {
1823 put_page(page);
1824 return ERR_PTR(-EIO);
1825 }
1826 return page;
1827}
1828
1829
1830
1831
1832
1833static void vfs_lock_two_pages(struct page *page1, struct page *page2)
1834{
1835
1836 if (page1->index > page2->index)
1837 swap(page1, page2);
1838
1839 lock_page(page1);
1840 if (page1 != page2)
1841 lock_page(page2);
1842}
1843
1844
1845static void vfs_unlock_two_pages(struct page *page1, struct page *page2)
1846{
1847 unlock_page(page1);
1848 if (page1 != page2)
1849 unlock_page(page2);
1850}
1851
1852
1853
1854
1855
/*
 * Compare @len bytes of @src starting at @srcoff with @dest starting at
 * @destoff, one page-bounded chunk at a time.
 *
 * On success returns 0 and stores the verdict in *@is_same.  On failure
 * returns a negative errno (the page-read error, or -EINVAL if a chunk
 * length comes out non-positive) and leaves *@is_same untouched.
 * NOTE(review): presumably the caller holds whatever inode locks are needed
 * to keep writers away during the comparison - confirm against callers.
 */
static int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
					 struct inode *dest, loff_t destoff,
					 loff_t len, bool *is_same)
{
	loff_t src_poff;
	loff_t dest_poff;
	void *src_addr;
	void *dest_addr;
	struct page *src_page;
	struct page *dest_page;
	loff_t cmp_len;
	bool same;
	int error;

	error = -EINVAL;
	same = true;
	while (len) {
		/*
		 * Chunk size: up to the end of whichever page ends first,
		 * and never more than what is left to compare.
		 */
		src_poff = srcoff & (PAGE_SIZE - 1);
		dest_poff = destoff & (PAGE_SIZE - 1);
		cmp_len = min(PAGE_SIZE - src_poff,
			      PAGE_SIZE - dest_poff);
		cmp_len = min(cmp_len, len);
		if (cmp_len <= 0)
			goto out_error;

		src_page = vfs_dedupe_get_page(src, srcoff);
		if (IS_ERR(src_page)) {
			error = PTR_ERR(src_page);
			goto out_error;
		}
		dest_page = vfs_dedupe_get_page(dest, destoff);
		if (IS_ERR(dest_page)) {
			error = PTR_ERR(dest_page);
			/* Drop the source page obtained just above. */
			put_page(src_page);
			goto out_error;
		}

		vfs_lock_two_pages(src_page, dest_page);

		/*
		 * With both pages locked, re-check that they are still up to
		 * date and still belong to the expected mappings.  If not,
		 * report the ranges as different rather than as an error.
		 */
		if (!PageUptodate(src_page) || !PageUptodate(dest_page) ||
		    src_page->mapping != src->i_mapping ||
		    dest_page->mapping != dest->i_mapping) {
			same = false;
			goto unlock;
		}

		src_addr = kmap_atomic(src_page);
		dest_addr = kmap_atomic(dest_page);

		flush_dcache_page(src_page);
		flush_dcache_page(dest_page);

		if (memcmp(src_addr + src_poff, dest_addr + dest_poff, cmp_len))
			same = false;

		/* Atomic mappings are released in reverse order of mapping. */
		kunmap_atomic(dest_addr);
		kunmap_atomic(src_addr);
unlock:
		vfs_unlock_two_pages(src_page, dest_page);
		put_page(dest_page);
		put_page(src_page);

		/* First mismatch ends the comparison. */
		if (!same)
			break;

		srcoff += cmp_len;
		destoff += cmp_len;
		len -= cmp_len;
	}

	*is_same = same;
	return 0;

out_error:
	return error;
}
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
1947 struct file *file_out, loff_t pos_out,
1948 loff_t *len, unsigned int remap_flags)
1949{
1950 struct inode *inode_in = file_inode(file_in);
1951 struct inode *inode_out = file_inode(file_out);
1952 bool same_inode = (inode_in == inode_out);
1953 int ret;
1954
1955
1956 if (IS_IMMUTABLE(inode_out))
1957 return -EPERM;
1958
1959 if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out))
1960 return -ETXTBSY;
1961
1962
1963 if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
1964 return -EISDIR;
1965 if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
1966 return -EINVAL;
1967
1968
1969 if (*len == 0) {
1970 loff_t isize = i_size_read(inode_in);
1971
1972 if ((remap_flags & REMAP_FILE_DEDUP) || pos_in == isize)
1973 return 0;
1974 if (pos_in > isize)
1975 return -EINVAL;
1976 *len = isize - pos_in;
1977 if (*len == 0)
1978 return 0;
1979 }
1980
1981
1982 ret = generic_remap_checks(file_in, pos_in, file_out, pos_out, len,
1983 remap_flags);
1984 if (ret)
1985 return ret;
1986
1987
1988 inode_dio_wait(inode_in);
1989 if (!same_inode)
1990 inode_dio_wait(inode_out);
1991
1992 ret = filemap_write_and_wait_range(inode_in->i_mapping,
1993 pos_in, pos_in + *len - 1);
1994 if (ret)
1995 return ret;
1996
1997 ret = filemap_write_and_wait_range(inode_out->i_mapping,
1998 pos_out, pos_out + *len - 1);
1999 if (ret)
2000 return ret;
2001
2002
2003
2004
2005 if (remap_flags & REMAP_FILE_DEDUP) {
2006 bool is_same = false;
2007
2008 ret = vfs_dedupe_file_range_compare(inode_in, pos_in,
2009 inode_out, pos_out, *len, &is_same);
2010 if (ret)
2011 return ret;
2012 if (!is_same)
2013 return -EBADE;
2014 }
2015
2016 ret = generic_remap_check_len(inode_in, inode_out, pos_out, len,
2017 remap_flags);
2018 if (ret)
2019 return ret;
2020
2021
2022 if (!(remap_flags & REMAP_FILE_DEDUP))
2023 ret = file_modified(file_out);
2024
2025 return ret;
2026}
2027EXPORT_SYMBOL(generic_remap_file_range_prep);
2028
2029loff_t do_clone_file_range(struct file *file_in, loff_t pos_in,
2030 struct file *file_out, loff_t pos_out,
2031 loff_t len, unsigned int remap_flags)
2032{
2033 loff_t ret;
2034
2035 WARN_ON_ONCE(remap_flags & REMAP_FILE_DEDUP);
2036
2037
2038
2039
2040
2041
2042 if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb)
2043 return -EXDEV;
2044
2045 ret = generic_file_rw_checks(file_in, file_out);
2046 if (ret < 0)
2047 return ret;
2048
2049 if (!file_in->f_op->remap_file_range)
2050 return -EOPNOTSUPP;
2051
2052 ret = remap_verify_area(file_in, pos_in, len, false);
2053 if (ret)
2054 return ret;
2055
2056 ret = remap_verify_area(file_out, pos_out, len, true);
2057 if (ret)
2058 return ret;
2059
2060 ret = file_in->f_op->remap_file_range(file_in, pos_in,
2061 file_out, pos_out, len, remap_flags);
2062 if (ret < 0)
2063 return ret;
2064
2065 fsnotify_access(file_in);
2066 fsnotify_modify(file_out);
2067 return ret;
2068}
2069EXPORT_SYMBOL(do_clone_file_range);
2070
2071loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in,
2072 struct file *file_out, loff_t pos_out,
2073 loff_t len, unsigned int remap_flags)
2074{
2075 loff_t ret;
2076
2077 file_start_write(file_out);
2078 ret = do_clone_file_range(file_in, pos_in, file_out, pos_out, len,
2079 remap_flags);
2080 file_end_write(file_out);
2081
2082 return ret;
2083}
2084EXPORT_SYMBOL(vfs_clone_file_range);
2085
2086
2087static bool allow_file_dedupe(struct file *file)
2088{
2089 if (capable(CAP_SYS_ADMIN))
2090 return true;
2091 if (file->f_mode & FMODE_WRITE)
2092 return true;
2093 if (uid_eq(current_fsuid(), file_inode(file)->i_uid))
2094 return true;
2095 if (!inode_permission(file_inode(file), MAY_WRITE))
2096 return true;
2097 return false;
2098}
2099
2100loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
2101 struct file *dst_file, loff_t dst_pos,
2102 loff_t len, unsigned int remap_flags)
2103{
2104 loff_t ret;
2105
2106 WARN_ON_ONCE(remap_flags & ~(REMAP_FILE_DEDUP |
2107 REMAP_FILE_CAN_SHORTEN));
2108
2109 ret = mnt_want_write_file(dst_file);
2110 if (ret)
2111 return ret;
2112
2113 ret = remap_verify_area(dst_file, dst_pos, len, true);
2114 if (ret < 0)
2115 goto out_drop_write;
2116
2117 ret = -EPERM;
2118 if (!allow_file_dedupe(dst_file))
2119 goto out_drop_write;
2120
2121 ret = -EXDEV;
2122 if (src_file->f_path.mnt != dst_file->f_path.mnt)
2123 goto out_drop_write;
2124
2125 ret = -EISDIR;
2126 if (S_ISDIR(file_inode(dst_file)->i_mode))
2127 goto out_drop_write;
2128
2129 ret = -EINVAL;
2130 if (!dst_file->f_op->remap_file_range)
2131 goto out_drop_write;
2132
2133 if (len == 0) {
2134 ret = 0;
2135 goto out_drop_write;
2136 }
2137
2138 ret = dst_file->f_op->remap_file_range(src_file, src_pos, dst_file,
2139 dst_pos, len, remap_flags | REMAP_FILE_DEDUP);
2140out_drop_write:
2141 mnt_drop_write_file(dst_file);
2142
2143 return ret;
2144}
2145EXPORT_SYMBOL(vfs_dedupe_file_range_one);
2146
2147int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
2148{
2149 struct file_dedupe_range_info *info;
2150 struct inode *src = file_inode(file);
2151 u64 off;
2152 u64 len;
2153 int i;
2154 int ret;
2155 u16 count = same->dest_count;
2156 loff_t deduped;
2157
2158 if (!(file->f_mode & FMODE_READ))
2159 return -EINVAL;
2160
2161 if (same->reserved1 || same->reserved2)
2162 return -EINVAL;
2163
2164 off = same->src_offset;
2165 len = same->src_length;
2166
2167 if (S_ISDIR(src->i_mode))
2168 return -EISDIR;
2169
2170 if (!S_ISREG(src->i_mode))
2171 return -EINVAL;
2172
2173 if (!file->f_op->remap_file_range)
2174 return -EOPNOTSUPP;
2175
2176 ret = remap_verify_area(file, off, len, false);
2177 if (ret < 0)
2178 return ret;
2179 ret = 0;
2180
2181 if (off + len > i_size_read(src))
2182 return -EINVAL;
2183
2184
2185 len = min_t(u64, len, 1 << 30);
2186
2187
2188 for (i = 0; i < count; i++) {
2189 same->info[i].bytes_deduped = 0ULL;
2190 same->info[i].status = FILE_DEDUPE_RANGE_SAME;
2191 }
2192
2193 for (i = 0, info = same->info; i < count; i++, info++) {
2194 struct fd dst_fd = fdget(info->dest_fd);
2195 struct file *dst_file = dst_fd.file;
2196
2197 if (!dst_file) {
2198 info->status = -EBADF;
2199 goto next_loop;
2200 }
2201
2202 if (info->reserved) {
2203 info->status = -EINVAL;
2204 goto next_fdput;
2205 }
2206
2207 deduped = vfs_dedupe_file_range_one(file, off, dst_file,
2208 info->dest_offset, len,
2209 REMAP_FILE_CAN_SHORTEN);
2210 if (deduped == -EBADE)
2211 info->status = FILE_DEDUPE_RANGE_DIFFERS;
2212 else if (deduped < 0)
2213 info->status = deduped;
2214 else
2215 info->bytes_deduped = len;
2216
2217next_fdput:
2218 fdput(dst_fd);
2219next_loop:
2220 if (fatal_signal_pending(current))
2221 break;
2222 }
2223 return ret;
2224}
2225EXPORT_SYMBOL(vfs_dedupe_file_range);
2226