/*
 * Resizable virtual memory filesystem for Linux (tmpfs / shmem).
 *
 * This file is released under the GPL.
 */
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/vfs.h>
#include <linux/mount.h>
#include <linux/ramfs.h>
#include <linux/pagemap.h>
#include <linux/file.h>
#include <linux/mm.h>
#include <linux/export.h>
#include <linux/swap.h>
#include <linux/aio.h>
#include <linux/syscalls.h>
#include <uapi/linux/memfd.h>

static struct vfsmount *shm_mnt;

#ifdef CONFIG_SHMEM
/*
 * This virtual memory filesystem is heavily based on ramfs. It extends
 * ramfs with the ability to use swap and to honour resource limits, which
 * makes it a completely usable filesystem.
 */
#include <linux/xattr.h>
#include <linux/exportfs.h>
#include <linux/posix_acl.h>
#include <linux/generic_acl.h>
#include <linux/mman.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>
#include <linux/shmem_fs.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/pagevec.h>
#include <linux/percpu_counter.h>
#include <linux/falloc.h>
#include <linux/splice.h>
#include <linux/security.h>
#include <linux/swapops.h>
#include <linux/mempolicy.h>
#include <linux/namei.h>
#include <linux/ctype.h>
#include <linux/migrate.h>
#include <linux/highmem.h>
#include <linux/seq_file.h>
#include <linux/magic.h>
#include <linux/fcntl.h>
#include <linux/userfaultfd_k.h>
#include <linux/rmap.h>
#include <linux/uuid.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>

#define BLOCKS_PER_PAGE  (PAGE_CACHE_SIZE/512)
#define VM_ACCT(size)    (PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT)

/* Pretend that each entry is of this size in directory's i_size */
#define BOGO_DIRENT_SIZE 20

/* Symlink up to this length is kmalloc'ed instead of using a swappable page */
#define SHORT_SYMLINK_LEN 128

/*
 * shmem_fallocate communicates with shmem_fault and shmem_writepage via
 * inode->i_private (with i_mutex making sure that it has only one user at
 * a time), to describe the range being fallocated or hole-punched.
 */
struct shmem_falloc {
	wait_queue_head_t *waitq; /* faults into hole wait for punch to end */
	pgoff_t start;		/* start of range currently being fallocated */
	pgoff_t next;		/* the next page offset to be fallocated */
	pgoff_t nr_falloced;	/* how many new pages have been fallocated */
	pgoff_t nr_unswapped;	/* how often writepage refused to swap out */
};

/* Flags that tell shmem_getpage_gfp() what the caller wants done with the page */
enum sgp_type {
	SGP_READ,	/* don't exceed i_size, don't allocate page */
	SGP_CACHE,	/* don't exceed i_size, may allocate page */
	SGP_DIRTY,	/* like SGP_CACHE, but set new page dirty */
	SGP_WRITE,	/* may exceed i_size, may allocate !Uptodate page */
	SGP_FALLOC,	/* like SGP_WRITE, but make existing page Uptodate */
};
109
110#ifdef CONFIG_TMPFS
111static unsigned long shmem_default_max_blocks(void)
112{
113 return totalram_pages / 2;
114}
115
116static unsigned long shmem_default_max_inodes(void)
117{
118 return min(totalram_pages - totalhigh_pages, totalram_pages / 2);
119}
120#endif
121
122static bool shmem_should_replace_page(struct page *page, gfp_t gfp);
123static int shmem_replace_page(struct page **pagep, gfp_t gfp,
124 struct shmem_inode_info *info, pgoff_t index);
125static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
126 struct page **pagep, enum sgp_type sgp,
127 gfp_t gfp, struct vm_area_struct *vma,
128 struct vm_fault *vmf, int *fault_type);
129
130static inline int shmem_getpage(struct inode *inode, pgoff_t index,
131 struct page **pagep, enum sgp_type sgp,
132 int *fault_type)
133{
134 return shmem_getpage_gfp(inode, index, pagep, sgp,
135 mapping_gfp_mask(inode->i_mapping),
136 NULL, NULL, fault_type);
137}
138
139static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb)
140{
141 return sb->s_fs_info;
142}

/*
 * shmem_file_setup pre-accounts the whole fixed size of a VM object, for
 * shared memory and for shared anonymous (/dev/zero) mappings (unless
 * VM_NORESERVE), consistent with the pre-accounting of private mappings.
 * shmem_acct_size() and shmem_unacct_size() handle that up-front charge.
 */
150static inline int shmem_acct_size(unsigned long flags, loff_t size)
151{
152 return (flags & VM_NORESERVE) ?
153 0 : security_vm_enough_memory_mm(current->mm, VM_ACCT(size));
154}
155
156static inline void shmem_unacct_size(unsigned long flags, loff_t size)
157{
158 if (!(flags & VM_NORESERVE))
159 vm_unacct_memory(VM_ACCT(size));
160}

/*
 * ...whereas tmpfs objects are accounted incrementally as pages are
 * allocated, in order to allow large sparse files.  shmem_getpage reports
 * shmem_acct_block failure as -ENOSPC not -ENOMEM, so that a failure on a
 * sparse tmpfs mapping will give SIGBUS rather than OOM.
 */
168static inline int shmem_acct_block(unsigned long flags, long pages)
169{
170 return (flags & VM_NORESERVE) ?
171 security_vm_enough_memory_mm(current->mm, pages *
172 VM_ACCT(PAGE_CACHE_SIZE)) : 0;
173}
174
175static inline void shmem_unacct_blocks(unsigned long flags, long pages)
176{
177 if (flags & VM_NORESERVE)
178 vm_unacct_memory(pages * VM_ACCT(PAGE_CACHE_SIZE));
179}
180
181static inline bool shmem_inode_acct_block(struct inode *inode, long pages)
182{
183 struct shmem_inode_info *info = SHMEM_I(inode);
184 struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
185
186 if (shmem_acct_block(info->flags, pages))
187 return false;
188
189 if (sbinfo->max_blocks) {
190 if (percpu_counter_compare(&sbinfo->used_blocks,
191 sbinfo->max_blocks - pages) > 0)
192 goto unacct;
193 percpu_counter_add(&sbinfo->used_blocks, pages);
194 }
195
196 return true;
197
198unacct:
199 shmem_unacct_blocks(info->flags, pages);
200 return false;
201}
202
203static inline void shmem_inode_unacct_blocks(struct inode *inode, long pages)
204{
205 struct shmem_inode_info *info = SHMEM_I(inode);
206 struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
207
208 if (sbinfo->max_blocks)
209 percpu_counter_sub(&sbinfo->used_blocks, pages);
210 shmem_unacct_blocks(info->flags, pages);
211}
212
213static const struct super_operations shmem_ops;
214static const struct address_space_operations shmem_aops;
215static const struct file_operations shmem_file_operations;
216static const struct inode_operations shmem_inode_operations;
217static const struct inode_operations_wrapper shmem_dir_inode_operations;
218static const struct inode_operations shmem_special_inode_operations;
219static const struct vm_operations_struct shmem_vm_ops;
220
221static struct backing_dev_info shmem_backing_dev_info __read_mostly = {
222 .ra_pages = 0,
223 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_SWAP_BACKED,
224};
225
226bool vma_is_shmem(struct vm_area_struct *vma)
227{
228 return vma->vm_ops == &shmem_vm_ops;
229}
230
231static LIST_HEAD(shmem_swaplist);
232static DEFINE_MUTEX(shmem_swaplist_mutex);
233
234static int shmem_reserve_inode(struct super_block *sb)
235{
236 struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
237 if (sbinfo->max_inodes) {
238 spin_lock(&sbinfo->stat_lock);
239 if (!sbinfo->free_inodes) {
240 spin_unlock(&sbinfo->stat_lock);
241 return -ENOSPC;
242 }
243 sbinfo->free_inodes--;
244 spin_unlock(&sbinfo->stat_lock);
245 }
246 return 0;
247}
248
249static void shmem_free_inode(struct super_block *sb)
250{
251 struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
252 if (sbinfo->max_inodes) {
253 spin_lock(&sbinfo->stat_lock);
254 sbinfo->free_inodes++;
255 spin_unlock(&sbinfo->stat_lock);
256 }
257}

/*
 * shmem_recalc_inode - recalculate the block usage of an inode
 *
 * We have to calculate the free blocks since the mm can drop
 * undirtied hole pages behind our back.
 *
 * But normally   info->alloced == inode->i_mapping->nrpages + info->swapped
 * So mm freed is info->alloced - (inode->i_mapping->nrpages + info->swapped)
 *
 * It has to be called with the spinlock held.
 */
271static void shmem_recalc_inode(struct inode *inode)
272{
273 struct shmem_inode_info *info = SHMEM_I(inode);
274 long freed;
275
276 freed = info->alloced - info->swapped - inode->i_mapping->nrpages;
277 if (freed > 0) {
278 info->alloced -= freed;
279 inode->i_blocks -= freed * BLOCKS_PER_PAGE;
280 shmem_inode_unacct_blocks(inode, freed);
281 }
282}

/*
 * Replace the item expected at this index in the radix tree by the
 * replacement, while holding the mapping's tree_lock.
 */
287static int shmem_radix_tree_replace(struct address_space *mapping,
288 pgoff_t index, void *expected, void *replacement)
289{
290 void **pslot;
291 void *item;
292
293 VM_BUG_ON(!expected);
294 VM_BUG_ON(!replacement);
295 pslot = radix_tree_lookup_slot(&mapping->page_tree, index);
296 if (!pslot)
297 return -ENOENT;
298 item = radix_tree_deref_slot_protected(pslot, &mapping->tree_lock);
299 if (item != expected)
300 return -ENOENT;
301 radix_tree_replace_slot(pslot, replacement);
302 return 0;
303}

/*
 * Sometimes, before we decide whether to proceed or to fail, we must check
 * that the swap entry is still in the radix tree: checking the page is not
 * enough, since by the time a swap-cache page is locked it may already have
 * been reused for something else.
 */
312static bool shmem_confirm_swap(struct address_space *mapping,
313 pgoff_t index, swp_entry_t swap)
314{
315 void *item;
316
317 rcu_read_lock();
318 item = radix_tree_lookup(&mapping->page_tree, index);
319 rcu_read_unlock();
320 return item == swp_to_radix_entry(swap);
321}

/*
 * Like add_to_page_cache_locked, but error if the expected item has gone.
 */
326static int shmem_add_to_page_cache(struct page *page,
327 struct address_space *mapping,
328 pgoff_t index, gfp_t gfp, void *expected)
329{
330 int error;
331
332 VM_BUG_ON_PAGE(!PageLocked(page), page);
333 VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
334
335 page_cache_get(page);
336 page->mapping = mapping;
337 page->index = index;
338
339 spin_lock_irq(&mapping->tree_lock);
340 if (!expected)
341 error = radix_tree_insert(&mapping->page_tree, index, page);
342 else
343 error = shmem_radix_tree_replace(mapping, index, expected,
344 page);
345 if (!error) {
346 mapping->nrpages++;
347 __inc_zone_page_state(page, NR_FILE_PAGES);
348 __inc_zone_page_state(page, NR_SHMEM);
349 spin_unlock_irq(&mapping->tree_lock);
350 } else {
351 page->mapping = NULL;
352 spin_unlock_irq(&mapping->tree_lock);
353 page_cache_release(page);
354 }
355 return error;
356}

/*
 * Like delete_from_page_cache, but substitutes swap for page.
 */
361static void shmem_delete_from_page_cache(struct page *page, void *radswap)
362{
363 struct address_space *mapping = page->mapping;
364 int error;
365
366 spin_lock_irq(&mapping->tree_lock);
367 error = shmem_radix_tree_replace(mapping, page->index, page, radswap);
368 page->mapping = NULL;
369 mapping->nrpages--;
370 __dec_zone_page_state(page, NR_FILE_PAGES);
371 __dec_zone_page_state(page, NR_SHMEM);
372 spin_unlock_irq(&mapping->tree_lock);
373 page_cache_release(page);
374 BUG_ON(error);
375}

/*
 * Remove swap entry from radix tree, free the swap and its page cache.
 */
380static int shmem_free_swap(struct address_space *mapping,
381 pgoff_t index, void *radswap)
382{
383 void *old;
384
385 spin_lock_irq(&mapping->tree_lock);
386 old = radix_tree_delete_item(&mapping->page_tree, index, radswap);
387 spin_unlock_irq(&mapping->tree_lock);
388 if (old != radswap)
389 return -ENOENT;
390 free_swap_and_cache(radix_to_swp_entry(radswap));
391 return 0;
392}

/*
 * shmem_partial_swap_usage - determine the swap usage of a shmem mapping
 * within the page offsets [start, end), by counting the swap entries in
 * that slice of the radix tree.  Returns the usage in bytes.
 */
401unsigned long shmem_partial_swap_usage(struct address_space *mapping,
402 pgoff_t start, pgoff_t end)
403{
404 struct radix_tree_iter iter;
405 void **slot;
406 struct page *page;
407 unsigned long swapped = 0;
408
409 rcu_read_lock();
410
411 radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
412 if (iter.index >= end)
413 break;
414
415 page = radix_tree_deref_slot(slot);
416
417 if (radix_tree_deref_retry(page)) {
418 slot = radix_tree_iter_retry(&iter);
419 continue;
420 }
421
422 if (radix_tree_exceptional_entry(page))
423 swapped++;
424
425 if (need_resched()) {
426 cond_resched_rcu();
427 slot = radix_tree_iter_next(&iter);
428 }
429 }
430
431 rcu_read_unlock();
432
433 return swapped << PAGE_SHIFT;
434}

/*
 * shmem_swap_usage - determine how much swap is used by the shmem file
 * backing this vma, counting only the part of the file that the vma maps.
 * Returns the usage in bytes.
 */
443unsigned long shmem_swap_usage(struct vm_area_struct *vma)
444{
445 struct inode *inode = file_inode(vma->vm_file);
446 struct shmem_inode_info *info = SHMEM_I(inode);
447 struct address_space *mapping = inode->i_mapping;
448 unsigned long swapped;

	/* A racy snapshot of info->swapped, without info->lock, is fine here */
	swapped = READ_ONCE(info->swapped);

	/*
	 * Cheap answers first: nothing in swap at all, or the vma maps the
	 * whole file, in which case the global count already gives the answer.
	 */
458 if (!swapped)
459 return 0;
460
461 if (!vma->vm_pgoff && vma->vm_end - vma->vm_start >= inode->i_size)
462 return swapped << PAGE_SHIFT;

	/* Otherwise count only the swap entries within the mapped sub-range */
465 return shmem_partial_swap_usage(mapping,
466 linear_page_index(vma, vma->vm_start),
467 linear_page_index(vma, vma->vm_end));
468}

/*
 * SysV IPC SHM_UNLOCK: restore this mapping's pages to their appropriate
 * (evictable) LRU lists.
 */
473void shmem_unlock_mapping(struct address_space *mapping)
474{
475 struct pagevec pvec;
476 pgoff_t indices[PAGEVEC_SIZE];
477 pgoff_t index = 0;
478
479 pagevec_init(&pvec, 0);

	/*
	 * Might as well stop early if someone else SHM_LOCKs the mapping
	 * again while we are working through it.
	 */
	while (!mapping_unevictable(mapping)) {
		/*
		 * __find_get_pages() also returns swap entries, so strip the
		 * exceptional entries below before rescuing the pages.
		 */
488 pvec.nr = __find_get_pages(mapping, index,
489 PAGEVEC_SIZE, pvec.pages, indices);
490 if (!pvec.nr)
491 break;
492 index = indices[pvec.nr - 1] + 1;
493 pagevec_remove_exceptionals(&pvec);
494 check_move_unevictable_pages(pvec.pages, pvec.nr);
495 pagevec_release(&pvec);
496 cond_resched();
497 }
498}

/*
 * Remove a range of pages, and any swap entries, from the page cache and
 * free them.  If !unfalloc, truncate the whole range; if unfalloc, only
 * remove the never-written (!Uptodate) pages that fallocate itself added,
 * and leave swap entries and written pages in place.
 */
504static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
505 bool unfalloc)
506{
507 struct address_space *mapping = inode->i_mapping;
508 struct shmem_inode_info *info = SHMEM_I(inode);
509 pgoff_t start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
510 pgoff_t end = (lend + 1) >> PAGE_CACHE_SHIFT;
511 unsigned int partial_start = lstart & (PAGE_CACHE_SIZE - 1);
512 unsigned int partial_end = (lend + 1) & (PAGE_CACHE_SIZE - 1);
513 struct pagevec pvec;
514 pgoff_t indices[PAGEVEC_SIZE];
515 long nr_swaps_freed = 0;
516 pgoff_t index;
517 int i;
518
519 if (lend == -1)
520 end = -1;
521
522 pagevec_init(&pvec, 0);
523 index = start;
524 while (index < end) {
525 pvec.nr = __find_get_pages(mapping, index,
526 min(end - index, (pgoff_t)PAGEVEC_SIZE),
527 pvec.pages, indices);
528 if (!pvec.nr)
529 break;
530 mem_cgroup_uncharge_start();
531 for (i = 0; i < pagevec_count(&pvec); i++) {
532 struct page *page = pvec.pages[i];
533
534 index = indices[i];
535 if (index >= end)
536 break;
537
538 if (radix_tree_exceptional_entry(page)) {
539 if (unfalloc)
540 continue;
541 nr_swaps_freed += !shmem_free_swap(mapping,
542 index, page);
543 continue;
544 }
545
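			/* First pass is best-effort: skip any page we cannot lock without waiting */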
546 if (!trylock_page(page))
547 continue;
548 if (!unfalloc || !PageUptodate(page)) {
549 if (page->mapping == mapping) {
550 VM_BUG_ON_PAGE(PageWriteback(page), page);
551 truncate_inode_page(mapping, page);
552 }
553 }
554 unlock_page(page);
555 }
556 pagevec_remove_exceptionals(&pvec);
557 pagevec_release(&pvec);
558 mem_cgroup_uncharge_end();
559 cond_resched();
560 index++;
561 }
562
563 if (partial_start) {
564 struct page *page = NULL;
565 shmem_getpage(inode, start - 1, &page, SGP_READ, NULL);
566 if (page) {
567 unsigned int top = PAGE_CACHE_SIZE;
568 if (start > end) {
569 top = partial_end;
570 partial_end = 0;
571 }
572 zero_user_segment(page, partial_start, top);
573 set_page_dirty(page);
574 unlock_page(page);
575 page_cache_release(page);
576 }
577 }
578 if (partial_end) {
579 struct page *page = NULL;
580 shmem_getpage(inode, end, &page, SGP_READ, NULL);
581 if (page) {
582 zero_user_segment(page, 0, partial_end);
583 set_page_dirty(page);
584 unlock_page(page);
585 page_cache_release(page);
586 }
587 }
588 if (start >= end)
589 return;
590
591 index = start;
592 while (index < end) {
593 cond_resched();
594
595 pvec.nr = __find_get_pages(mapping, index,
596 min(end - index, (pgoff_t)PAGEVEC_SIZE),
597 pvec.pages, indices);
598 if (!pvec.nr) {
599
600 if (index == start || end != -1)
601 break;
602
603 index = start;
604 continue;
605 }
606 mem_cgroup_uncharge_start();
607 for (i = 0; i < pagevec_count(&pvec); i++) {
608 struct page *page = pvec.pages[i];
609
610 index = indices[i];
611 if (index >= end)
612 break;
613
614 if (radix_tree_exceptional_entry(page)) {
615 if (unfalloc)
616 continue;
617 if (shmem_free_swap(mapping, index, page)) {
618
619 index--;
620 break;
621 }
622 nr_swaps_freed++;
623 continue;
624 }
625
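			/* Second pass: wait for the page lock, so nothing can be left behind */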
626 lock_page(page);
627 if (!unfalloc || !PageUptodate(page)) {
628 if (page->mapping == mapping) {
629 VM_BUG_ON_PAGE(PageWriteback(page), page);
630 truncate_inode_page(mapping, page);
631 } else {
632
633 unlock_page(page);
634 index--;
635 break;
636 }
637 }
638 unlock_page(page);
639 }
640 pagevec_remove_exceptionals(&pvec);
641 pagevec_release(&pvec);
642 mem_cgroup_uncharge_end();
643 index++;
644 }
645
646 spin_lock(&info->lock);
647 info->swapped -= nr_swaps_freed;
648 shmem_recalc_inode(inode);
649 spin_unlock(&info->lock);
650}
651
652void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
653{
654 shmem_undo_range(inode, lstart, lend, false);
655 inode->i_ctime = inode->i_mtime = CURRENT_TIME;
656}
657EXPORT_SYMBOL_GPL(shmem_truncate_range);
658
659static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
660{
661 struct inode *inode = dentry->d_inode;
662 struct shmem_inode_info *info = SHMEM_I(inode);
663 int error;
664
665 error = inode_change_ok(inode, attr);
666 if (error)
667 return error;
668
669 if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
670 loff_t oldsize = inode->i_size;
671 loff_t newsize = attr->ia_size;
672
673
674 if ((newsize < oldsize && (info->seals & F_SEAL_SHRINK)) ||
675 (newsize > oldsize && (info->seals & F_SEAL_GROW)))
676 return -EPERM;
677
678 if (newsize != oldsize) {
679 i_size_write(inode, newsize);
680 inode->i_ctime = inode->i_mtime = CURRENT_TIME;
681 }
682 if (newsize < oldsize) {
683 loff_t holebegin = round_up(newsize, PAGE_SIZE);
684 unmap_mapping_range(inode->i_mapping, holebegin, 0, 1);
685 shmem_truncate_range(inode, newsize, (loff_t)-1);
686
687 unmap_mapping_range(inode->i_mapping, holebegin, 0, 1);
688 }
689 }
690
691 setattr_copy(inode, attr);
692#ifdef CONFIG_TMPFS_POSIX_ACL
693 if (attr->ia_valid & ATTR_MODE)
694 error = generic_acl_chmod(inode);
695#endif
696 return error;
697}
698
699static void shmem_evict_inode(struct inode *inode)
700{
701 struct shmem_inode_info *info = SHMEM_I(inode);
702
703 if (inode->i_mapping->a_ops == &shmem_aops) {
704 shmem_unacct_size(info->flags, inode->i_size);
705 inode->i_size = 0;
706 shmem_truncate_range(inode, 0, (loff_t)-1);
707 if (!list_empty(&info->swaplist)) {
708 mutex_lock(&shmem_swaplist_mutex);
709 list_del_init(&info->swaplist);
710 mutex_unlock(&shmem_swaplist_mutex);
711 }
712 } else
713 kfree(info->symlink);
714
715 simple_xattrs_free(&info->xattrs);
716 WARN_ON(inode->i_blocks);
717 shmem_free_inode(inode->i_sb);
718 clear_inode(inode);
719}

/*
 * If the given swap entry is found in this inode's mapping, replace it by
 * the page: i.e. move the page from swap cache back into file cache.
 */
724static int shmem_unuse_inode(struct shmem_inode_info *info,
725 swp_entry_t swap, struct page **pagep)
726{
727 struct address_space *mapping = info->vfs_inode.i_mapping;
728 void *radswap;
729 pgoff_t index;
730 gfp_t gfp;
731 int error = 0;
732
733 radswap = swp_to_radix_entry(swap);
734 index = radix_tree_locate_item(&mapping->page_tree, radswap);
735 if (index == -1)
736 return 0;

	/*
	 * Move the list head so that the next swapoff search starts from here.
	 * But be careful: shmem_evict_inode checks list_empty without taking
	 * the mutex, and there's an instant in list_move_tail when
	 * info->swaplist would appear empty if it were the only entry there.
	 */
744 if (shmem_swaplist.next != &info->swaplist)
745 list_move_tail(&shmem_swaplist, &info->swaplist);
746
747 gfp = mapping_gfp_mask(mapping);
748 if (shmem_should_replace_page(*pagep, gfp)) {
749 mutex_unlock(&shmem_swaplist_mutex);
750 error = shmem_replace_page(pagep, gfp, info, index);
751 mutex_lock(&shmem_swaplist_mutex);
		/*
		 * We dropped the swaplist mutex around shmem_replace_page(),
		 * so the situation may have changed underneath us: recheck
		 * that the swap entry is still in use.  If its swap count has
		 * fallen to zero, the entry was freed meanwhile and there is
		 * nothing left for us to move into the file cache.
		 */
770 if (!page_swapcount(*pagep))
771 error = -ENOENT;
772 }
773
774
775
776
777
778
779 if (!error)
780 error = shmem_add_to_page_cache(*pagep, mapping, index,
781 GFP_NOWAIT, radswap);
782 if (error != -ENOMEM) {
783
784
785
786
787 delete_from_swap_cache(*pagep);
788 set_page_dirty(*pagep);
789 if (!error) {
790 spin_lock(&info->lock);
791 info->swapped--;
792 spin_unlock(&info->lock);
793 swap_free(swap);
794 }
795 error = 1;
796 }
797 return error;
798}

/*
 * shmem_unuse() - called by swapoff: search the swapped-out shmem inodes
 * for this swap entry, and move its page from swap cache into file cache.
 */
803int shmem_unuse(swp_entry_t swap, struct page *page)
804{
805 struct list_head *this, *next;
806 struct shmem_inode_info *info;
807 int found = 0;
808 int error = 0;

	/*
	 * The caller locked the page, but by now the swap slot may have been
	 * reused: if so just bail out, and the caller will come back later
	 * with the right page.
	 */
814 if (unlikely(!PageSwapCache(page) || page_private(page) != swap.val))
815 goto out;

	/*
	 * Charge the page using GFP_KERNEL while we can still wait, before
	 * taking shmem_swaplist_mutex (which might hold up shmem_writepage).
	 */
822 error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
823 if (error)
824 goto out;
825
826
827 mutex_lock(&shmem_swaplist_mutex);
828 list_for_each_safe(this, next, &shmem_swaplist) {
829 info = list_entry(this, struct shmem_inode_info, swaplist);
830 if (info->swapped)
831 found = shmem_unuse_inode(info, swap, &page);
832 else
833 list_del_init(&info->swaplist);
834 cond_resched();
835 if (found)
836 break;
837 }
838 mutex_unlock(&shmem_swaplist_mutex);
839
840 if (found < 0)
841 error = found;
842out:
843 unlock_page(page);
844 page_cache_release(page);
845 return error;
846}

/*
 * Move the page from the page cache to the swap cache.
 */
851static int shmem_writepage(struct page *page, struct writeback_control *wbc)
852{
853 struct shmem_inode_info *info;
854 struct address_space *mapping;
855 struct inode *inode;
856 swp_entry_t swap;
857 pgoff_t index;
858
859 BUG_ON(!PageLocked(page));
860 mapping = page->mapping;
861 index = page->index;
862 inode = mapping->host;
863 info = SHMEM_I(inode);
864 if (info->flags & VM_LOCKED)
865 goto redirty;
866 if (!total_swap_pages)
867 goto redirty;

	/*
	 * Our capabilities prevent regular writeback or sync from ever
	 * calling shmem_writepage; but a stacking filesystem might use
	 * ->writepage of its underlying filesystem.  tmpfs should only be
	 * asked to write out under memory pressure (for_reclaim), never for
	 * the writeback threads or sync.
	 */
876 if (!wbc->for_reclaim) {
877 WARN_ON_ONCE(1);
878 goto redirty;
879 }

	/*
	 * Pages brought into existence by fallocate, but never yet written,
	 * are still !Uptodate.  There is no point allocating swap for a page
	 * of zeroes: if it lies in a range that fallocate is still filling,
	 * just tell shmem_fallocate() that it is being pressed for memory
	 * and redirty the page; otherwise clear the page and mark it Uptodate
	 * so that it can be swapped out normally.
	 */
892 if (!PageUptodate(page)) {
893 if (inode->i_private) {
894 struct shmem_falloc *shmem_falloc;
895 spin_lock(&inode->i_lock);
896 shmem_falloc = inode->i_private;
897 if (shmem_falloc &&
898 !shmem_falloc->waitq &&
899 index >= shmem_falloc->start &&
900 index < shmem_falloc->next)
901 shmem_falloc->nr_unswapped++;
902 else
903 shmem_falloc = NULL;
904 spin_unlock(&inode->i_lock);
905 if (shmem_falloc)
906 goto redirty;
907 }
908 clear_highpage(page);
909 flush_dcache_page(page);
910 SetPageUptodate(page);
911 }
912
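	/* Reserve a swap slot for the page; if swap is full, just redirty and give up */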
913 swap = get_swap_page();
914 if (!swap.val)
915 goto redirty;

	/*
	 * Add the inode to shmem_unuse()'s list of swapped-out inodes, if
	 * it's not there already.  Do it now, before the page is moved to
	 * swap cache, while its page lock still protects the inode from
	 * eviction; and don't drop the mutex until info->swapped has been
	 * incremented, since shmem_unuse_inode() prunes !swapped inodes from
	 * the swaplist under this mutex.
	 */
925 mutex_lock(&shmem_swaplist_mutex);
926 if (list_empty(&info->swaplist))
927 list_add_tail(&info->swaplist, &shmem_swaplist);
928
929 if (add_to_swap_cache(page, swap, GFP_ATOMIC) == 0) {
930 spin_lock(&info->lock);
931 shmem_recalc_inode(inode);
932 info->swapped++;
933 spin_unlock(&info->lock);
934
935 swap_shmem_alloc(swap);
936 shmem_delete_from_page_cache(page, swp_to_radix_entry(swap));
937
938 mutex_unlock(&shmem_swaplist_mutex);
939 BUG_ON(page_mapped(page));
940 swap_writepage(page, wbc);
941 return 0;
942 }
943
944 mutex_unlock(&shmem_swaplist_mutex);
945 swapcache_free(swap, NULL);
946redirty:
947 set_page_dirty(page);
948 if (wbc->for_reclaim)
949 return AOP_WRITEPAGE_ACTIVATE;
950 unlock_page(page);
951 return 0;
952}
953
954#ifdef CONFIG_NUMA
955#ifdef CONFIG_TMPFS
956static void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol)
957{
958 char buffer[64];
959
960 if (!mpol || mpol->mode == MPOL_DEFAULT)
961 return;
962
963 mpol_to_str(buffer, sizeof(buffer), mpol);
964
965 seq_printf(seq, ",mpol=%s", buffer);
966}
967
968static struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
969{
970 struct mempolicy *mpol = NULL;
971 if (sbinfo->mpol) {
972 spin_lock(&sbinfo->stat_lock);
973 mpol = sbinfo->mpol;
974 mpol_get(mpol);
975 spin_unlock(&sbinfo->stat_lock);
976 }
977 return mpol;
978}
979#endif
980
981static struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp,
982 struct shmem_inode_info *info, pgoff_t index)
983{
984 struct vm_area_struct pvma;
985 struct page *page;
986
987
988 pvma.vm_start = 0;
989
990 pvma.vm_pgoff = index + info->vfs_inode.i_ino;
991 pvma.vm_ops = NULL;
992 pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, index);
993
994 page = swapin_readahead(swap, gfp, &pvma, 0);
995
996
997 mpol_cond_put(pvma.vm_policy);
998
999 return page;
1000}
1001
1002static struct page *shmem_alloc_page(gfp_t gfp,
1003 struct shmem_inode_info *info, pgoff_t index)
1004{
1005 struct vm_area_struct pvma;
1006 struct page *page;
1007
1008
1009 pvma.vm_start = 0;
1010
1011 pvma.vm_pgoff = index + info->vfs_inode.i_ino;
1012 pvma.vm_ops = NULL;
1013 pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, index);
1014
1015 page = alloc_page_vma(gfp, &pvma, 0);
1016
1017
1018 mpol_cond_put(pvma.vm_policy);
1019
1020 return page;
1021}
1022#else
1023#ifdef CONFIG_TMPFS
1024static inline void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol)
1025{
1026}
1027#endif
1028
1029static inline struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp,
1030 struct shmem_inode_info *info, pgoff_t index)
1031{
1032 return swapin_readahead(swap, gfp, NULL, 0);
1033}
1034
1035static inline struct page *shmem_alloc_page(gfp_t gfp,
1036 struct shmem_inode_info *info, pgoff_t index)
1037{
1038 return alloc_page(gfp);
1039}
1040#endif
1041
1042#if !defined(CONFIG_NUMA) || !defined(CONFIG_TMPFS)
1043static inline struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
1044{
1045 return NULL;
1046}
1047#endif

/*
 * When a page is swapped in (by the usual swapin in shmem_getpage_gfp(), or
 * by the swapoff path in shmem_unuse_inode()), it was allocated for the swap
 * cache rather than for this inode's mapping; so it may sit in a zone that
 * the mapping's gfp mask would not have allowed.  In that case it must be
 * copied to a suitable page before being moved into the file cache.
 */
1061static bool shmem_should_replace_page(struct page *page, gfp_t gfp)
1062{
1063 return page_zonenum(page) > gfp_zone(gfp);
1064}
1065
1066static int shmem_replace_page(struct page **pagep, gfp_t gfp,
1067 struct shmem_inode_info *info, pgoff_t index)
1068{
1069 struct page *oldpage, *newpage;
1070 struct address_space *swap_mapping;
1071 pgoff_t swap_index;
1072 int error;
1073
1074 oldpage = *pagep;
1075 swap_index = page_private(oldpage);
1076 swap_mapping = page_mapping(oldpage);

	/*
	 * We have arrived here because our zones are constrained, so don't
	 * limit the chance of success further by cpuset or node constraints.
	 */
1082 gfp &= ~GFP_CONSTRAINT_MASK;
1083 newpage = shmem_alloc_page(gfp, info, index);
1084 if (!newpage)
1085 return -ENOMEM;
1086
1087 page_cache_get(newpage);
1088 copy_highpage(newpage, oldpage);
1089 flush_dcache_page(newpage);
1090
1091 __set_page_locked(newpage);
1092 SetPageUptodate(newpage);
1093 SetPageSwapBacked(newpage);
1094 set_page_private(newpage, swap_index);
1095 SetPageSwapCache(newpage);

	/*
	 * Our caller will very soon move newpage out of swap cache, but it's
	 * a nice clean interface for us to replace oldpage by newpage there.
	 */
1101 spin_lock_irq(&swap_mapping->tree_lock);
1102 error = shmem_radix_tree_replace(swap_mapping, swap_index, oldpage,
1103 newpage);
1104 if (!error) {
1105 __inc_zone_page_state(newpage, NR_FILE_PAGES);
1106 __dec_zone_page_state(oldpage, NR_FILE_PAGES);
1107 }
1108 spin_unlock_irq(&swap_mapping->tree_lock);
1109
1110 if (unlikely(error)) {
		/*
		 * Is this possible?  I think not, now that our callers check
		 * both PageSwapCache and page_private after getting the page
		 * lock; but be defensive anyway, and reverse old to newpage
		 * for clearing and freeing.
		 */
1116 oldpage = newpage;
1117 } else {
1118 mem_cgroup_replace_page_cache(oldpage, newpage);
1119 lru_cache_add_anon(newpage);
1120 *pagep = newpage;
1121 }
1122
1123 ClearPageSwapCache(oldpage);
1124 set_page_private(oldpage, 0);
1125
1126 unlock_page(oldpage);
1127 page_cache_release(oldpage);
1128 page_cache_release(oldpage);
1129 return error;
1130}

/*
 * shmem_getpage_gfp - find page in cache, or get from swap, or allocate
 *
 * If we allocate a new one we do not mark it dirty: that's up to the caller.
 * If we swap it in we mark it dirty, since we also free the swap entry, and
 * a page cannot live in both the swap cache and the page cache at once.
 *
 * vma, vmf and fault_type are only supplied by shmem_fault().
 */
1139static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
1140 struct page **pagep, enum sgp_type sgp, gfp_t gfp,
1141 struct vm_area_struct *vma, struct vm_fault *vmf,
1142 int *fault_type)
1143{
1144 struct address_space *mapping = inode->i_mapping;
1145 struct shmem_inode_info *info;
1146 struct page *page;
1147 swp_entry_t swap;
1148 int error;
1149 int once = 0;
1150 int alloced = 0;
1151
1152 if (index > (MAX_LFS_FILESIZE >> PAGE_CACHE_SHIFT))
1153 return -EFBIG;
1154repeat:
1155 swap.val = 0;
1156 page = __find_lock_page(mapping, index);
1157 if (radix_tree_exceptional_entry(page)) {
1158 swap = radix_to_swp_entry(page);
1159 page = NULL;
1160 }
1161
1162 if (sgp != SGP_WRITE && sgp != SGP_FALLOC &&
1163 ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
1164 error = -EINVAL;
1165 goto unlock;
1166 }

	/*
	 * A page fallocated but never written is still !Uptodate: for
	 * SGP_READ treat it as a hole, otherwise fall through and clear it.
	 */
1169 if (page && !PageUptodate(page)) {
1170 if (sgp != SGP_READ)
1171 goto clear;
1172 unlock_page(page);
1173 page_cache_release(page);
1174 page = NULL;
1175 }
1176 if (page || (sgp == SGP_READ && !swap.val)) {
1177 *pagep = page;
1178 return 0;
1179 }
1180
1181
1182
1183
1184
1185 info = SHMEM_I(inode);
1186
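	/* Found a swap entry in the radix tree: bring the page back from swap */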
1187 if (swap.val) {
1188
1189 page = lookup_swap_cache(swap);
1190 if (!page) {
1191
1192 if (fault_type)
1193 *fault_type |= VM_FAULT_MAJOR;
1194 page = shmem_swapin(swap, gfp, info, index);
1195 if (!page) {
1196 error = -ENOMEM;
1197 goto failed;
1198 }
1199 }
1200
1201
1202 lock_page(page);
1203 if (!PageSwapCache(page) || page_private(page) != swap.val ||
1204 !shmem_confirm_swap(mapping, index, swap)) {
1205 error = -EEXIST;
1206 goto unlock;
1207 }
1208 if (!PageUptodate(page)) {
1209 error = -EIO;
1210 goto failed;
1211 }
1212 wait_on_page_writeback(page);
1213
1214 if (shmem_should_replace_page(page, gfp)) {
1215 error = shmem_replace_page(&page, gfp, info, index);
1216 if (error)
1217 goto failed;
1218 }
1219
1220 error = mem_cgroup_cache_charge(page, current->mm,
1221 gfp & GFP_RECLAIM_MASK);
1222 if (!error) {
1223 error = shmem_add_to_page_cache(page, mapping, index,
1224 gfp, swp_to_radix_entry(swap));
			/*
			 * The swap entry was already confirmed under page
			 * lock and no allocation is done here, so failure is
			 * not expected; but if free_swap_and_cache() raced
			 * and the expected entry has gone, just drop the page
			 * from swap cache below and retry from the top.
			 */
1237 if (error)
1238 delete_from_swap_cache(page);
1239 }
1240 if (error)
1241 goto failed;
1242
1243 spin_lock(&info->lock);
1244 info->swapped--;
1245 shmem_recalc_inode(inode);
1246 spin_unlock(&info->lock);
1247
1248 delete_from_swap_cache(page);
1249 set_page_dirty(page);
1250 swap_free(swap);
1251
1252 } else {
1253 if (vma && userfaultfd_missing(vma)) {
1254 *fault_type = handle_userfault(vmf,
1255 VM_UFFD_MISSING);
1256 return 0;
1257 }
1258 if (!shmem_inode_acct_block(inode, 1)) {
1259 error = -ENOSPC;
1260
1261 goto failed;
1262 }
1263
1264 page = shmem_alloc_page(gfp, info, index);
1265 if (!page) {
1266 error = -ENOMEM;
1267 goto decused;
1268 }
1269
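		/* Freshly allocated page: mark it swap-backed and lock it before inserting */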
1270 SetPageSwapBacked(page);
1271 __set_page_locked(page);
1272 error = mem_cgroup_cache_charge(page, current->mm,
1273 gfp & GFP_RECLAIM_MASK);
1274 if (error)
1275 goto decused;
1276 error = radix_tree_maybe_preload(gfp & GFP_RECLAIM_MASK);
1277 if (!error) {
1278 error = shmem_add_to_page_cache(page, mapping, index,
1279 gfp, NULL);
1280 radix_tree_preload_end();
1281 }
1282 if (error) {
1283 mem_cgroup_uncharge_cache_page(page);
1284 goto decused;
1285 }
1286 lru_cache_add_anon(page);
1287
1288 spin_lock(&info->lock);
1289 info->alloced++;
1290 inode->i_blocks += BLOCKS_PER_PAGE;
1291 shmem_recalc_inode(inode);
1292 spin_unlock(&info->lock);
1293 alloced = true;
1294
1295
1296
1297
1298 if (sgp == SGP_FALLOC)
1299 sgp = SGP_WRITE;
1300clear:
1301
1302
1303
1304
1305
1306 if (sgp != SGP_WRITE) {
1307 clear_highpage(page);
1308 flush_dcache_page(page);
1309 SetPageUptodate(page);
1310 }
1311 if (sgp == SGP_DIRTY)
1312 set_page_dirty(page);
1313 }
1314
1315
1316 if (sgp != SGP_WRITE && sgp != SGP_FALLOC &&
1317 ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
1318 if (alloced) {
1319 ClearPageDirty(page);
1320 delete_from_page_cache(page);
1321 spin_lock(&info->lock);
1322 shmem_recalc_inode(inode);
1323 spin_unlock(&info->lock);
1324 }
1325 error = -EINVAL;
1326 goto unlock;
1327 }
1328 *pagep = page;
1329 return 0;
1330
1331
1332
1333
1334decused:
1335 shmem_inode_unacct_blocks(inode, 1);
1336failed:
1337 if (swap.val && !shmem_confirm_swap(mapping, index, swap))
1338 error = -EEXIST;
1339unlock:
1340 if (page) {
1341 unlock_page(page);
1342 page_cache_release(page);
1343 }
1344 if (error == -ENOSPC && !once++) {
1345 info = SHMEM_I(inode);
1346 spin_lock(&info->lock);
1347 shmem_recalc_inode(inode);
1348 spin_unlock(&info->lock);
1349 goto repeat;
1350 }
1351 if (error == -EEXIST)
1352 goto repeat;
1353 return error;
1354}

/*
 * Like autoremove_wake_function(), but it removes the wait queue entry
 * unconditionally - even if something other than our page woke us up.
 */
1361static int synchronous_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
1362{
1363 int ret = default_wake_function(wait, mode, sync, key);
1364 list_del_init(&wait->task_list);
1365 return ret;
1366}
1367
1368static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1369{
1370 struct inode *inode = file_inode(vma->vm_file);
1371 gfp_t gfp = mapping_gfp_mask(inode->i_mapping);
1372 int error;
1373 int ret = VM_FAULT_LOCKED;

	/*
	 * Trinity found that probing a hole which tmpfs is punching can
	 * prevent the hole-punch from ever completing: the fault keeps
	 * re-instantiating pages faster than the punch can remove them.
	 * So if the fault lands in a hole that shmem_fallocate() is currently
	 * punching (advertised through inode->i_private), do not instantiate
	 * a new page; instead wait on the punching task's wait queue until
	 * the hole-punch has finished, then return and let the fault retry.
	 */
1392 if (unlikely(inode->i_private)) {
1393 struct shmem_falloc *shmem_falloc;
1394
1395 spin_lock(&inode->i_lock);
1396 shmem_falloc = inode->i_private;
1397 if (shmem_falloc &&
1398 shmem_falloc->waitq &&
1399 vmf->pgoff >= shmem_falloc->start &&
1400 vmf->pgoff < shmem_falloc->next) {
1401 wait_queue_head_t *shmem_falloc_waitq;
1402 DEFINE_WAIT_FUNC(shmem_fault_wait, synchronous_wake_function);
1403
1404 ret = VM_FAULT_NOPAGE;
1405 if ((vmf->flags & FAULT_FLAG_ALLOW_RETRY) &&
1406 !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
1407
1408 up_read(&vma->vm_mm->mmap_sem);
1409 ret = VM_FAULT_RETRY;
1410 }
1411
1412 shmem_falloc_waitq = shmem_falloc->waitq;
1413 prepare_to_wait(shmem_falloc_waitq, &shmem_fault_wait,
1414 TASK_UNINTERRUPTIBLE);
1415 spin_unlock(&inode->i_lock);
1416 schedule();

			/*
			 * shmem_falloc_waitq lives on the stack of the task
			 * doing the hole-punch, so it may vanish as soon as
			 * the punch completes: take i_lock again around
			 * finish_wait() before returning.
			 */
1425 spin_lock(&inode->i_lock);
1426 finish_wait(shmem_falloc_waitq, &shmem_fault_wait);
1427 spin_unlock(&inode->i_lock);
1428 return ret;
1429 }
1430 spin_unlock(&inode->i_lock);
1431 }
1432
1433 error = shmem_getpage_gfp(inode, vmf->pgoff, &vmf->page, SGP_CACHE,
1434 gfp, vma, vmf, &ret);
1435 if (error)
1436 return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS);
1437
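	/* A major fault means the page had to be read in from swap */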
1438 if (ret & VM_FAULT_MAJOR) {
1439 count_vm_event(PGMAJFAULT);
1440 mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
1441 }
1442 return ret;
1443}
1444
1445#ifdef CONFIG_NUMA
1446static int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *mpol)
1447{
1448 struct inode *inode = file_inode(vma->vm_file);
1449 return mpol_set_shared_policy(&SHMEM_I(inode)->policy, vma, mpol);
1450}
1451
1452static struct mempolicy *shmem_get_policy(struct vm_area_struct *vma,
1453 unsigned long addr)
1454{
1455 struct inode *inode = file_inode(vma->vm_file);
1456 pgoff_t index;
1457
1458 index = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
1459 return mpol_shared_policy_lookup(&SHMEM_I(inode)->policy, index);
1460}
1461#endif
1462
1463int shmem_lock(struct file *file, int lock, struct user_struct *user)
1464{
1465 struct inode *inode = file_inode(file);
1466 struct shmem_inode_info *info = SHMEM_I(inode);
1467 int retval = -ENOMEM;
1468
1469 spin_lock(&info->lock);
1470 if (lock && !(info->flags & VM_LOCKED)) {
1471 if (!user_shm_lock(inode->i_size, user))
1472 goto out_nomem;
1473 info->flags |= VM_LOCKED;
1474 mapping_set_unevictable(file->f_mapping);
1475 }
1476 if (!lock && (info->flags & VM_LOCKED) && user) {
1477 user_shm_unlock(inode->i_size, user);
1478 info->flags &= ~VM_LOCKED;
1479 mapping_clear_unevictable(file->f_mapping);
1480 }
1481 retval = 0;
1482
1483out_nomem:
1484 spin_unlock(&info->lock);
1485 return retval;
1486}
1487
1488static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
1489{
1490 file_accessed(file);
1491 vma->vm_ops = &shmem_vm_ops;
1492 return 0;
1493}
1494
1495static struct inode *shmem_get_inode(struct super_block *sb, const struct inode *dir,
1496 umode_t mode, dev_t dev, unsigned long flags)
1497{
1498 struct inode *inode;
1499 struct shmem_inode_info *info;
1500 struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
1501
1502 if (shmem_reserve_inode(sb))
1503 return NULL;
1504
1505 inode = new_inode(sb);
1506 if (inode) {
1507 inode->i_ino = get_next_ino();
1508 inode_init_owner(inode, dir, mode);
1509 inode->i_blocks = 0;
1510 inode->i_mapping->backing_dev_info = &shmem_backing_dev_info;
1511 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
1512 inode->i_generation = get_seconds();
1513 info = SHMEM_I(inode);
1514 memset(info, 0, (char *)inode - (char *)info);
1515 spin_lock_init(&info->lock);
1516 info->seals = F_SEAL_SEAL;
1517 info->flags = flags & VM_NORESERVE;
1518 INIT_LIST_HEAD(&info->swaplist);
1519 simple_xattrs_init(&info->xattrs);
1520 cache_no_acl(inode);
1521
1522 switch (mode & S_IFMT) {
1523 default:
1524 inode->i_op = &shmem_special_inode_operations;
1525 init_special_inode(inode, mode, dev);
1526 break;
1527 case S_IFREG:
1528 inode->i_mapping->a_ops = &shmem_aops;
1529 inode->i_op = &shmem_inode_operations;
1530 inode->i_fop = &shmem_file_operations;
1531 mpol_shared_policy_init(&info->policy,
1532 shmem_get_sbmpol(sbinfo));
1533 break;
1534 case S_IFDIR:
1535 inc_nlink(inode);
1536
1537 inode->i_size = 2 * BOGO_DIRENT_SIZE;
1538 inode->i_op = &shmem_dir_inode_operations.ops;
1539 inode->i_fop = &simple_dir_operations;
1540 inode->i_flags |= S_IOPS_WRAPPER;
1541 break;
1542 case S_IFLNK:
1543
1544
1545
1546
1547 mpol_shared_policy_init(&info->policy, NULL);
1548 break;
1549 }
1550 } else
1551 shmem_free_inode(sb);
1552 return inode;
1553}
1554
1555bool shmem_mapping(struct address_space *mapping)
1556{
1557 return mapping->backing_dev_info == &shmem_backing_dev_info;
1558}
1559
1560static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
1561 pmd_t *dst_pmd,
1562 struct vm_area_struct *dst_vma,
1563 unsigned long dst_addr,
1564 unsigned long src_addr,
1565 bool zeropage,
1566 struct page **pagep)
1567{
1568 struct inode *inode = file_inode(dst_vma->vm_file);
1569 struct shmem_inode_info *info = SHMEM_I(inode);
1570 struct address_space *mapping = inode->i_mapping;
1571 gfp_t gfp = mapping_gfp_mask(mapping);
1572 pgoff_t pgoff = linear_page_index(dst_vma, dst_addr);
1573 spinlock_t *ptl;
1574 void *page_kaddr;
1575 struct page *page;
1576 pte_t _dst_pte, *dst_pte;
1577 int ret;
1578
1579 ret = -ENOMEM;
1580 if (!shmem_inode_acct_block(inode, 1))
1581 goto out;
1582
1583 if (!*pagep) {
1584 page = shmem_alloc_page(gfp, info, pgoff);
1585 if (!page)
1586 goto out_unacct_blocks;
1587
1588 if (!zeropage) {
1589 page_kaddr = kmap_atomic(page);
1590 ret = copy_from_user(page_kaddr,
1591 (const void __user *)src_addr,
1592 PAGE_SIZE);
1593 kunmap_atomic(page_kaddr);
1594
1595
1596 if (unlikely(ret)) {
1597 *pagep = page;
1598 shmem_inode_unacct_blocks(inode, 1);
1599
1600 return -EFAULT;
1601 }
1602 } else {
1603 clear_highpage(page);
1604 }
1605 } else {
1606 page = *pagep;
1607 *pagep = NULL;
1608 }
1609
1610 VM_BUG_ON(PageLocked(page) || PageSwapBacked(page));
1611 __set_page_locked(page);
1612 __SetPageSwapBacked(page);
1613 __SetPageUptodate(page);
1614
1615 ret = mem_cgroup_cache_charge(page, dst_mm,
1616 gfp & GFP_RECLAIM_MASK);
1617 if (ret)
1618 goto out_release;
1619
1620 ret = radix_tree_maybe_preload(gfp & GFP_RECLAIM_MASK);
1621 if (!ret) {
1622 ret = shmem_add_to_page_cache(page, mapping, pgoff, gfp, NULL);
1623 radix_tree_preload_end();
1624 }
1625 if (ret)
1626 goto out_release_uncharge;
1627
1628 _dst_pte = mk_pte(page, dst_vma->vm_page_prot);
1629 if (dst_vma->vm_flags & VM_WRITE)
1630 _dst_pte = pte_mkwrite(pte_mkdirty(_dst_pte));
1631
1632 ret = -EEXIST;
1633 dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
1634 if (!pte_none(*dst_pte))
1635 goto out_release_uncharge_unlock;
1636
1637 lru_cache_add_anon(page);
1638
1639 spin_lock(&info->lock);
1640 info->alloced++;
1641 inode->i_blocks += BLOCKS_PER_PAGE;
1642 shmem_recalc_inode(inode);
1643 spin_unlock(&info->lock);
1644
1645 inc_mm_counter(dst_mm, mm_counter_file(page));
1646 page_add_file_rmap(page);
1647 set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
1648
1649
1650 update_mmu_cache(dst_vma, dst_addr, dst_pte);
1651 unlock_page(page);
1652 pte_unmap_unlock(dst_pte, ptl);
1653 ret = 0;
1654out:
1655 return ret;
1656out_release_uncharge_unlock:
1657 pte_unmap_unlock(dst_pte, ptl);
1658out_release_uncharge:
1659 mem_cgroup_uncharge_cache_page(page);
1660out_release:
1661 unlock_page(page);
1662 put_page(page);
1663out_unacct_blocks:
1664 shmem_inode_unacct_blocks(inode, 1);
1665 goto out;
1666}
1667
1668int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm,
1669 pmd_t *dst_pmd,
1670 struct vm_area_struct *dst_vma,
1671 unsigned long dst_addr,
1672 unsigned long src_addr,
1673 struct page **pagep)
1674{
1675 return shmem_mfill_atomic_pte(dst_mm, dst_pmd, dst_vma,
1676 dst_addr, src_addr, false, pagep);
1677}
1678
1679int shmem_mfill_zeropage_pte(struct mm_struct *dst_mm,
1680 pmd_t *dst_pmd,
1681 struct vm_area_struct *dst_vma,
1682 unsigned long dst_addr)
1683{
1684 struct page *page = NULL;
1685
1686 return shmem_mfill_atomic_pte(dst_mm, dst_pmd, dst_vma,
1687 dst_addr, 0, true, &page);
1688}
1689
1690#ifdef CONFIG_TMPFS
1691static const struct inode_operations shmem_symlink_inode_operations;
1692static const struct inode_operations shmem_short_symlink_operations;
1693
1694#ifdef CONFIG_TMPFS_XATTR
1695static int shmem_initxattrs(struct inode *, const struct xattr *, void *);
1696#else
1697#define shmem_initxattrs NULL
1698#endif
1699
1700static int
1701shmem_write_begin(struct file *file, struct address_space *mapping,
1702 loff_t pos, unsigned len, unsigned flags,
1703 struct page **pagep, void **fsdata)
1704{
1705 struct inode *inode = mapping->host;
1706 struct shmem_inode_info *info = SHMEM_I(inode);
1707 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
1708
1709
1710 if (unlikely(info->seals)) {
1711 if (info->seals & F_SEAL_WRITE)
1712 return -EPERM;
1713 if ((info->seals & F_SEAL_GROW) && pos + len > inode->i_size)
1714 return -EPERM;
1715 }
1716
1717 return shmem_getpage(inode, index, pagep, SGP_WRITE, NULL);
1718}
1719
1720static int
1721shmem_write_end(struct file *file, struct address_space *mapping,
1722 loff_t pos, unsigned len, unsigned copied,
1723 struct page *page, void *fsdata)
1724{
1725 struct inode *inode = mapping->host;
1726
1727 if (pos + copied > inode->i_size)
1728 i_size_write(inode, pos + copied);
1729
1730 if (!PageUptodate(page)) {
1731 if (copied < PAGE_CACHE_SIZE) {
1732 unsigned from = pos & (PAGE_CACHE_SIZE - 1);
1733 zero_user_segments(page, 0, from,
1734 from + copied, PAGE_CACHE_SIZE);
1735 }
1736 SetPageUptodate(page);
1737 }
1738 set_page_dirty(page);
1739 unlock_page(page);
1740 page_cache_release(page);
1741
1742 return copied;
1743}
1744
1745static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_t *desc, read_actor_t actor)
1746{
1747 struct inode *inode = file_inode(filp);
1748 struct address_space *mapping = inode->i_mapping;
1749 pgoff_t index;
1750 unsigned long offset;
1751 enum sgp_type sgp = SGP_READ;

	/*
	 * If this read is on behalf of the kernel (e.g. a stacking
	 * filesystem via KERNEL_DS), holes must be given real pages and
	 * marked dirty, so use SGP_DIRTY instead of SGP_READ.
	 */
1758 if (segment_eq(get_fs(), KERNEL_DS))
1759 sgp = SGP_DIRTY;
1760
1761 index = *ppos >> PAGE_CACHE_SHIFT;
1762 offset = *ppos & ~PAGE_CACHE_MASK;
1763
1764 for (;;) {
1765 struct page *page = NULL;
1766 pgoff_t end_index;
1767 unsigned long nr, ret;
1768 loff_t i_size = i_size_read(inode);
1769
1770 end_index = i_size >> PAGE_CACHE_SHIFT;
1771 if (index > end_index)
1772 break;
1773 if (index == end_index) {
1774 nr = i_size & ~PAGE_CACHE_MASK;
1775 if (nr <= offset)
1776 break;
1777 }
1778
1779 desc->error = shmem_getpage(inode, index, &page, sgp, NULL);
1780 if (desc->error) {
1781 if (desc->error == -EINVAL)
1782 desc->error = 0;
1783 break;
1784 }
1785 if (page)
1786 unlock_page(page);
1787
1788
1789
1790
1791
1792 nr = PAGE_CACHE_SIZE;
1793 i_size = i_size_read(inode);
1794 end_index = i_size >> PAGE_CACHE_SHIFT;
1795 if (index == end_index) {
1796 nr = i_size & ~PAGE_CACHE_MASK;
1797 if (nr <= offset) {
1798 if (page)
1799 page_cache_release(page);
1800 break;
1801 }
1802 }
1803 nr -= offset;
1804
1805 if (page) {
1806
1807
1808
1809
1810
1811 if (mapping_writably_mapped(mapping))
1812 flush_dcache_page(page);
1813
1814
1815
1816 if (!offset)
1817 mark_page_accessed(page);
1818 } else {
1819 page = ZERO_PAGE(0);
1820 page_cache_get(page);
1821 }
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833 ret = actor(desc, page, offset, nr);
1834 offset += ret;
1835 index += offset >> PAGE_CACHE_SHIFT;
1836 offset &= ~PAGE_CACHE_MASK;
1837
1838 page_cache_release(page);
1839 if (ret != nr || !desc->count)
1840 break;
1841
1842 cond_resched();
1843 }
1844
1845 *ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
1846 file_accessed(filp);
1847}
1848
1849static ssize_t shmem_file_aio_read(struct kiocb *iocb,
1850 const struct iovec *iov, unsigned long nr_segs, loff_t pos)
1851{
1852 struct file *filp = iocb->ki_filp;
1853 ssize_t retval;
1854 unsigned long seg;
1855 size_t count;
1856 loff_t *ppos = &iocb->ki_pos;
1857
1858 retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
1859 if (retval)
1860 return retval;
1861
1862 for (seg = 0; seg < nr_segs; seg++) {
1863 read_descriptor_t desc;
1864
1865 desc.written = 0;
1866 desc.arg.buf = iov[seg].iov_base;
1867 desc.count = iov[seg].iov_len;
1868 if (desc.count == 0)
1869 continue;
1870 desc.error = 0;
1871 do_shmem_file_read(filp, ppos, &desc, file_read_actor);
1872 retval += desc.written;
1873 if (desc.error) {
1874 retval = retval ?: desc.error;
1875 break;
1876 }
1877 if (desc.count > 0)
1878 break;
1879 }
1880 return retval;
1881}
1882
1883static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
1884 struct pipe_inode_info *pipe, size_t len,
1885 unsigned int flags)
1886{
1887 struct address_space *mapping = in->f_mapping;
1888 struct inode *inode = mapping->host;
1889 unsigned int loff, nr_pages, req_pages;
1890 struct page *pages[PIPE_DEF_BUFFERS];
1891 struct partial_page partial[PIPE_DEF_BUFFERS];
1892 struct page *page;
1893 pgoff_t index, end_index;
1894 loff_t isize, left;
1895 int error, page_nr;
1896 struct splice_pipe_desc spd = {
1897 .pages = pages,
1898 .partial = partial,
1899 .nr_pages_max = PIPE_DEF_BUFFERS,
1900 .flags = flags,
1901 .ops = &page_cache_pipe_buf_ops,
1902 .spd_release = spd_release_page,
1903 };
1904
1905 isize = i_size_read(inode);
1906 if (unlikely(*ppos >= isize))
1907 return 0;
1908
1909 left = isize - *ppos;
1910 if (unlikely(left < len))
1911 len = left;
1912
1913 if (splice_grow_spd(pipe, &spd))
1914 return -ENOMEM;
1915
1916 index = *ppos >> PAGE_CACHE_SHIFT;
1917 loff = *ppos & ~PAGE_CACHE_MASK;
1918 req_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
1919 nr_pages = min(req_pages, pipe->buffers);
1920
1921 spd.nr_pages = find_get_pages_contig(mapping, index,
1922 nr_pages, spd.pages);
1923 index += spd.nr_pages;
1924 error = 0;
1925
1926 while (spd.nr_pages < nr_pages) {
1927 error = shmem_getpage(inode, index, &page, SGP_CACHE, NULL);
1928 if (error)
1929 break;
1930 unlock_page(page);
1931 spd.pages[spd.nr_pages++] = page;
1932 index++;
1933 }
1934
1935 index = *ppos >> PAGE_CACHE_SHIFT;
1936 nr_pages = spd.nr_pages;
1937 spd.nr_pages = 0;
1938
1939 for (page_nr = 0; page_nr < nr_pages; page_nr++) {
1940 unsigned int this_len;
1941
1942 if (!len)
1943 break;
1944
1945 this_len = min_t(unsigned long, len, PAGE_CACHE_SIZE - loff);
1946 page = spd.pages[page_nr];
1947
1948 if (!PageUptodate(page) || page->mapping != mapping) {
1949 error = shmem_getpage(inode, index, &page,
1950 SGP_CACHE, NULL);
1951 if (error)
1952 break;
1953 unlock_page(page);
1954 page_cache_release(spd.pages[page_nr]);
1955 spd.pages[page_nr] = page;
1956 }
1957
1958 isize = i_size_read(inode);
1959 end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
1960 if (unlikely(!isize || index > end_index))
1961 break;
1962
1963 if (end_index == index) {
1964 unsigned int plen;
1965
1966 plen = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
1967 if (plen <= loff)
1968 break;
1969
1970 this_len = min(this_len, plen - loff);
1971 len = this_len;
1972 }
1973
1974 spd.partial[page_nr].offset = loff;
1975 spd.partial[page_nr].len = this_len;
1976 len -= this_len;
1977 loff = 0;
1978 spd.nr_pages++;
1979 index++;
1980 }
1981
1982 while (page_nr < nr_pages)
1983 page_cache_release(spd.pages[page_nr++]);
1984
1985 if (spd.nr_pages)
1986 error = splice_to_pipe(pipe, &spd);
1987
1988 splice_shrink_spd(&spd);
1989
1990 if (error > 0) {
1991 *ppos += error;
1992 file_accessed(in);
1993 }
1994 return error;
1995}

/*
 * llseek SEEK_DATA or SEEK_HOLE through the radix tree.
 */
2000static pgoff_t shmem_seek_hole_data(struct address_space *mapping,
2001 pgoff_t index, pgoff_t end, int whence)
2002{
2003 struct page *page;
2004 struct pagevec pvec;
2005 pgoff_t indices[PAGEVEC_SIZE];
2006 bool done = false;
2007 int i;
2008
2009 pagevec_init(&pvec, 0);
2010 pvec.nr = 1;
2011 while (!done) {
2012 pvec.nr = __find_get_pages(mapping, index,
2013 pvec.nr, pvec.pages, indices);
2014 if (!pvec.nr) {
2015 if (whence == SEEK_DATA)
2016 index = end;
2017 break;
2018 }
2019 for (i = 0; i < pvec.nr; i++, index++) {
2020 if (index < indices[i]) {
2021 if (whence == SEEK_HOLE) {
2022 done = true;
2023 break;
2024 }
2025 index = indices[i];
2026 }
2027 page = pvec.pages[i];
2028 if (page && !radix_tree_exceptional_entry(page)) {
2029 if (!PageUptodate(page))
2030 page = NULL;
2031 }
2032 if (index >= end ||
2033 (page && whence == SEEK_DATA) ||
2034 (!page && whence == SEEK_HOLE)) {
2035 done = true;
2036 break;
2037 }
2038 }
2039 pagevec_remove_exceptionals(&pvec);
2040 pagevec_release(&pvec);
2041 pvec.nr = PAGEVEC_SIZE;
2042 cond_resched();
2043 }
2044 return index;
2045}
2046
2047static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence)
2048{
2049 struct address_space *mapping = file->f_mapping;
2050 struct inode *inode = mapping->host;
2051 pgoff_t start, end;
2052 loff_t new_offset;
2053
2054 if (whence != SEEK_DATA && whence != SEEK_HOLE)
2055 return generic_file_llseek_size(file, offset, whence,
2056 MAX_LFS_FILESIZE, i_size_read(inode));
2057 mutex_lock(&inode->i_mutex);
2058
2059
2060 if (offset < 0)
2061 offset = -EINVAL;
2062 else if (offset >= inode->i_size)
2063 offset = -ENXIO;
2064 else {
2065 start = offset >> PAGE_CACHE_SHIFT;
2066 end = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
2067 new_offset = shmem_seek_hole_data(mapping, start, end, whence);
2068 new_offset <<= PAGE_CACHE_SHIFT;
2069 if (new_offset > offset) {
2070 if (new_offset < inode->i_size)
2071 offset = new_offset;
2072 else if (whence == SEEK_DATA)
2073 offset = -ENXIO;
2074 else
2075 offset = inode->i_size;
2076 }
2077 }
2078
2079 if (offset >= 0)
2080 offset = vfs_setpos(file, offset, MAX_LFS_FILESIZE);
2081 mutex_unlock(&inode->i_mutex);
2082 return offset;
2083}

/*
 * We need a tag: a new tag would expand every radix_tree_node by 8 bytes,
 * so reuse a tag which we firmly believe is never set or cleared on shmem.
 */
2089#define SHMEM_TAG_PINNED PAGECACHE_TAG_TOWRITE
2090#define LAST_SCAN 4
2091
2092static void shmem_tag_pins(struct address_space *mapping)
2093{
2094 struct radix_tree_iter iter;
2095 void **slot;
2096 pgoff_t start;
2097 struct page *page;
2098
2099 lru_add_drain();
2100 start = 0;
2101 rcu_read_lock();
2102
2103 radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
2104 page = radix_tree_deref_slot(slot);
2105 if (!page || radix_tree_exception(page)) {
2106 if (radix_tree_deref_retry(page)) {
2107 slot = radix_tree_iter_retry(&iter);
2108 continue;
2109 }
2110 } else if (page_count(page) - page_mapcount(page) > 1) {
2111 spin_lock_irq(&mapping->tree_lock);
2112 radix_tree_tag_set(&mapping->page_tree, iter.index,
2113 SHMEM_TAG_PINNED);
2114 spin_unlock_irq(&mapping->tree_lock);
2115 }
2116
2117 if (need_resched()) {
2118 cond_resched_rcu();
2119 slot = radix_tree_iter_next(&iter);
2120 }
2121 }
2122 rcu_read_unlock();
2123}

/*
 * Setting F_SEAL_WRITE requires that there are no outstanding extra
 * references to the pages (for example pins taken via get_user_pages() for
 * direct I/O).  shmem_tag_pins() has tagged such pages; scan them a bounded
 * number of times, waiting for the extra references to be dropped, and
 * return -EBUSY if some remain at the end.
 */
2134static int shmem_wait_for_pins(struct address_space *mapping)
2135{
2136 struct radix_tree_iter iter;
2137 void **slot;
2138 pgoff_t start;
2139 struct page *page;
2140 int error, scan;
2141
2142 shmem_tag_pins(mapping);
2143
2144 error = 0;
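	/* Scan repeatedly, waiting a little longer each time, for the extra references to drop */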
2145 for (scan = 0; scan <= LAST_SCAN; scan++) {
2146 if (!radix_tree_tagged(&mapping->page_tree, SHMEM_TAG_PINNED))
2147 break;
2148
2149 if (!scan)
2150 lru_add_drain_all();
2151 else if (schedule_timeout_killable((HZ << scan) / 200))
2152 scan = LAST_SCAN;
2153
2154 start = 0;
2155 rcu_read_lock();
2156 radix_tree_for_each_tagged(slot, &mapping->page_tree, &iter,
2157 start, SHMEM_TAG_PINNED) {
2158
2159 page = radix_tree_deref_slot(slot);
2160 if (radix_tree_exception(page)) {
2161 if (radix_tree_deref_retry(page)) {
2162 slot = radix_tree_iter_retry(&iter);
2163 continue;
2164 }
2165
2166 page = NULL;
2167 }
2168
2169 if (page &&
2170 page_count(page) - page_mapcount(page) != 1) {
2171 if (scan < LAST_SCAN)
2172 goto continue_resched;
2173
2174
2175
2176
2177
2178
2179 error = -EBUSY;
2180 }
2181
2182 spin_lock_irq(&mapping->tree_lock);
2183 radix_tree_tag_clear(&mapping->page_tree,
2184 iter.index, SHMEM_TAG_PINNED);
2185 spin_unlock_irq(&mapping->tree_lock);
2186continue_resched:
2187 if (need_resched()) {
2188 cond_resched_rcu();
2189 slot = radix_tree_iter_next(&iter);
2190 }
2191 }
2192 rcu_read_unlock();
2193 }
2194
2195 return error;
2196}
2197
2198#define F_ALL_SEALS (F_SEAL_SEAL | \
2199 F_SEAL_SHRINK | \
2200 F_SEAL_GROW | \
2201 F_SEAL_WRITE)
2202
2203int shmem_add_seals(struct file *file, unsigned int seals)
2204{
2205 struct inode *inode = file_inode(file);
2206 struct shmem_inode_info *info = SHMEM_I(inode);
2207 int error;

	/*
	 * SEALING
	 *
	 * Sealing allows multiple parties to share a shmem file, but restrict
	 * how the file can be modified from then on.
	 *
	 * F_SEAL_SEAL:   prevent further seals from being set
	 * F_SEAL_SHRINK: prevent the file from shrinking
	 * F_SEAL_GROW:   prevent the file from growing
	 * F_SEAL_WRITE:  prevent write access to the file's contents
	 *
	 * Seals can only be added, never removed, and only via a writable
	 * file description; adding F_SEAL_WRITE additionally requires that
	 * there be no existing writable mappings and no pinned pages.
	 */
2239 if (file->f_op != &shmem_file_operations)
2240 return -EINVAL;
2241 if (!(file->f_mode & FMODE_WRITE))
2242 return -EPERM;
2243 if (seals & ~(unsigned int)F_ALL_SEALS)
2244 return -EINVAL;
2245
2246 mutex_lock(&inode->i_mutex);
2247
2248 if (info->seals & F_SEAL_SEAL) {
2249 error = -EPERM;
2250 goto unlock;
2251 }
2252
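	/* Adding F_SEAL_WRITE: forbid new writable mappings and wait out existing page pins */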
2253 if ((seals & F_SEAL_WRITE) && !(info->seals & F_SEAL_WRITE)) {
2254 error = mapping_deny_writable(file->f_mapping);
2255 if (error)
2256 goto unlock;
2257
2258 error = shmem_wait_for_pins(file->f_mapping);
2259 if (error) {
2260 mapping_allow_writable(file->f_mapping);
2261 goto unlock;
2262 }
2263 }
2264
2265 info->seals |= seals;
2266 error = 0;
2267
2268unlock:
2269 mutex_unlock(&inode->i_mutex);
2270 return error;
2271}
2272EXPORT_SYMBOL_GPL(shmem_add_seals);
2273
2274int shmem_get_seals(struct file *file)
2275{
2276 if (file->f_op != &shmem_file_operations)
2277 return -EINVAL;
2278
2279 return SHMEM_I(file_inode(file))->seals;
2280}
2281EXPORT_SYMBOL_GPL(shmem_get_seals);
2282
2283long shmem_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
2284{
2285 long error;
2286
2287 switch (cmd) {
2288 case F_ADD_SEALS:
2289
2290 if (arg > UINT_MAX)
2291 return -EINVAL;
2292
2293 error = shmem_add_seals(file, arg);
2294 break;
2295 case F_GET_SEALS:
2296 error = shmem_get_seals(file);
2297 break;
2298 default:
2299 error = -EINVAL;
2300 break;
2301 }
2302
2303 return error;
2304}
2305
2306static long shmem_fallocate(struct file *file, int mode, loff_t offset,
2307 loff_t len)
2308{
2309 struct inode *inode = file_inode(file);
2310 struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
2311 struct shmem_inode_info *info = SHMEM_I(inode);
2312 struct shmem_falloc shmem_falloc;
2313 pgoff_t start, index, end;
2314 int error;
2315
2316 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
2317 return -EOPNOTSUPP;
2318
2319 mutex_lock(&inode->i_mutex);
2320
2321 if (mode & FALLOC_FL_PUNCH_HOLE) {
2322 struct address_space *mapping = file->f_mapping;
2323 loff_t unmap_start = round_up(offset, PAGE_SIZE);
2324 loff_t unmap_end = round_down(offset + len, PAGE_SIZE) - 1;
2325 DECLARE_WAIT_QUEUE_HEAD_ONSTACK(shmem_falloc_waitq);
2326
2327
2328 if (info->seals & F_SEAL_WRITE) {
2329 error = -EPERM;
2330 goto out;
2331 }
2332
2333 shmem_falloc.waitq = &shmem_falloc_waitq;
2334 shmem_falloc.start = unmap_start >> PAGE_SHIFT;
2335 shmem_falloc.next = (unmap_end + 1) >> PAGE_SHIFT;
2336 spin_lock(&inode->i_lock);
2337 inode->i_private = &shmem_falloc;
2338 spin_unlock(&inode->i_lock);
2339
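		/* Unmap the hole from all address spaces, then drop the pages themselves */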
2340 if ((u64)unmap_end > (u64)unmap_start)
2341 unmap_mapping_range(mapping, unmap_start,
2342 1 + unmap_end - unmap_start, 0);
2343 shmem_truncate_range(inode, offset, offset + len - 1);
2344
2345
2346 spin_lock(&inode->i_lock);
2347 inode->i_private = NULL;
2348 wake_up_all(&shmem_falloc_waitq);
2349 WARN_ON_ONCE(!list_empty(&shmem_falloc_waitq.task_list));
2350 spin_unlock(&inode->i_lock);
2351 error = 0;
2352 goto out;
2353 }
2354
2355
2356 error = inode_newsize_ok(inode, offset + len);
2357 if (error)
2358 goto out;
2359
2360 if ((info->seals & F_SEAL_GROW) && offset + len > inode->i_size) {
2361 error = -EPERM;
2362 goto out;
2363 }
2364
2365 start = offset >> PAGE_CACHE_SHIFT;
2366 end = (offset + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
2367
2368 if (sbinfo->max_blocks && end - start > sbinfo->max_blocks) {
2369 error = -ENOSPC;
2370 goto out;
2371 }
2372
2373 shmem_falloc.waitq = NULL;
2374 shmem_falloc.start = start;
2375 shmem_falloc.next = start;
2376 shmem_falloc.nr_falloced = 0;
2377 shmem_falloc.nr_unswapped = 0;
2378 spin_lock(&inode->i_lock);
2379 inode->i_private = &shmem_falloc;
2380 spin_unlock(&inode->i_lock);
2381
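	/* Allocate the pages one at a time, watching for signals and for writepage pressure */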
2382 for (index = start; index < end; index++) {
2383 struct page *page;
2384
2385
2386
2387
2388
2389 if (signal_pending(current))
2390 error = -EINTR;
2391 else if (shmem_falloc.nr_unswapped > shmem_falloc.nr_falloced)
2392 error = -ENOMEM;
2393 else
2394 error = shmem_getpage(inode, index, &page, SGP_FALLOC,
2395 NULL);
2396 if (error) {
2397
2398 shmem_undo_range(inode,
2399 (loff_t)start << PAGE_CACHE_SHIFT,
2400 (loff_t)index << PAGE_CACHE_SHIFT, true);
2401 goto undone;
2402 }
2403
2404
2405
2406
2407
2408 shmem_falloc.next++;
2409 if (!PageUptodate(page))
2410 shmem_falloc.nr_falloced++;
2411
2412
2413
2414
2415
2416
2417
2418
2419 set_page_dirty(page);
2420 unlock_page(page);
2421 page_cache_release(page);
2422 cond_resched();
2423 }
2424
2425 if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size)
2426 i_size_write(inode, offset + len);
2427 inode->i_ctime = CURRENT_TIME;
2428undone:
2429 spin_lock(&inode->i_lock);
2430 inode->i_private = NULL;
2431 spin_unlock(&inode->i_lock);
2432out:
2433 mutex_unlock(&inode->i_mutex);
2434 return error;
2435}
2436
2437static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
2438{
2439 struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb);
2440
2441 buf->f_type = TMPFS_MAGIC;
2442 buf->f_bsize = PAGE_CACHE_SIZE;
2443 buf->f_namelen = NAME_MAX;
2444 if (sbinfo->max_blocks) {
2445 buf->f_blocks = sbinfo->max_blocks;
2446 buf->f_bavail =
2447 buf->f_bfree = sbinfo->max_blocks -
2448 percpu_counter_sum(&sbinfo->used_blocks);
2449 }
2450 if (sbinfo->max_inodes) {
2451 buf->f_files = sbinfo->max_inodes;
2452 buf->f_ffree = sbinfo->free_inodes;
2453 }
2454
2455 return 0;
2456}

/*
 * File creation. Allocate an inode, and we're done..
 */
2461static int
2462shmem_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
2463{
2464 struct inode *inode;
2465 int error = -ENOSPC;
2466
2467 inode = shmem_get_inode(dir->i_sb, dir, mode, dev, VM_NORESERVE);
2468 if (inode) {
2469 error = security_inode_init_security(inode, dir,
2470 &dentry->d_name,
2471 shmem_initxattrs, NULL);
2472 if (error) {
2473 if (error != -EOPNOTSUPP) {
2474 iput(inode);
2475 return error;
2476 }
2477 }
2478#ifdef CONFIG_TMPFS_POSIX_ACL
2479 error = generic_acl_init(inode, dir);
2480 if (error) {
2481 iput(inode);
2482 return error;
2483 }
2484#else
2485 error = 0;
2486#endif
2487 dir->i_size += BOGO_DIRENT_SIZE;
2488 dir->i_ctime = dir->i_mtime = CURRENT_TIME;
2489 d_instantiate(dentry, inode);
2490 dget(dentry);
2491 }
2492 return error;
2493}
2494
2495static int
2496shmem_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
2497{
2498 struct inode *inode;
2499 int error = -ENOSPC;
2500
2501 inode = shmem_get_inode(dir->i_sb, dir, mode, 0, VM_NORESERVE);
2502 if (inode) {
2503 error = security_inode_init_security(inode, dir,
2504 NULL,
2505 shmem_initxattrs, NULL);
2506 if (error) {
2507 if (error != -EOPNOTSUPP) {
2508 iput(inode);
2509 return error;
2510 }
2511 }
2512#ifdef CONFIG_TMPFS_POSIX_ACL
2513 error = generic_acl_init(inode, dir);
2514 if (error) {
2515 iput(inode);
2516 return error;
2517 }
2518#else
2519 error = 0;
2520#endif
2521 d_tmpfile(dentry, inode);
2522 }
2523 return error;
2524}
2525
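/* A directory is a mknod() of S_IFDIR plus a link count bump on the parent. */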
2526static int shmem_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
2527{
2528 int error;
2529
2530 if ((error = shmem_mknod(dir, dentry, mode | S_IFDIR, 0)))
2531 return error;
2532 inc_nlink(dir);
2533 return 0;
2534}
2535
2536static int shmem_create(struct inode *dir, struct dentry *dentry, umode_t mode,
2537 bool excl)
2538{
2539 return shmem_mknod(dir, dentry, mode | S_IFREG, 0);
2540}
2541
/*
 * Link a file..
 */
2545static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
2546{
2547 struct inode *inode = old_dentry->d_inode;
2548 int ret;
2549
	/*
	 * No ordinary (disk based) filesystem counts links as inodes;
	 * but each new link needs a new dentry, pinning lowmem, and
	 * tmpfs dentries cannot be pruned until they are unlinked.
	 */
2555 ret = shmem_reserve_inode(inode->i_sb);
2556 if (ret)
2557 goto out;
2558
2559 dir->i_size += BOGO_DIRENT_SIZE;
2560 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
2561 inc_nlink(inode);
2562 ihold(inode);
2563 dget(dentry);
2564 d_instantiate(dentry, inode);
2565out:
2566 return ret;
2567}
2568
2569static int shmem_unlink(struct inode *dir, struct dentry *dentry)
2570{
2571 struct inode *inode = dentry->d_inode;
2572
2573 if (inode->i_nlink > 1 && !S_ISDIR(inode->i_mode))
2574 shmem_free_inode(inode->i_sb);
2575
2576 dir->i_size -= BOGO_DIRENT_SIZE;
2577 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
2578 drop_nlink(inode);
2579 dput(dentry);
2580 return 0;
2581}
2582
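/*
 * rmdir: refuse non-empty directories, then undo the extra link counts a
 * directory carries before unlinking it like a regular file.
 */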
2583static int shmem_rmdir(struct inode *dir, struct dentry *dentry)
2584{
2585 if (!simple_empty(dentry))
2586 return -ENOTEMPTY;
2587
2588 drop_nlink(dentry->d_inode);
2589 drop_nlink(dir);
2590 return shmem_unlink(dir, dentry);
2591}
2592
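/*
 * RENAME_EXCHANGE: the VFS has already swapped the dentries; all that is
 * left here is fixing up the parents' link counts when a directory and a
 * non-directory trade places across different parents, plus timestamps.
 */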
2593static int shmem_exchange(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry)
2594{
2595 bool old_is_dir = S_ISDIR(old_dentry->d_inode->i_mode);
2596 bool new_is_dir = S_ISDIR(new_dentry->d_inode->i_mode);
2597
2598 if (old_dir != new_dir && old_is_dir != new_is_dir) {
2599 if (old_is_dir) {
2600 drop_nlink(old_dir);
2601 inc_nlink(new_dir);
2602 } else {
2603 drop_nlink(new_dir);
2604 inc_nlink(old_dir);
2605 }
2606 }
2607 old_dir->i_ctime = old_dir->i_mtime =
2608 new_dir->i_ctime = new_dir->i_mtime =
2609 old_dentry->d_inode->i_ctime =
2610 new_dentry->d_inode->i_ctime = CURRENT_TIME;
2611
2612 return 0;
2613}
2614
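/*
 * RENAME_WHITEOUT: create a whiteout (a 0:0 character device) in place of
 * the entry being renamed away, so union/overlay users see it as deleted.
 */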
2615static int shmem_whiteout(struct inode *old_dir, struct dentry *old_dentry)
2616{
2617 struct dentry *whiteout;
2618 int error;
2619
2620 whiteout = d_alloc(old_dentry->d_parent, &old_dentry->d_name);
2621 if (!whiteout)
2622 return -ENOMEM;
2623
2624 error = shmem_mknod(old_dir, whiteout,
2625 S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV);
2626 dput(whiteout);
2627 if (error)
2628 return error;
2629
	/*
	 * Cheat and hash the whiteout while the old dentry is still in
	 * place, instead of playing games with FS_RENAME_DOES_D_MOVE.
	 *
	 * d_lookup() will consistently find one of them at this point,
	 * not sure which one, but that isn't even important.
	 */
2637 d_rehash(whiteout);
2638 return 0;
2639}
2640
/*
 * The VFS layer already does all the dentry stuff for rename,
 * we just have to decrement the usage count for the target if
 * it exists so that the VFS layer correctly frees it when it
 * gets overwritten.
 */
2647static int shmem_rename2(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags)
2648{
2649 struct inode *inode = old_dentry->d_inode;
2650 int they_are_dirs = S_ISDIR(inode->i_mode);
2651
2652 if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
2653 return -EINVAL;
2654
2655 if (flags & RENAME_EXCHANGE)
2656 return shmem_exchange(old_dir, old_dentry, new_dir, new_dentry);
2657
2658 if (!simple_empty(new_dentry))
2659 return -ENOTEMPTY;
2660
2661 if (flags & RENAME_WHITEOUT) {
2662 int error;
2663
2664 error = shmem_whiteout(old_dir, old_dentry);
2665 if (error)
2666 return error;
2667 }
2668
2669 if (new_dentry->d_inode) {
2670 (void) shmem_unlink(new_dir, new_dentry);
2671 if (they_are_dirs)
2672 drop_nlink(old_dir);
2673 } else if (they_are_dirs) {
2674 drop_nlink(old_dir);
2675 inc_nlink(new_dir);
2676 }
2677
2678 old_dir->i_size -= BOGO_DIRENT_SIZE;
2679 new_dir->i_size += BOGO_DIRENT_SIZE;
2680 old_dir->i_ctime = old_dir->i_mtime =
2681 new_dir->i_ctime = new_dir->i_mtime =
2682 inode->i_ctime = CURRENT_TIME;
2683 return 0;
2684}
2685
2686static int shmem_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry)
2687{
2688 return shmem_rename2(old_dir, old_dentry, new_dir, new_dentry, 0);
2689}
2690
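/*
 * Symlink targets shorter than SHORT_SYMLINK_LEN are kmemdup'ed into the
 * inode itself; longer targets are written into the first page of the
 * mapping.
 */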
2691static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
2692{
2693 int error;
2694 int len;
2695 struct inode *inode;
2696 struct page *page;
2697 char *kaddr;
2698 struct shmem_inode_info *info;
2699
2700 len = strlen(symname) + 1;
2701 if (len > PAGE_CACHE_SIZE)
2702 return -ENAMETOOLONG;
2703
2704 inode = shmem_get_inode(dir->i_sb, dir, S_IFLNK|S_IRWXUGO, 0, VM_NORESERVE);
2705 if (!inode)
2706 return -ENOSPC;
2707
2708 error = security_inode_init_security(inode, dir, &dentry->d_name,
2709 shmem_initxattrs, NULL);
2710 if (error) {
2711 if (error != -EOPNOTSUPP) {
2712 iput(inode);
2713 return error;
2714 }
2715 error = 0;
2716 }
2717
2718 info = SHMEM_I(inode);
2719 inode->i_size = len-1;
2720 if (len <= SHORT_SYMLINK_LEN) {
2721 info->symlink = kmemdup(symname, len, GFP_KERNEL);
2722 if (!info->symlink) {
2723 iput(inode);
2724 return -ENOMEM;
2725 }
2726 inode->i_op = &shmem_short_symlink_operations;
2727 } else {
2728 error = shmem_getpage(inode, 0, &page, SGP_WRITE, NULL);
2729 if (error) {
2730 iput(inode);
2731 return error;
2732 }
2733 inode->i_mapping->a_ops = &shmem_aops;
2734 inode->i_op = &shmem_symlink_inode_operations;
2735 kaddr = kmap_atomic(page);
2736 memcpy(kaddr, symname, len);
2737 kunmap_atomic(kaddr);
2738 SetPageUptodate(page);
2739 set_page_dirty(page);
2740 unlock_page(page);
2741 page_cache_release(page);
2742 }
2743 dir->i_size += BOGO_DIRENT_SIZE;
2744 dir->i_ctime = dir->i_mtime = CURRENT_TIME;
2745 d_instantiate(dentry, inode);
2746 dget(dentry);
2747 return 0;
2748}
2749
2750static void *shmem_follow_short_symlink(struct dentry *dentry, struct nameidata *nd)
2751{
2752 nd_set_link(nd, SHMEM_I(dentry->d_inode)->symlink);
2753 return NULL;
2754}
2755
2756static void *shmem_follow_link(struct dentry *dentry, struct nameidata *nd)
2757{
2758 struct page *page = NULL;
2759 int error = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ, NULL);
2760 nd_set_link(nd, error ? ERR_PTR(error) : kmap(page));
2761 if (page)
2762 unlock_page(page);
2763 return page;
2764}
2765
2766static void shmem_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie)
2767{
2768 if (!IS_ERR(nd_get_link(nd))) {
2769 struct page *page = cookie;
2770 kunmap(page);
2771 mark_page_accessed(page);
2772 page_cache_release(page);
2773 }
2774}
2775
2776#ifdef CONFIG_TMPFS_XATTR
/*
 * Superblocks without xattr inode operations may get some security.* xattr
 * support from the LSM "for free".  As soon as we have any other xattrs
 * like ACLs, we also need to implement the security.* handlers at
 * filesystem level, though.
 */

/*
 * Callback for security_inode_init_security() for acquiring xattrs.
 */
2787static int shmem_initxattrs(struct inode *inode,
2788 const struct xattr *xattr_array,
2789 void *fs_info)
2790{
2791 struct shmem_inode_info *info = SHMEM_I(inode);
2792 const struct xattr *xattr;
2793 struct simple_xattr *new_xattr;
2794 size_t len;
2795
2796 for (xattr = xattr_array; xattr->name != NULL; xattr++) {
2797 new_xattr = simple_xattr_alloc(xattr->value, xattr->value_len);
2798 if (!new_xattr)
2799 return -ENOMEM;
2800
2801 len = strlen(xattr->name) + 1;
2802 new_xattr->name = kmalloc(XATTR_SECURITY_PREFIX_LEN + len,
2803 GFP_KERNEL);
2804 if (!new_xattr->name) {
2805 kfree(new_xattr);
2806 return -ENOMEM;
2807 }
2808
2809 memcpy(new_xattr->name, XATTR_SECURITY_PREFIX,
2810 XATTR_SECURITY_PREFIX_LEN);
2811 memcpy(new_xattr->name + XATTR_SECURITY_PREFIX_LEN,
2812 xattr->name, len);
2813
2814 simple_xattr_list_add(&info->xattrs, new_xattr);
2815 }
2816
2817 return 0;
2818}
2819
2820static const struct xattr_handler *shmem_xattr_handlers[] = {
2821#ifdef CONFIG_TMPFS_POSIX_ACL
2822 &generic_acl_access_handler,
2823 &generic_acl_default_handler,
2824#endif
2825 NULL
2826};
2827
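/*
 * Only the security.* and trusted.* namespaces are stored by tmpfs itself;
 * a bare prefix with no attribute name is rejected.
 */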
2828static int shmem_xattr_validate(const char *name)
2829{
2830 struct { const char *prefix; size_t len; } arr[] = {
2831 { XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN },
2832 { XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN }
2833 };
2834 int i;
2835
2836 for (i = 0; i < ARRAY_SIZE(arr); i++) {
2837 size_t preflen = arr[i].len;
2838 if (strncmp(name, arr[i].prefix, preflen) == 0) {
2839 if (!name[preflen])
2840 return -EINVAL;
2841 return 0;
2842 }
2843 }
2844 return -EOPNOTSUPP;
2845}
2846
2847static ssize_t shmem_getxattr(struct dentry *dentry, const char *name,
2848 void *buffer, size_t size)
2849{
2850 struct shmem_inode_info *info = SHMEM_I(dentry->d_inode);
2851 int err;
2852
	/*
	 * If this is a request for a synthetic attribute in the system.*
	 * namespace use the generic infrastructure to resolve a handler
	 * for it via sb->s_xattr.
	 */
2858 if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
2859 return generic_getxattr(dentry, name, buffer, size);
2860
2861 err = shmem_xattr_validate(name);
2862 if (err)
2863 return err;
2864
2865 return simple_xattr_get(&info->xattrs, name, buffer, size);
2866}
2867
2868static int shmem_setxattr(struct dentry *dentry, const char *name,
2869 const void *value, size_t size, int flags)
2870{
2871 struct shmem_inode_info *info = SHMEM_I(dentry->d_inode);
2872 int err;
2873
	/*
	 * If this is a request for a synthetic attribute in the system.*
	 * namespace use the generic infrastructure to resolve a handler
	 * for it via sb->s_xattr.
	 */
2879 if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
2880 return generic_setxattr(dentry, name, value, size, flags);
2881
2882 err = shmem_xattr_validate(name);
2883 if (err)
2884 return err;
2885
2886 return simple_xattr_set(&info->xattrs, name, value, size, flags);
2887}
2888
2889static int shmem_removexattr(struct dentry *dentry, const char *name)
2890{
2891 struct shmem_inode_info *info = SHMEM_I(dentry->d_inode);
2892 int err;
2893
	/*
	 * If this is a request for a synthetic attribute in the system.*
	 * namespace use the generic infrastructure to resolve a handler
	 * for it via sb->s_xattr.
	 */
2899 if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
2900 return generic_removexattr(dentry, name);
2901
2902 err = shmem_xattr_validate(name);
2903 if (err)
2904 return err;
2905
2906 return simple_xattr_remove(&info->xattrs, name);
2907}
2908
2909static ssize_t shmem_listxattr(struct dentry *dentry, char *buffer, size_t size)
2910{
2911 struct shmem_inode_info *info = SHMEM_I(dentry->d_inode);
2912 return simple_xattr_list(&info->xattrs, buffer, size);
2913}
#endif /* CONFIG_TMPFS_XATTR */
2915
2916static const struct inode_operations shmem_short_symlink_operations = {
2917 .readlink = generic_readlink,
2918 .follow_link = shmem_follow_short_symlink,
2919#ifdef CONFIG_TMPFS_XATTR
2920 .setxattr = shmem_setxattr,
2921 .getxattr = shmem_getxattr,
2922 .listxattr = shmem_listxattr,
2923 .removexattr = shmem_removexattr,
2924#endif
2925};
2926
2927static const struct inode_operations shmem_symlink_inode_operations = {
2928 .readlink = generic_readlink,
2929 .follow_link = shmem_follow_link,
2930 .put_link = shmem_put_link,
2931#ifdef CONFIG_TMPFS_XATTR
2932 .setxattr = shmem_setxattr,
2933 .getxattr = shmem_getxattr,
2934 .listxattr = shmem_listxattr,
2935 .removexattr = shmem_removexattr,
2936#endif
2937};
2938
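/*
 * NFS export: tmpfs has no way of reconnecting a disconnected directory,
 * so get_parent simply fails; file handles instead encode the inode number
 * and generation.
 */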
2939static struct dentry *shmem_get_parent(struct dentry *child)
2940{
2941 return ERR_PTR(-ESTALE);
2942}
2943
2944static int shmem_match(struct inode *ino, void *vfh)
2945{
2946 __u32 *fh = vfh;
2947 __u64 inum = fh[2];
2948 inum = (inum << 32) | fh[1];
2949 return ino->i_ino == inum && fh[0] == ino->i_generation;
2950}
2951
2952static struct dentry *shmem_fh_to_dentry(struct super_block *sb,
2953 struct fid *fid, int fh_len, int fh_type)
2954{
2955 struct inode *inode;
2956 struct dentry *dentry = NULL;
2957 u64 inum;
2958
2959 if (fh_len < 3)
2960 return NULL;
2961
2962 inum = fid->raw[2];
2963 inum = (inum << 32) | fid->raw[1];
2964
2965 inode = ilookup5(sb, (unsigned long)(inum + fid->raw[0]),
2966 shmem_match, fid->raw);
2967 if (inode) {
2968 dentry = d_find_alias(inode);
2969 iput(inode);
2970 }
2971
2972 return dentry;
2973}
2974
2975static int shmem_encode_fh(struct inode *inode, __u32 *fh, int *len,
2976 struct inode *parent)
2977{
2978 if (*len < 3) {
2979 *len = 3;
2980 return FILEID_INVALID;
2981 }
2982
2983 if (inode_unhashed(inode)) {
		/* Unfortunately insert_inode_hash is not idempotent,
		 * so as we hash inodes here rather than at creation
		 * time, we need a lock to ensure we only try
		 * to do it once
		 */
2989 static DEFINE_SPINLOCK(lock);
2990 spin_lock(&lock);
2991 if (inode_unhashed(inode))
2992 __insert_inode_hash(inode,
2993 inode->i_ino + inode->i_generation);
2994 spin_unlock(&lock);
2995 }
2996
2997 fh[0] = inode->i_generation;
2998 fh[1] = inode->i_ino;
2999 fh[2] = ((__u64)inode->i_ino) >> 32;
3000
3001 *len = 3;
3002 return 1;
3003}
3004
3005static const struct export_operations shmem_export_ops = {
3006 .get_parent = shmem_get_parent,
3007 .encode_fh = shmem_encode_fh,
3008 .fh_to_dentry = shmem_fh_to_dentry,
3009};
3010
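/*
 * Mount option parsing, shared by mount and remount; on remount the mode,
 * uid and gid options are silently ignored.
 */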
3011static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo,
3012 bool remount)
3013{
3014 char *this_char, *value, *rest;
3015 struct mempolicy *mpol = NULL;
3016 uid_t uid;
3017 gid_t gid;
3018
3019 while (options != NULL) {
3020 this_char = options;
3021 for (;;) {
			/*
			 * NUL-terminate this option: unfortunately,
			 * mount options form a comma-separated list,
			 * but mpol's nodelist may also contain commas.
			 */
3027 options = strchr(options, ',');
3028 if (options == NULL)
3029 break;
3030 options++;
3031 if (!isdigit(*options)) {
3032 options[-1] = '\0';
3033 break;
3034 }
3035 }
3036 if (!*this_char)
3037 continue;
3038 if ((value = strchr(this_char,'=')) != NULL) {
3039 *value++ = 0;
3040 } else {
3041 printk(KERN_ERR
3042 "tmpfs: No value for mount option '%s'\n",
3043 this_char);
3044 goto error;
3045 }
3046
3047 if (!strcmp(this_char,"size")) {
3048 unsigned long long size;
3049 size = memparse(value,&rest);
3050 if (*rest == '%') {
3051 size <<= PAGE_SHIFT;
3052 size *= totalram_pages;
3053 do_div(size, 100);
3054 rest++;
3055 }
3056 if (*rest)
3057 goto bad_val;
3058 sbinfo->max_blocks =
3059 DIV_ROUND_UP(size, PAGE_CACHE_SIZE);
3060 } else if (!strcmp(this_char,"nr_blocks")) {
3061 sbinfo->max_blocks = memparse(value, &rest);
3062 if (*rest)
3063 goto bad_val;
3064 } else if (!strcmp(this_char,"nr_inodes")) {
3065 sbinfo->max_inodes = memparse(value, &rest);
3066 if (*rest)
3067 goto bad_val;
3068 } else if (!strcmp(this_char,"mode")) {
3069 if (remount)
3070 continue;
3071 sbinfo->mode = simple_strtoul(value, &rest, 8) & 07777;
3072 if (*rest)
3073 goto bad_val;
3074 } else if (!strcmp(this_char,"uid")) {
3075 if (remount)
3076 continue;
3077 uid = simple_strtoul(value, &rest, 0);
3078 if (*rest)
3079 goto bad_val;
3080 sbinfo->uid = make_kuid(current_user_ns(), uid);
3081 if (!uid_valid(sbinfo->uid))
3082 goto bad_val;
3083 } else if (!strcmp(this_char,"gid")) {
3084 if (remount)
3085 continue;
3086 gid = simple_strtoul(value, &rest, 0);
3087 if (*rest)
3088 goto bad_val;
3089 sbinfo->gid = make_kgid(current_user_ns(), gid);
3090 if (!gid_valid(sbinfo->gid))
3091 goto bad_val;
3092 } else if (!strcmp(this_char,"mpol")) {
3093 mpol_put(mpol);
3094 mpol = NULL;
3095 if (mpol_parse_str(value, &mpol))
3096 goto bad_val;
3097 } else {
3098 printk(KERN_ERR "tmpfs: Bad mount option %s\n",
3099 this_char);
3100 goto error;
3101 }
3102 }
3103 sbinfo->mpol = mpol;
3104 return 0;
3105
3106bad_val:
3107 printk(KERN_ERR "tmpfs: Bad value '%s' for mount option '%s'\n",
3108 value, this_char);
3109error:
3110 mpol_put(mpol);
3111 return 1;
3112
3113}
3114
3115static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
3116{
3117 struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
3118 struct shmem_sb_info config = *sbinfo;
3119 unsigned long inodes;
3120 int error = -EINVAL;
3121
3122 config.mpol = NULL;
3123 if (shmem_parse_options(data, &config, true))
3124 return error;
3125
3126 spin_lock(&sbinfo->stat_lock);
3127 inodes = sbinfo->max_inodes - sbinfo->free_inodes;
3128 if (percpu_counter_compare(&sbinfo->used_blocks, config.max_blocks) > 0)
3129 goto out;
3130 if (config.max_inodes < inodes)
3131 goto out;
3132
	/*
	 * Those tests disallow limited->unlimited while any are in use;
	 * but we must separately disallow unlimited->limited, because in
	 * that case we have no record of how much is already in use.
	 */
3137 if (config.max_blocks && !sbinfo->max_blocks)
3138 goto out;
3139 if (config.max_inodes && !sbinfo->max_inodes)
3140 goto out;
3141
3142 error = 0;
3143 sbinfo->max_blocks = config.max_blocks;
3144 sbinfo->max_inodes = config.max_inodes;
3145 sbinfo->free_inodes = config.max_inodes - inodes;
3146
	/*
	 * Preserve previous mempolicy unless mpol remount option was specified.
	 */
3150 if (config.mpol) {
3151 mpol_put(sbinfo->mpol);
3152 sbinfo->mpol = config.mpol;
3153 }
3154out:
3155 spin_unlock(&sbinfo->stat_lock);
3156 return error;
3157}
3158
3159static int shmem_show_options(struct seq_file *seq, struct dentry *root)
3160{
3161 struct shmem_sb_info *sbinfo = SHMEM_SB(root->d_sb);
3162
3163 if (sbinfo->max_blocks != shmem_default_max_blocks())
3164 seq_printf(seq, ",size=%luk",
3165 sbinfo->max_blocks << (PAGE_CACHE_SHIFT - 10));
3166 if (sbinfo->max_inodes != shmem_default_max_inodes())
3167 seq_printf(seq, ",nr_inodes=%lu", sbinfo->max_inodes);
3168 if (sbinfo->mode != (S_IRWXUGO | S_ISVTX))
3169 seq_printf(seq, ",mode=%03ho", sbinfo->mode);
3170 if (!uid_eq(sbinfo->uid, GLOBAL_ROOT_UID))
3171 seq_printf(seq, ",uid=%u",
3172 from_kuid_munged(&init_user_ns, sbinfo->uid));
3173 if (!gid_eq(sbinfo->gid, GLOBAL_ROOT_GID))
3174 seq_printf(seq, ",gid=%u",
3175 from_kgid_munged(&init_user_ns, sbinfo->gid));
3176 shmem_show_mpol(seq, sbinfo->mpol);
3177 return 0;
3178}
3179
3180#define MFD_NAME_PREFIX "memfd:"
3181#define MFD_NAME_PREFIX_LEN (sizeof(MFD_NAME_PREFIX) - 1)
3182#define MFD_NAME_MAX_LEN (NAME_MAX - MFD_NAME_PREFIX_LEN)
3183
3184#define MFD_ALL_FLAGS (MFD_CLOEXEC | MFD_ALLOW_SEALING)
3185
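/*
 * memfd_create() returns a file descriptor for an unlinked tmpfs file.
 * Sealing is opt-in: unless MFD_ALLOW_SEALING is passed, F_SEAL_SEAL is
 * left set on the inode so no further seals can be added.
 */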
3186SYSCALL_DEFINE2(memfd_create,
3187 const char __user *, uname,
3188 unsigned int, flags)
3189{
3190 struct shmem_inode_info *info;
3191 struct file *file;
3192 int fd, error;
3193 char *name;
3194 long len;
3195
3196 if (flags & ~(unsigned int)MFD_ALL_FLAGS)
3197 return -EINVAL;
3198
	/* length includes terminating zero */
3200 len = strnlen_user(uname, MFD_NAME_MAX_LEN + 1);
3201 if (len <= 0)
3202 return -EFAULT;
3203 if (len > MFD_NAME_MAX_LEN + 1)
3204 return -EINVAL;
3205
3206 name = kmalloc(len + MFD_NAME_PREFIX_LEN, GFP_TEMPORARY);
3207 if (!name)
3208 return -ENOMEM;
3209
3210 strcpy(name, MFD_NAME_PREFIX);
3211 if (copy_from_user(&name[MFD_NAME_PREFIX_LEN], uname, len)) {
3212 error = -EFAULT;
3213 goto err_name;
3214 }
3215
	/* terminating-zero may have changed after strnlen_user() returned */
3217 if (name[len + MFD_NAME_PREFIX_LEN - 1]) {
3218 error = -EFAULT;
3219 goto err_name;
3220 }
3221
3222 fd = get_unused_fd_flags((flags & MFD_CLOEXEC) ? O_CLOEXEC : 0);
3223 if (fd < 0) {
3224 error = fd;
3225 goto err_name;
3226 }
3227
3228 file = shmem_file_setup(name, 0, VM_NORESERVE);
3229 if (IS_ERR(file)) {
3230 error = PTR_ERR(file);
3231 goto err_fd;
3232 }
3233 info = SHMEM_I(file_inode(file));
3234 file->f_mode |= FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE;
3235 file->f_flags |= O_RDWR | O_LARGEFILE;
3236 if (flags & MFD_ALLOW_SEALING)
3237 info->seals &= ~F_SEAL_SEAL;
3238
3239 fd_install(fd, file);
3240 kfree(name);
3241 return fd;
3242
3243err_fd:
3244 put_unused_fd(fd);
3245err_name:
3246 kfree(name);
3247 return error;
3248}
3249
#endif /* CONFIG_TMPFS */
3251
3252static void shmem_put_super(struct super_block *sb)
3253{
3254 struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
3255
3256 percpu_counter_destroy(&sbinfo->used_blocks);
3257 mpol_put(sbinfo->mpol);
3258 kfree(sbinfo);
3259 sb->s_fs_info = NULL;
3260}
3261
3262int shmem_fill_super(struct super_block *sb, void *data, int silent)
3263{
3264 struct inode *inode;
3265 struct shmem_sb_info *sbinfo;
3266 int err = -ENOMEM;
3267
	/* Round up to L1_CACHE_BYTES to resist false sharing */
3269 sbinfo = kzalloc(max((int)sizeof(struct shmem_sb_info),
3270 L1_CACHE_BYTES), GFP_KERNEL);
3271 if (!sbinfo)
3272 return -ENOMEM;
3273
3274 sbinfo->mode = S_IRWXUGO | S_ISVTX;
3275 sbinfo->uid = current_fsuid();
3276 sbinfo->gid = current_fsgid();
3277 sb->s_fs_info = sbinfo;
3278
3279#ifdef CONFIG_TMPFS
	/*
	 * Per default we only allow half of the physical ram per
	 * tmpfs instance, limiting inodes to one per page of lowmem;
	 * but the internal instance is left unlimited.
	 */
3285 if (!(sb->s_flags & MS_KERNMOUNT)) {
3286 sbinfo->max_blocks = shmem_default_max_blocks();
3287 sbinfo->max_inodes = shmem_default_max_inodes();
3288 if (shmem_parse_options(data, sbinfo, false)) {
3289 err = -EINVAL;
3290 goto failed;
3291 }
3292 } else {
3293 sb->s_flags |= MS_NOUSER;
3294 }
3295 sb->s_export_op = &shmem_export_ops;
3296 sb->s_flags |= MS_NOSEC;
3297#else
3298 sb->s_flags |= MS_NOUSER;
3299#endif
3300
3301 spin_lock_init(&sbinfo->stat_lock);
3302 if (percpu_counter_init(&sbinfo->used_blocks, 0, GFP_KERNEL))
3303 goto failed;
3304 sbinfo->free_inodes = sbinfo->max_inodes;
3305
3306 sb->s_maxbytes = MAX_LFS_FILESIZE;
3307 sb->s_blocksize = PAGE_CACHE_SIZE;
3308 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
3309 sb->s_magic = TMPFS_MAGIC;
3310 sb->s_op = &shmem_ops;
3311 sb->s_time_gran = 1;
3312#ifdef CONFIG_TMPFS_XATTR
3313 sb->s_xattr = shmem_xattr_handlers;
3314#endif
3315#ifdef CONFIG_TMPFS_POSIX_ACL
3316 sb->s_flags |= MS_POSIXACL;
3317#endif
3318 uuid_be_gen((uuid_be *) &sb->s_uuid);
3319
3320 inode = shmem_get_inode(sb, NULL, S_IFDIR | sbinfo->mode, 0, VM_NORESERVE);
3321 if (!inode)
3322 goto failed;
3323 inode->i_uid = sbinfo->uid;
3324 inode->i_gid = sbinfo->gid;
3325 sb->s_root = d_make_root(inode);
3326 if (!sb->s_root)
3327 goto failed;
3328 return 0;
3329
3330failed:
3331 shmem_put_super(sb);
3332 return err;
3333}
3334
3335static struct kmem_cache *shmem_inode_cachep;
3336
3337static struct inode *shmem_alloc_inode(struct super_block *sb)
3338{
3339 struct shmem_inode_info *info;
3340 info = kmem_cache_alloc(shmem_inode_cachep, GFP_KERNEL);
3341 if (!info)
3342 return NULL;
3343 return &info->vfs_inode;
3344}
3345
3346static void shmem_destroy_callback(struct rcu_head *head)
3347{
3348 struct inode *inode = container_of(head, struct inode, i_rcu);
3349 kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode));
3350}
3351
3352static void shmem_destroy_inode(struct inode *inode)
3353{
3354 if (S_ISREG(inode->i_mode))
3355 mpol_free_shared_policy(&SHMEM_I(inode)->policy);
3356 call_rcu(&inode->i_rcu, shmem_destroy_callback);
3357}
3358
3359static void shmem_init_inode(void *foo)
3360{
3361 struct shmem_inode_info *info = foo;
3362 inode_init_once(&info->vfs_inode);
3363}
3364
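/* SLAB_PANIC makes a failed cache creation panic, so this cannot fail. */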
3365static int shmem_init_inodecache(void)
3366{
3367 shmem_inode_cachep = kmem_cache_create("shmem_inode_cache",
3368 sizeof(struct shmem_inode_info),
3369 0, SLAB_PANIC, shmem_init_inode);
3370 return 0;
3371}
3372
3373static void shmem_destroy_inodecache(void)
3374{
3375 kmem_cache_destroy(shmem_inode_cachep);
3376}
3377
3378static const struct address_space_operations shmem_aops = {
3379 .writepage = shmem_writepage,
3380 .set_page_dirty = __set_page_dirty_no_writeback,
3381#ifdef CONFIG_TMPFS
3382 .write_begin = shmem_write_begin,
3383 .write_end = shmem_write_end,
3384#endif
3385 .migratepage = migrate_page,
3386 .error_remove_page = generic_error_remove_page,
3387};
3388
3389static const struct file_operations shmem_file_operations = {
3390 .mmap = shmem_mmap,
3391#ifdef CONFIG_TMPFS
3392 .llseek = shmem_file_llseek,
3393 .read = do_sync_read,
3394 .write = do_sync_write,
3395 .aio_read = shmem_file_aio_read,
3396 .aio_write = generic_file_aio_write,
3397 .fsync = noop_fsync,
3398 .splice_read = shmem_file_splice_read,
3399 .splice_write = generic_file_splice_write,
3400 .fallocate = shmem_fallocate,
3401#endif
3402};
3403
3404static const struct inode_operations shmem_inode_operations = {
3405 .setattr = shmem_setattr,
3406#ifdef CONFIG_TMPFS_XATTR
3407 .setxattr = shmem_setxattr,
3408 .getxattr = shmem_getxattr,
3409 .listxattr = shmem_listxattr,
3410 .removexattr = shmem_removexattr,
3411#endif
3412};
3413
3414static const struct inode_operations_wrapper shmem_dir_inode_operations = {
3415 .ops = {
3416#ifdef CONFIG_TMPFS
3417 .create = shmem_create,
3418 .lookup = simple_lookup,
3419 .link = shmem_link,
3420 .unlink = shmem_unlink,
3421 .symlink = shmem_symlink,
3422 .mkdir = shmem_mkdir,
3423 .rmdir = shmem_rmdir,
3424 .mknod = shmem_mknod,
3425 .rename = shmem_rename,
3426#endif
3427#ifdef CONFIG_TMPFS_XATTR
3428 .setxattr = shmem_setxattr,
3429 .getxattr = shmem_getxattr,
3430 .listxattr = shmem_listxattr,
3431 .removexattr = shmem_removexattr,
3432#endif
3433#ifdef CONFIG_TMPFS_POSIX_ACL
3434 .setattr = shmem_setattr,
3435#endif
3436 },
3437#ifdef CONFIG_TMPFS
3438 .rename2 = shmem_rename2,
3439 .tmpfile = shmem_tmpfile,
3440#endif
3441};
3442
3443static const struct inode_operations shmem_special_inode_operations = {
3444#ifdef CONFIG_TMPFS_XATTR
3445 .setxattr = shmem_setxattr,
3446 .getxattr = shmem_getxattr,
3447 .listxattr = shmem_listxattr,
3448 .removexattr = shmem_removexattr,
3449#endif
3450#ifdef CONFIG_TMPFS_POSIX_ACL
3451 .setattr = shmem_setattr,
3452#endif
3453};
3454
3455static const struct super_operations shmem_ops = {
3456 .alloc_inode = shmem_alloc_inode,
3457 .destroy_inode = shmem_destroy_inode,
3458#ifdef CONFIG_TMPFS
3459 .statfs = shmem_statfs,
3460 .remount_fs = shmem_remount_fs,
3461 .show_options = shmem_show_options,
3462#endif
3463 .evict_inode = shmem_evict_inode,
3464 .drop_inode = generic_delete_inode,
3465 .put_super = shmem_put_super,
3466};
3467
3468static const struct vm_operations_struct shmem_vm_ops = {
3469 .fault = shmem_fault,
3470#ifdef CONFIG_NUMA
3471 .set_policy = shmem_set_policy,
3472 .get_policy = shmem_get_policy,
3473#endif
3474 .remap_pages = generic_file_remap_pages,
3475};
3476
3477static struct dentry *shmem_mount(struct file_system_type *fs_type,
3478 int flags, const char *dev_name, void *data)
3479{
3480 return mount_nodev(fs_type, flags, data, shmem_fill_super);
3481}
3482
3483static struct file_system_type shmem_fs_type = {
3484 .owner = THIS_MODULE,
3485 .name = "tmpfs",
3486 .mount = shmem_mount,
3487 .kill_sb = kill_litter_super,
3488 .fs_flags = FS_USERNS_MOUNT,
3489};
3490
3491int __init shmem_init(void)
3492{
3493 int error;
3494
	/* If rootfs called this, don't re-init */
3496 if (shmem_inode_cachep)
3497 return 0;
3498
3499 error = bdi_init(&shmem_backing_dev_info);
3500 if (error)
3501 goto out4;
3502
3503 error = shmem_init_inodecache();
3504 if (error)
3505 goto out3;
3506
3507 error = register_filesystem(&shmem_fs_type);
3508 if (error) {
3509 printk(KERN_ERR "Could not register tmpfs\n");
3510 goto out2;
3511 }
3512
3513 shm_mnt = kern_mount(&shmem_fs_type);
3514 if (IS_ERR(shm_mnt)) {
3515 error = PTR_ERR(shm_mnt);
3516 printk(KERN_ERR "Could not kern_mount tmpfs\n");
3517 goto out1;
3518 }
3519 return 0;
3520
3521out1:
3522 unregister_filesystem(&shmem_fs_type);
3523out2:
3524 shmem_destroy_inodecache();
3525out3:
3526 bdi_destroy(&shmem_backing_dev_info);
3527out4:
3528 shm_mnt = ERR_PTR(error);
3529 return error;
3530}
3531
#else /* !CONFIG_SHMEM */
3533
/*
 * tiny-shmem: simple shmemfs and tmpfs using ramfs code
 *
 * This is intended for small systems where the benefits of the full
 * shmem code (swap-backed and resource-limited) are outweighed by
 * their complexity. On systems without swap this code should be
 * effectively equivalent, but much lighter weight.
 */
3543static struct file_system_type shmem_fs_type = {
3544 .name = "tmpfs",
3545 .mount = ramfs_mount,
3546 .kill_sb = kill_litter_super,
3547 .fs_flags = FS_USERNS_MOUNT,
3548};
3549
3550int __init shmem_init(void)
3551{
3552 BUG_ON(register_filesystem(&shmem_fs_type) != 0);
3553
3554 shm_mnt = kern_mount(&shmem_fs_type);
3555 BUG_ON(IS_ERR(shm_mnt));
3556
3557 return 0;
3558}
3559
3560int shmem_unuse(swp_entry_t swap, struct page *page)
3561{
3562 return 0;
3563}
3564
3565int shmem_lock(struct file *file, int lock, struct user_struct *user)
3566{
3567 return 0;
3568}
3569
3570void shmem_unlock_mapping(struct address_space *mapping)
3571{
3572}
3573
3574void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
3575{
3576 truncate_inode_pages_range(inode->i_mapping, lstart, lend);
3577}
3578EXPORT_SYMBOL_GPL(shmem_truncate_range);
3579
3580#define shmem_vm_ops generic_file_vm_ops
3581#define shmem_file_operations ramfs_file_operations
3582#define shmem_get_inode(sb, dir, mode, dev, flags) ramfs_get_inode(sb, dir, mode, dev)
3583#define shmem_acct_size(flags, size) 0
3584#define shmem_unacct_size(flags, size) do {} while (0)
3585
#endif /* CONFIG_SHMEM */
3587
3588
/* common code */
3590static struct dentry_operations anon_ops = {
3591 .d_dname = simple_dname
3592};
3593
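/*
 * Create an unlinked tmpfs file: a pseudo dentry on shm_mnt, an inode of
 * the requested size, and a struct file ready for read/write or mmap.
 */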
3594static struct file *__shmem_file_setup(const char *name, loff_t size,
3595 unsigned long flags, unsigned int i_flags)
3596{
3597 struct file *res;
3598 struct inode *inode;
3599 struct path path;
3600 struct super_block *sb;
3601 struct qstr this;
3602
3603 if (IS_ERR(shm_mnt))
3604 return ERR_CAST(shm_mnt);
3605
3606 if (size < 0 || size > MAX_LFS_FILESIZE)
3607 return ERR_PTR(-EINVAL);
3608
3609 if (shmem_acct_size(flags, size))
3610 return ERR_PTR(-ENOMEM);
3611
3612 res = ERR_PTR(-ENOMEM);
3613 this.name = name;
3614 this.len = strlen(name);
3615 this.hash = 0;
3616 sb = shm_mnt->mnt_sb;
3617 path.dentry = d_alloc_pseudo(sb, &this);
3618 if (!path.dentry)
3619 goto put_memory;
3620 d_set_d_op(path.dentry, &anon_ops);
3621 path.mnt = mntget(shm_mnt);
3622
3623 res = ERR_PTR(-ENOSPC);
3624 inode = shmem_get_inode(sb, NULL, S_IFREG | S_IRWXUGO, 0, flags);
3625 if (!inode)
3626 goto put_dentry;
3627
3628 inode->i_flags |= i_flags;
3629 d_instantiate(path.dentry, inode);
3630 inode->i_size = size;
3631 clear_nlink(inode);
3632 res = ERR_PTR(ramfs_nommu_expand_for_mapping(inode, size));
3633 if (IS_ERR(res))
3634 goto put_dentry;
3635
3636 res = alloc_file(&path, FMODE_WRITE | FMODE_READ,
3637 &shmem_file_operations);
3638 if (IS_ERR(res))
3639 goto put_dentry;
3640
3641 return res;
3642
3643put_dentry:
3644 path_put(&path);
3645put_memory:
3646 shmem_unacct_size(flags, size);
3647 return res;
3648}
3649
/**
 * shmem_kernel_file_setup - get an unlinked file living in tmpfs which must be
 *	kernel internal.  There will be NO LSM permission checks against the
 *	underlying inode.  So users of this interface must, by themselves, check
 *	for SMACK / SELinux / etc. permissions at file open time or inherently
 *	trust the open file and grant everyone read/write to it.
 * @name: name for dentry (to be seen in /proc/<pid>/maps)
 * @size: size to be set for the file
 * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size
 */
3660struct file *shmem_kernel_file_setup(const char *name, loff_t size, unsigned long flags)
3661{
3662 return __shmem_file_setup(name, size, flags, S_PRIVATE);
3663}
3664
/**
 * shmem_file_setup - get an unlinked file living in tmpfs
 * @name: name for dentry (to be seen in /proc/<pid>/maps)
 * @size: size to be set for the file
 * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size
 */
3671struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags)
3672{
3673 return __shmem_file_setup(name, size, flags, 0);
3674}
3675EXPORT_SYMBOL_GPL(shmem_file_setup);
3676
/**
 * shmem_zero_setup - setup a shared anonymous mapping
 * @vma: the vma to be mmapped is prepared for shared anonymous memory
 */
3681int shmem_zero_setup(struct vm_area_struct *vma)
3682{
3683 struct file *file;
3684 loff_t size = vma->vm_end - vma->vm_start;
3685
3686 file = shmem_file_setup("dev/zero", size, vma->vm_flags);
3687 if (IS_ERR(file))
3688 return PTR_ERR(file);
3689
3690 if (vma->vm_file)
3691 fput(vma->vm_file);
3692 vma->vm_file = file;
3693 vma->vm_ops = &shmem_vm_ops;
3694 return 0;
3695}
3696
/**
 * shmem_read_mapping_page_gfp - read into page cache, using specified page allocation flags.
 * @mapping:	the page's address_space
 * @index:	the page index
 * @gfp:	the page allocator flags to use if allocating
 *
 * This behaves as a tmpfs "read_cache_page_gfp(mapping, index, gfp)",
 * with any new page allocations done using the specified allocation flags.
 * But read_cache_page_gfp() uses the ->readpage() method, which does not
 * suit tmpfs, since it may have pages in swapcache and needs to find those
 * for itself.
 */
3712struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
3713 pgoff_t index, gfp_t gfp)
3714{
3715#ifdef CONFIG_SHMEM
3716 struct inode *inode = mapping->host;
3717 struct page *page;
3718 int error;
3719
3720 BUG_ON(mapping->a_ops != &shmem_aops);
3721 error = shmem_getpage_gfp(inode, index, &page, SGP_CACHE,
3722 gfp, NULL, NULL, NULL);
3723 if (error)
3724 page = ERR_PTR(error);
3725 else
3726 unlock_page(page);
3727 return page;
3728#else
	/*
	 * The tiny !SHMEM case uses ramfs without swap
	 */
3732 return read_cache_page_gfp(mapping, index, gfp);
3733#endif
3734}
3735EXPORT_SYMBOL_GPL(shmem_read_mapping_page_gfp);
3736