#include <linux/fs.h>
#include <linux/init.h>
#include <linux/vfs.h>
#include <linux/mount.h>
#include <linux/ramfs.h>
#include <linux/pagemap.h>
#include <linux/file.h>
#include <linux/mm.h>
#include <linux/export.h>
#include <linux/swap.h>
#include <linux/aio.h>
#include <linux/syscalls.h>
#include <uapi/linux/memfd.h>

static struct vfsmount *shm_mnt;

#ifdef CONFIG_SHMEM

#include <linux/xattr.h>
#include <linux/exportfs.h>
#include <linux/posix_acl.h>
#include <linux/generic_acl.h>
#include <linux/mman.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>
#include <linux/shmem_fs.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/pagevec.h>
#include <linux/percpu_counter.h>
#include <linux/falloc.h>
#include <linux/splice.h>
#include <linux/security.h>
#include <linux/swapops.h>
#include <linux/mempolicy.h>
#include <linux/namei.h>
#include <linux/ctype.h>
#include <linux/migrate.h>
#include <linux/highmem.h>
#include <linux/seq_file.h>
#include <linux/magic.h>
#include <linux/fcntl.h>
#include <linux/userfaultfd_k.h>
#include <linux/rmap.h>
#include <linux/uuid.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>

#define BLOCKS_PER_PAGE	(PAGE_CACHE_SIZE/512)
#define VM_ACCT(size)	(PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT)

/* Each directory entry is pretended to occupy this many bytes of i_size */
#define BOGO_DIRENT_SIZE 20

/* Symlinks up to this length are kmalloc'ed rather than stored in a page */
#define SHORT_SYMLINK_LEN 128

struct shmem_falloc {
	wait_queue_head_t *waitq;	/* faults into hole wait for punch to end */
	pgoff_t start;			/* start of range currently being fallocated */
	pgoff_t next;			/* the next page offset to be fallocated */
	pgoff_t nr_falloced;		/* how many new pages have been fallocated */
	pgoff_t nr_unswapped;		/* how often writepage refused to swap out */
};

/* Flag allocation requirements to shmem_getpage */
enum sgp_type {
	SGP_READ,	/* don't exceed i_size, don't allocate a page */
	SGP_CACHE,	/* don't exceed i_size, may allocate a page */
	SGP_DIRTY,	/* like SGP_CACHE, but set the new page dirty */
	SGP_WRITE,	/* may exceed i_size, may return a !Uptodate page */
	SGP_FALLOC,	/* like SGP_WRITE, used by fallocate() */
};
109
#ifdef CONFIG_TMPFS
static unsigned long shmem_default_max_blocks(void)
{
	return totalram_pages / 2;
}

static unsigned long shmem_default_max_inodes(void)
{
	return min(totalram_pages - totalhigh_pages, totalram_pages / 2);
}
#endif
121
122static bool shmem_should_replace_page(struct page *page, gfp_t gfp);
123static int shmem_replace_page(struct page **pagep, gfp_t gfp,
124 struct shmem_inode_info *info, pgoff_t index);
125static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
126 struct page **pagep, enum sgp_type sgp,
127 gfp_t gfp, struct vm_area_struct *vma,
128 struct vm_fault *vmf, int *fault_type);
129
130static inline int shmem_getpage(struct inode *inode, pgoff_t index,
131 struct page **pagep, enum sgp_type sgp,
132 int *fault_type)
133{
134 return shmem_getpage_gfp(inode, index, pagep, sgp,
135 mapping_gfp_mask(inode->i_mapping),
136 NULL, NULL, fault_type);
137}
138
139static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb)
140{
141 return sb->s_fs_info;
142}
143
/*
 * shmem_acct_size() pre-accounts the whole fixed size of a VM object:
 * shared memory and shared anonymous mappings are charged up front,
 * unless VM_NORESERVE was requested.
 */
static inline int shmem_acct_size(unsigned long flags, loff_t size)
{
	return (flags & VM_NORESERVE) ?
		0 : security_vm_enough_memory_mm(current->mm, VM_ACCT(size));
}

static inline void shmem_unacct_size(unsigned long flags, loff_t size)
{
	if (!(flags & VM_NORESERVE))
		vm_unacct_memory(VM_ACCT(size));
}

/*
 * tmpfs files (VM_NORESERVE) are instead accounted incrementally, block
 * by block as pages are allocated, so that large sparse files stay cheap
 * until they are actually written.
 */
static inline int shmem_acct_block(unsigned long flags, long pages)
{
	return (flags & VM_NORESERVE) ?
		security_vm_enough_memory_mm(current->mm, pages *
					     VM_ACCT(PAGE_CACHE_SIZE)) : 0;
}

static inline void shmem_unacct_blocks(unsigned long flags, long pages)
{
	if (flags & VM_NORESERVE)
		vm_unacct_memory(pages * VM_ACCT(PAGE_CACHE_SIZE));
}
180
181static inline bool shmem_inode_acct_block(struct inode *inode, long pages)
182{
183 struct shmem_inode_info *info = SHMEM_I(inode);
184 struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
185
186 if (shmem_acct_block(info->flags, pages))
187 return false;
188
189 if (sbinfo->max_blocks) {
190 if (percpu_counter_compare(&sbinfo->used_blocks,
191 sbinfo->max_blocks - pages) > 0)
192 goto unacct;
193 percpu_counter_add(&sbinfo->used_blocks, pages);
194 }
195
196 return true;
197
198unacct:
199 shmem_unacct_blocks(info->flags, pages);
200 return false;
201}
202
203static inline void shmem_inode_unacct_blocks(struct inode *inode, long pages)
204{
205 struct shmem_inode_info *info = SHMEM_I(inode);
206 struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
207
208 if (sbinfo->max_blocks)
209 percpu_counter_sub(&sbinfo->used_blocks, pages);
210 shmem_unacct_blocks(info->flags, pages);
211}
212
213static const struct super_operations shmem_ops;
214static const struct address_space_operations shmem_aops;
215static const struct file_operations shmem_file_operations;
216static const struct inode_operations shmem_inode_operations;
217static const struct inode_operations_wrapper shmem_dir_inode_operations;
218static const struct inode_operations shmem_special_inode_operations;
219static const struct vm_operations_struct shmem_vm_ops;
220
221static struct backing_dev_info shmem_backing_dev_info __read_mostly = {
222 .ra_pages = 0,
223 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_SWAP_BACKED,
224};
225
226bool vma_is_shmem(struct vm_area_struct *vma)
227{
228 return vma->vm_ops == &shmem_vm_ops;
229}
230
231static LIST_HEAD(shmem_swaplist);
232static DEFINE_MUTEX(shmem_swaplist_mutex);
233
234static int shmem_reserve_inode(struct super_block *sb)
235{
236 struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
237 if (sbinfo->max_inodes) {
238 spin_lock(&sbinfo->stat_lock);
239 if (!sbinfo->free_inodes) {
240 spin_unlock(&sbinfo->stat_lock);
241 return -ENOSPC;
242 }
243 sbinfo->free_inodes--;
244 spin_unlock(&sbinfo->stat_lock);
245 }
246 return 0;
247}
248
249static void shmem_free_inode(struct super_block *sb)
250{
251 struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
252 if (sbinfo->max_inodes) {
253 spin_lock(&sbinfo->stat_lock);
254 sbinfo->free_inodes++;
255 spin_unlock(&sbinfo->stat_lock);
256 }
257}
258
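/*
 * shmem_recalc_inode - recalculate the inode's block usage.
 *
 * Pages accounted in info->alloced but no longer present in the page
 * cache or in swap are returned to the filesystem/VM accounting.
 * Caller holds info->lock.
 */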
271static void shmem_recalc_inode(struct inode *inode)
272{
273 struct shmem_inode_info *info = SHMEM_I(inode);
274 long freed;
275
276 freed = info->alloced - info->swapped - inode->i_mapping->nrpages;
277 if (freed > 0) {
278 info->alloced -= freed;
279 inode->i_blocks -= freed * BLOCKS_PER_PAGE;
280 shmem_inode_unacct_blocks(inode, freed);
281 }
282}
283
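/*
 * Replace the radix tree slot at @index, which is expected to hold
 * @expected, with @replacement.  Caller must hold mapping->tree_lock.
 */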
287static int shmem_radix_tree_replace(struct address_space *mapping,
288 pgoff_t index, void *expected, void *replacement)
289{
290 void **pslot;
291 void *item;
292
293 VM_BUG_ON(!expected);
294 VM_BUG_ON(!replacement);
295 pslot = radix_tree_lookup_slot(&mapping->page_tree, index);
296 if (!pslot)
297 return -ENOENT;
298 item = radix_tree_deref_slot_protected(pslot, &mapping->tree_lock);
299 if (item != expected)
300 return -ENOENT;
301 radix_tree_replace_slot(pslot, replacement);
302 return 0;
303}
304
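/*
 * Check, without taking tree_lock, that the slot at @index still holds
 * the radix entry for @swap.  The check is racy; callers re-validate
 * under the page lock where it matters.
 */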
312static bool shmem_confirm_swap(struct address_space *mapping,
313 pgoff_t index, swp_entry_t swap)
314{
315 void *item;
316
317 rcu_read_lock();
318 item = radix_tree_lookup(&mapping->page_tree, index);
319 rcu_read_unlock();
320 return item == swp_to_radix_entry(swap);
321}
322
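/*
 * Like add_to_page_cache_locked(), but for a locked, swap-backed shmem
 * page: if @expected is set, the existing slot (normally a swap radix
 * entry) must still match, otherwise the insertion fails with -ENOENT.
 */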
326static int shmem_add_to_page_cache(struct page *page,
327 struct address_space *mapping,
328 pgoff_t index, gfp_t gfp, void *expected)
329{
330 int error;
331
332 VM_BUG_ON_PAGE(!PageLocked(page), page);
333 VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
334
335 page_cache_get(page);
336 page->mapping = mapping;
337 page->index = index;
338
339 spin_lock_irq(&mapping->tree_lock);
340 if (!expected)
341 error = radix_tree_insert(&mapping->page_tree, index, page);
342 else
343 error = shmem_radix_tree_replace(mapping, index, expected,
344 page);
345 if (!error) {
346 mapping->nrpages++;
347 __inc_zone_page_state(page, NR_FILE_PAGES);
348 __inc_zone_page_state(page, NR_SHMEM);
349 spin_unlock_irq(&mapping->tree_lock);
350 } else {
351 page->mapping = NULL;
352 spin_unlock_irq(&mapping->tree_lock);
353 page_cache_release(page);
354 }
355 return error;
356}
357
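/*
 * Remove @page from the page cache, replacing its slot with the swap
 * radix entry @radswap.  Caller holds the page lock.
 */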
361static void shmem_delete_from_page_cache(struct page *page, void *radswap)
362{
363 struct address_space *mapping = page->mapping;
364 int error;
365
366 spin_lock_irq(&mapping->tree_lock);
367 error = shmem_radix_tree_replace(mapping, page->index, page, radswap);
368 page->mapping = NULL;
369 mapping->nrpages--;
370 __dec_zone_page_state(page, NR_FILE_PAGES);
371 __dec_zone_page_state(page, NR_SHMEM);
372 spin_unlock_irq(&mapping->tree_lock);
373 page_cache_release(page);
374 BUG_ON(error);
375}
376
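/*
 * Remove the swap entry at @index from the radix tree (if it still
 * matches @radswap) and free the swap and any swapcache it holds.
 */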
380static int shmem_free_swap(struct address_space *mapping,
381 pgoff_t index, void *radswap)
382{
383 void *old;
384
385 spin_lock_irq(&mapping->tree_lock);
386 old = radix_tree_delete_item(&mapping->page_tree, index, radswap);
387 spin_unlock_irq(&mapping->tree_lock);
388 if (old != radswap)
389 return -ENOENT;
390 free_swap_and_cache(radix_to_swp_entry(radswap));
391 return 0;
392}
393
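/*
 * Return, in bytes, how much of the range [@start, @end) of @mapping is
 * currently occupied by swap entries rather than by resident pages.
 */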
401unsigned long shmem_partial_swap_usage(struct address_space *mapping,
402 pgoff_t start, pgoff_t end)
403{
404 struct radix_tree_iter iter;
405 void **slot;
406 struct page *page;
407 unsigned long swapped = 0;
408
409 rcu_read_lock();
410
411 radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
412 if (iter.index >= end)
413 break;
414
415 page = radix_tree_deref_slot(slot);
416
417 if (radix_tree_deref_retry(page)) {
418 slot = radix_tree_iter_retry(&iter);
419 continue;
420 }
421
422 if (radix_tree_exceptional_entry(page))
423 swapped++;
424
425 if (need_resched()) {
426 cond_resched_rcu();
427 slot = radix_tree_iter_next(&iter);
428 }
429 }
430
431 rcu_read_unlock();
432
433 return swapped << PAGE_SHIFT;
434}
435
436
437
438
439
440
441
442
443unsigned long shmem_swap_usage(struct vm_area_struct *vma)
444{
445 struct inode *inode = file_inode(vma->vm_file);
446 struct shmem_inode_info *info = SHMEM_I(inode);
447 struct address_space *mapping = inode->i_mapping;
448 unsigned long swapped;
449
450
451 swapped = READ_ONCE(info->swapped);
452
453
454
455
456
457
458 if (!swapped)
459 return 0;
460
461 if (!vma->vm_pgoff && vma->vm_end - vma->vm_start >= inode->i_size)
462 return swapped << PAGE_SHIFT;
463
464
465 return shmem_partial_swap_usage(mapping,
466 linear_page_index(vma, vma->vm_start),
467 linear_page_index(vma, vma->vm_end));
468}
469
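/*
 * Called when an shm segment is unlocked (e.g. via shmctl(SHM_UNLOCK)):
 * walk the mapping and let reclaim move its pages back off the
 * unevictable list.
 */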
473void shmem_unlock_mapping(struct address_space *mapping)
474{
475 struct pagevec pvec;
476 pgoff_t indices[PAGEVEC_SIZE];
477 pgoff_t index = 0;
478
479 pagevec_init(&pvec, 0);
480
481
482
483 while (!mapping_unevictable(mapping)) {
484
485
486
487
488 pvec.nr = __find_get_pages(mapping, index,
489 PAGEVEC_SIZE, pvec.pages, indices);
490 if (!pvec.nr)
491 break;
492 index = indices[pvec.nr - 1] + 1;
493 pagevec_remove_exceptionals(&pvec);
494 check_move_unevictable_pages(&pvec);
495 pagevec_release(&pvec);
496 cond_resched();
497 }
498}
499
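/*
 * Remove the pages and swap entries in [lstart, lend] from the inode,
 * zeroing any partial first/last page.  When @unfalloc, we are undoing a
 * failed fallocate(): swap entries are kept, and only pages that were
 * newly allocated (not yet Uptodate) are dropped.
 */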
504static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
505 bool unfalloc)
506{
507 struct address_space *mapping = inode->i_mapping;
508 struct shmem_inode_info *info = SHMEM_I(inode);
509 pgoff_t start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
510 pgoff_t end = (lend + 1) >> PAGE_CACHE_SHIFT;
511 unsigned int partial_start = lstart & (PAGE_CACHE_SIZE - 1);
512 unsigned int partial_end = (lend + 1) & (PAGE_CACHE_SIZE - 1);
513 struct pagevec pvec;
514 pgoff_t indices[PAGEVEC_SIZE];
515 long nr_swaps_freed = 0;
516 pgoff_t index;
517 int i;
518
519 if (lend == -1)
520 end = -1;
521
522 pagevec_init(&pvec, 0);
523 index = start;
524 while (index < end) {
525 pvec.nr = __find_get_pages(mapping, index,
526 min(end - index, (pgoff_t)PAGEVEC_SIZE),
527 pvec.pages, indices);
528 if (!pvec.nr)
529 break;
530 mem_cgroup_uncharge_start();
531 for (i = 0; i < pagevec_count(&pvec); i++) {
532 struct page *page = pvec.pages[i];
533
534 index = indices[i];
535 if (index >= end)
536 break;
537
538 if (radix_tree_exceptional_entry(page)) {
539 if (unfalloc)
540 continue;
541 nr_swaps_freed += !shmem_free_swap(mapping,
542 index, page);
543 continue;
544 }
545
546 if (!trylock_page(page))
547 continue;
548 if (!unfalloc || !PageUptodate(page)) {
549 if (page->mapping == mapping) {
550 VM_BUG_ON_PAGE(PageWriteback(page), page);
551 truncate_inode_page(mapping, page);
552 }
553 }
554 unlock_page(page);
555 }
556 pagevec_remove_exceptionals(&pvec);
557 pagevec_release(&pvec);
558 mem_cgroup_uncharge_end();
559 cond_resched();
560 index++;
561 }
562
563 if (partial_start) {
564 struct page *page = NULL;
565 shmem_getpage(inode, start - 1, &page, SGP_READ, NULL);
566 if (page) {
567 unsigned int top = PAGE_CACHE_SIZE;
568 if (start > end) {
569 top = partial_end;
570 partial_end = 0;
571 }
572 zero_user_segment(page, partial_start, top);
573 set_page_dirty(page);
574 unlock_page(page);
575 page_cache_release(page);
576 }
577 }
578 if (partial_end) {
579 struct page *page = NULL;
580 shmem_getpage(inode, end, &page, SGP_READ, NULL);
581 if (page) {
582 zero_user_segment(page, 0, partial_end);
583 set_page_dirty(page);
584 unlock_page(page);
585 page_cache_release(page);
586 }
587 }
588 if (start >= end)
589 return;
590
591 index = start;
592 while (index < end) {
593 cond_resched();
594
595 pvec.nr = __find_get_pages(mapping, index,
596 min(end - index, (pgoff_t)PAGEVEC_SIZE),
597 pvec.pages, indices);
598 if (!pvec.nr) {
599
600 if (index == start || end != -1)
601 break;
602
603 index = start;
604 continue;
605 }
606 mem_cgroup_uncharge_start();
607 for (i = 0; i < pagevec_count(&pvec); i++) {
608 struct page *page = pvec.pages[i];
609
610 index = indices[i];
611 if (index >= end)
612 break;
613
614 if (radix_tree_exceptional_entry(page)) {
615 if (unfalloc)
616 continue;
617 if (shmem_free_swap(mapping, index, page)) {
618
619 index--;
620 break;
621 }
622 nr_swaps_freed++;
623 continue;
624 }
625
626 lock_page(page);
627 if (!unfalloc || !PageUptodate(page)) {
628 if (page->mapping == mapping) {
629 VM_BUG_ON_PAGE(PageWriteback(page), page);
630 truncate_inode_page(mapping, page);
631 } else {
632
633 unlock_page(page);
634 index--;
635 break;
636 }
637 }
638 unlock_page(page);
639 }
640 pagevec_remove_exceptionals(&pvec);
641 pagevec_release(&pvec);
642 mem_cgroup_uncharge_end();
643 index++;
644 }
645
646 spin_lock(&info->lock);
647 info->swapped -= nr_swaps_freed;
648 shmem_recalc_inode(inode);
649 spin_unlock(&info->lock);
650}
651
652void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
653{
654 shmem_undo_range(inode, lstart, lend, false);
655 inode->i_ctime = inode->i_mtime = CURRENT_TIME;
656}
657EXPORT_SYMBOL_GPL(shmem_truncate_range);
658
659static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
660{
661 struct inode *inode = dentry->d_inode;
662 struct shmem_inode_info *info = SHMEM_I(inode);
663 int error;
664
665 error = inode_change_ok(inode, attr);
666 if (error)
667 return error;
668
669 if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
670 loff_t oldsize = inode->i_size;
671 loff_t newsize = attr->ia_size;
672
673
674 if ((newsize < oldsize && (info->seals & F_SEAL_SHRINK)) ||
675 (newsize > oldsize && (info->seals & F_SEAL_GROW)))
676 return -EPERM;
677
678 if (newsize != oldsize) {
679 i_size_write(inode, newsize);
680 inode->i_ctime = inode->i_mtime = CURRENT_TIME;
681 }
682 if (newsize < oldsize) {
683 loff_t holebegin = round_up(newsize, PAGE_SIZE);
684 unmap_mapping_range(inode->i_mapping, holebegin, 0, 1);
685 shmem_truncate_range(inode, newsize, (loff_t)-1);
686
687 unmap_mapping_range(inode->i_mapping, holebegin, 0, 1);
688 }
689 }
690
691 setattr_copy(inode, attr);
692#ifdef CONFIG_TMPFS_POSIX_ACL
693 if (attr->ia_valid & ATTR_MODE)
694 error = generic_acl_chmod(inode);
695#endif
696 return error;
697}
698
699static void shmem_evict_inode(struct inode *inode)
700{
701 struct shmem_inode_info *info = SHMEM_I(inode);
702
703 if (inode->i_mapping->a_ops == &shmem_aops) {
704 shmem_unacct_size(info->flags, inode->i_size);
705 inode->i_size = 0;
706 shmem_truncate_range(inode, 0, (loff_t)-1);
707 if (!list_empty(&info->swaplist)) {
708 mutex_lock(&shmem_swaplist_mutex);
709 list_del_init(&info->swaplist);
710 mutex_unlock(&shmem_swaplist_mutex);
711 }
712 } else
713 kfree(info->symlink);
714
715 simple_xattrs_free(&info->xattrs);
716 WARN_ON(inode->i_blocks);
717 shmem_free_inode(inode->i_sb);
718 clear_inode(inode);
719}
720
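/*
 * If @swap is found in this inode's mapping, move the page back from
 * swap cache into the page cache.  Returns 0 if the entry was not here,
 * 1 if it was found and dealt with, or a negative error.  Called with
 * shmem_swaplist_mutex held (dropped around shmem_replace_page()).
 */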
724static int shmem_unuse_inode(struct shmem_inode_info *info,
725 swp_entry_t swap, struct page **pagep)
726{
727 struct address_space *mapping = info->vfs_inode.i_mapping;
728 void *radswap;
729 pgoff_t index;
730 gfp_t gfp;
731 int error = 0;
732
733 radswap = swp_to_radix_entry(swap);
734 index = radix_tree_locate_item(&mapping->page_tree, radswap);
735 if (index == -1)
736 return 0;
737
738
739
740
741
742
743
744 if (shmem_swaplist.next != &info->swaplist)
745 list_move_tail(&shmem_swaplist, &info->swaplist);
746
747 gfp = mapping_gfp_mask(mapping);
748 if (shmem_should_replace_page(*pagep, gfp)) {
749 mutex_unlock(&shmem_swaplist_mutex);
750 error = shmem_replace_page(pagep, gfp, info, index);
751 mutex_lock(&shmem_swaplist_mutex);
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770 if (!page_swapcount(*pagep))
771 error = -ENOENT;
772 }
773
774
775
776
777
778
779 if (!error)
780 error = shmem_add_to_page_cache(*pagep, mapping, index,
781 GFP_NOWAIT, radswap);
782 if (error != -ENOMEM) {
783
784
785
786
787 delete_from_swap_cache(*pagep);
788 set_page_dirty(*pagep);
789 if (!error) {
790 spin_lock(&info->lock);
791 info->swapped--;
792 spin_unlock(&info->lock);
793 swap_free(swap);
794 }
795 error = 1;
796 }
797 return error;
798}
799
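/*
 * shmem_unuse() - search the swaplist for whichever shmem inode holds
 * @swap and move @page back into its page cache; reached from swapoff
 * via try_to_unuse().  The page is locked on entry and released here.
 */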
803int shmem_unuse(swp_entry_t swap, struct page *page)
804{
805 struct list_head *this, *next;
806 struct shmem_inode_info *info;
807 int found = 0;
808 int error = 0;
809
810
811
812
813
814 if (unlikely(!PageSwapCache(page) || page_private(page) != swap.val))
815 goto out;
816
817
818
819
820
821
822 error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
823 if (error)
824 goto out;
825
826
827 mutex_lock(&shmem_swaplist_mutex);
828 list_for_each_safe(this, next, &shmem_swaplist) {
829 info = list_entry(this, struct shmem_inode_info, swaplist);
830 if (info->swapped)
831 found = shmem_unuse_inode(info, swap, &page);
832 else
833 list_del_init(&info->swaplist);
834 cond_resched();
835 if (found)
836 break;
837 }
838 mutex_unlock(&shmem_swaplist_mutex);
839
840 if (found < 0)
841 error = found;
842out:
843 unlock_page(page);
844 page_cache_release(page);
845 return error;
846}
847
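/*
 * Move the shmem page over to swap: allocate a swap entry, replace the
 * page cache slot with it, and write the page out.  Only expected from
 * memory-reclaim writeback; anything else just redirties the page.
 */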
851static int shmem_writepage(struct page *page, struct writeback_control *wbc)
852{
853 struct shmem_inode_info *info;
854 struct address_space *mapping;
855 struct inode *inode;
856 swp_entry_t swap;
857 pgoff_t index;
858
859 BUG_ON(!PageLocked(page));
860 mapping = page->mapping;
861 index = page->index;
862 inode = mapping->host;
863 info = SHMEM_I(inode);
864 if (info->flags & VM_LOCKED)
865 goto redirty;
866 if (!total_swap_pages)
867 goto redirty;
868
869
870
871
872
873
874
875
876 if (!wbc->for_reclaim) {
877 WARN_ON_ONCE(1);
878 goto redirty;
879 }
880
881
882
883
884
885
886
887
888
889
890
891
892 if (!PageUptodate(page)) {
893 if (inode->i_private) {
894 struct shmem_falloc *shmem_falloc;
895 spin_lock(&inode->i_lock);
896 shmem_falloc = inode->i_private;
897 if (shmem_falloc &&
898 !shmem_falloc->waitq &&
899 index >= shmem_falloc->start &&
900 index < shmem_falloc->next)
901 shmem_falloc->nr_unswapped++;
902 else
903 shmem_falloc = NULL;
904 spin_unlock(&inode->i_lock);
905 if (shmem_falloc)
906 goto redirty;
907 }
908 clear_highpage(page);
909 flush_dcache_page(page);
910 SetPageUptodate(page);
911 }
912
913 swap = get_swap_page();
914 if (!swap.val)
915 goto redirty;
916
917
918
919
920
921
922
923
924
925 mutex_lock(&shmem_swaplist_mutex);
926 if (list_empty(&info->swaplist))
927 list_add_tail(&info->swaplist, &shmem_swaplist);
928
929 if (add_to_swap_cache(page, swap, GFP_ATOMIC) == 0) {
930 spin_lock(&info->lock);
931 shmem_recalc_inode(inode);
932 info->swapped++;
933 spin_unlock(&info->lock);
934
935 swap_shmem_alloc(swap);
936 shmem_delete_from_page_cache(page, swp_to_radix_entry(swap));
937
938 mutex_unlock(&shmem_swaplist_mutex);
939 BUG_ON(page_mapped(page));
940 swap_writepage(page, wbc);
941 return 0;
942 }
943
944 mutex_unlock(&shmem_swaplist_mutex);
945 swapcache_free(swap, NULL);
946redirty:
947 set_page_dirty(page);
948 if (wbc->for_reclaim)
949 return AOP_WRITEPAGE_ACTIVATE;
950 unlock_page(page);
951 return 0;
952}
953
954#ifdef CONFIG_NUMA
955#ifdef CONFIG_TMPFS
956static void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol)
957{
958 char buffer[64];
959
960 if (!mpol || mpol->mode == MPOL_DEFAULT)
961 return;
962
963 mpol_to_str(buffer, sizeof(buffer), mpol);
964
965 seq_printf(seq, ",mpol=%s", buffer);
966}
967
968static struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
969{
970 struct mempolicy *mpol = NULL;
971 if (sbinfo->mpol) {
972 spin_lock(&sbinfo->stat_lock);
973 mpol = sbinfo->mpol;
974 mpol_get(mpol);
975 spin_unlock(&sbinfo->stat_lock);
976 }
977 return mpol;
978}
979#endif
980
981static struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp,
982 struct shmem_inode_info *info, pgoff_t index)
983{
984 struct vm_area_struct pvma;
985 struct page *page;
986
987
988 pvma.vm_start = 0;
989
990 pvma.vm_pgoff = index + info->vfs_inode.i_ino;
991 pvma.vm_ops = NULL;
992 pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, index);
993
994 page = swapin_readahead(swap, gfp, &pvma, 0);
995
996
997 mpol_cond_put(pvma.vm_policy);
998
999 return page;
1000}
1001
1002static struct page *shmem_alloc_page(gfp_t gfp,
1003 struct shmem_inode_info *info, pgoff_t index)
1004{
1005 struct vm_area_struct pvma;
1006 struct page *page;
1007
1008
1009 pvma.vm_start = 0;
1010
1011 pvma.vm_pgoff = index + info->vfs_inode.i_ino;
1012 pvma.vm_ops = NULL;
1013 pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, index);
1014
1015 page = alloc_page_vma(gfp, &pvma, 0);
1016
1017
1018 mpol_cond_put(pvma.vm_policy);
1019
1020 return page;
1021}
1022#else
1023#ifdef CONFIG_TMPFS
1024static inline void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol)
1025{
1026}
1027#endif
1028
1029static inline struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp,
1030 struct shmem_inode_info *info, pgoff_t index)
1031{
1032 return swapin_readahead(swap, gfp, NULL, 0);
1033}
1034
1035static inline struct page *shmem_alloc_page(gfp_t gfp,
1036 struct shmem_inode_info *info, pgoff_t index)
1037{
1038 return alloc_page(gfp);
1039}
1040#endif
1041
1042#if !defined(CONFIG_NUMA) || !defined(CONFIG_TMPFS)
1043static inline struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
1044{
1045 return NULL;
1046}
1047#endif
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061static bool shmem_should_replace_page(struct page *page, gfp_t gfp)
1062{
1063 return page_zonenum(page) > gfp_zone(gfp);
1064}
1065
1066static int shmem_replace_page(struct page **pagep, gfp_t gfp,
1067 struct shmem_inode_info *info, pgoff_t index)
1068{
1069 struct page *oldpage, *newpage;
1070 struct address_space *swap_mapping;
1071 pgoff_t swap_index;
1072 int error;
1073
1074 oldpage = *pagep;
1075 swap_index = page_private(oldpage);
1076 swap_mapping = page_mapping(oldpage);
1077
1078
1079
1080
1081
1082 gfp &= ~GFP_CONSTRAINT_MASK;
1083 newpage = shmem_alloc_page(gfp, info, index);
1084 if (!newpage)
1085 return -ENOMEM;
1086
1087 page_cache_get(newpage);
1088 copy_highpage(newpage, oldpage);
1089 flush_dcache_page(newpage);
1090
1091 __set_page_locked(newpage);
1092 SetPageUptodate(newpage);
1093 SetPageSwapBacked(newpage);
1094 set_page_private(newpage, swap_index);
1095 SetPageSwapCache(newpage);
1096
1097
1098
1099
1100
1101 spin_lock_irq(&swap_mapping->tree_lock);
1102 error = shmem_radix_tree_replace(swap_mapping, swap_index, oldpage,
1103 newpage);
1104 if (!error) {
1105 __inc_zone_page_state(newpage, NR_FILE_PAGES);
1106 __dec_zone_page_state(oldpage, NR_FILE_PAGES);
1107 }
1108 spin_unlock_irq(&swap_mapping->tree_lock);
1109
1110 if (unlikely(error)) {
1111
1112
1113
1114
1115
1116 oldpage = newpage;
1117 } else {
1118 mem_cgroup_replace_page_cache(oldpage, newpage);
1119 lru_cache_add_anon(newpage);
1120 *pagep = newpage;
1121 }
1122
1123 ClearPageSwapCache(oldpage);
1124 set_page_private(oldpage, 0);
1125
1126 unlock_page(oldpage);
1127 page_cache_release(oldpage);
1128 page_cache_release(oldpage);
1129 return error;
1130}
1131
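/*
 * shmem_getpage_gfp - find a page in cache, or get it from swap, or
 * allocate a new one.
 *
 * A newly allocated page comes back locked; it is zeroed and marked
 * Uptodate unless sgp is SGP_WRITE/SGP_FALLOC (the caller will fill it),
 * and marked dirty only for SGP_DIRTY.  @vma, @vmf and @fault_type are
 * passed in only when answering a page fault, for userfaultfd and
 * major-fault accounting.
 */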
1139static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
1140 struct page **pagep, enum sgp_type sgp, gfp_t gfp,
1141 struct vm_area_struct *vma, struct vm_fault *vmf,
1142 int *fault_type)
1143{
1144 struct address_space *mapping = inode->i_mapping;
1145 struct shmem_inode_info *info;
1146 struct page *page;
1147 swp_entry_t swap;
1148 int error;
1149 int once = 0;
1150 int alloced = 0;
1151
1152 if (index > (MAX_LFS_FILESIZE >> PAGE_CACHE_SHIFT))
1153 return -EFBIG;
1154repeat:
1155 swap.val = 0;
1156 page = __find_lock_page(mapping, index);
1157 if (radix_tree_exceptional_entry(page)) {
1158 swap = radix_to_swp_entry(page);
1159 page = NULL;
1160 }
1161
1162 if (sgp != SGP_WRITE && sgp != SGP_FALLOC &&
1163 ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
1164 error = -EINVAL;
1165 goto unlock;
1166 }
1167
1168
1169 if (page && !PageUptodate(page)) {
1170 if (sgp != SGP_READ)
1171 goto clear;
1172 unlock_page(page);
1173 page_cache_release(page);
1174 page = NULL;
1175 }
1176 if (page || (sgp == SGP_READ && !swap.val)) {
1177 *pagep = page;
1178 return 0;
1179 }
1180
1181
1182
1183
1184
1185 info = SHMEM_I(inode);
1186
1187 if (swap.val) {
1188
1189 page = lookup_swap_cache(swap, NULL, 0);
1190 if (!page) {
1191
1192 if (fault_type)
1193 *fault_type |= VM_FAULT_MAJOR;
1194 page = shmem_swapin(swap, gfp, info, index);
1195 if (!page) {
1196 error = -ENOMEM;
1197 goto failed;
1198 }
1199 }
1200
1201
1202 lock_page(page);
1203 if (!PageSwapCache(page) || page_private(page) != swap.val ||
1204 !shmem_confirm_swap(mapping, index, swap)) {
1205 error = -EEXIST;
1206 goto unlock;
1207 }
1208 if (!PageUptodate(page)) {
1209 error = -EIO;
1210 goto failed;
1211 }
1212 wait_on_page_writeback(page);
1213
1214 if (shmem_should_replace_page(page, gfp)) {
1215 error = shmem_replace_page(&page, gfp, info, index);
1216 if (error)
1217 goto failed;
1218 }
1219
1220 error = mem_cgroup_cache_charge(page, current->mm,
1221 gfp & GFP_RECLAIM_MASK);
1222 if (!error) {
1223 error = shmem_add_to_page_cache(page, mapping, index,
1224 gfp, swp_to_radix_entry(swap));
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237 if (error)
1238 delete_from_swap_cache(page);
1239 }
1240 if (error)
1241 goto failed;
1242
1243 spin_lock(&info->lock);
1244 info->swapped--;
1245 shmem_recalc_inode(inode);
1246 spin_unlock(&info->lock);
1247
1248 delete_from_swap_cache(page);
1249 set_page_dirty(page);
1250 swap_free(swap);
1251
1252 } else {
1253 if (vma && userfaultfd_missing(vma)) {
1254 *fault_type = handle_userfault(vmf,
1255 VM_UFFD_MISSING);
1256 return 0;
1257 }
1258 if (!shmem_inode_acct_block(inode, 1)) {
1259 error = -ENOSPC;
1260
1261 goto failed;
1262 }
1263
1264 page = shmem_alloc_page(gfp, info, index);
1265 if (!page) {
1266 error = -ENOMEM;
1267 goto decused;
1268 }
1269
1270 SetPageSwapBacked(page);
1271 __set_page_locked(page);
1272 error = mem_cgroup_cache_charge(page, current->mm,
1273 gfp & GFP_RECLAIM_MASK);
1274 if (error)
1275 goto decused;
1276 error = radix_tree_maybe_preload(gfp & GFP_RECLAIM_MASK);
1277 if (!error) {
1278 error = shmem_add_to_page_cache(page, mapping, index,
1279 gfp, NULL);
1280 radix_tree_preload_end();
1281 }
1282 if (error) {
1283 mem_cgroup_uncharge_cache_page(page);
1284 goto decused;
1285 }
1286 lru_cache_add_anon(page);
1287
1288 spin_lock(&info->lock);
1289 info->alloced++;
1290 inode->i_blocks += BLOCKS_PER_PAGE;
1291 shmem_recalc_inode(inode);
1292 spin_unlock(&info->lock);
1293 alloced = true;
1294
1295
1296
1297
1298 if (sgp == SGP_FALLOC)
1299 sgp = SGP_WRITE;
1300clear:
1301
1302
1303
1304
1305
1306 if (sgp != SGP_WRITE) {
1307 clear_highpage(page);
1308 flush_dcache_page(page);
1309 SetPageUptodate(page);
1310 }
1311 if (sgp == SGP_DIRTY)
1312 set_page_dirty(page);
1313 }
1314
1315
1316 if (sgp != SGP_WRITE && sgp != SGP_FALLOC &&
1317 ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
1318 if (alloced) {
1319 ClearPageDirty(page);
1320 delete_from_page_cache(page);
1321 spin_lock(&info->lock);
1322 shmem_recalc_inode(inode);
1323 spin_unlock(&info->lock);
1324 }
1325 error = -EINVAL;
1326 goto unlock;
1327 }
1328 *pagep = page;
1329 return 0;
1330
1331
1332
1333
1334decused:
1335 shmem_inode_unacct_blocks(inode, 1);
1336failed:
1337 if (swap.val && !shmem_confirm_swap(mapping, index, swap))
1338 error = -EEXIST;
1339unlock:
1340 if (page) {
1341 unlock_page(page);
1342 page_cache_release(page);
1343 }
1344 if (error == -ENOSPC && !once++) {
1345 info = SHMEM_I(inode);
1346 spin_lock(&info->lock);
1347 shmem_recalc_inode(inode);
1348 spin_unlock(&info->lock);
1349 goto repeat;
1350 }
1351 if (error == -EEXIST)
1352 goto repeat;
1353 return error;
1354}
1355
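/*
 * Like default_wake_function(), but removes the waiter from the queue:
 * the wait entry lives on a faulting task's stack (see shmem_fault()),
 * so it must not be left behind once woken.
 */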
1361static int synchronous_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
1362{
1363 int ret = default_wake_function(wait, mode, sync, key);
1364 list_del_init(&wait->task_list);
1365 return ret;
1366}
1367
1368static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1369{
1370 struct inode *inode = file_inode(vma->vm_file);
1371 gfp_t gfp = mapping_gfp_mask(inode->i_mapping);
1372 int error;
1373 int ret = VM_FAULT_LOCKED;
1374
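	/*
	 * Faulting into a hole that is currently being punched can keep
	 * refilling the hole and prevent the punch from ever completing.
	 * So if a hole-punch covering this offset is in progress, back
	 * out of the fault (dropping mmap_sem when retry is allowed) and
	 * wait on the punching task's waitqueue instead.
	 */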
1392 if (unlikely(inode->i_private)) {
1393 struct shmem_falloc *shmem_falloc;
1394
1395 spin_lock(&inode->i_lock);
1396 shmem_falloc = inode->i_private;
1397 if (shmem_falloc &&
1398 shmem_falloc->waitq &&
1399 vmf->pgoff >= shmem_falloc->start &&
1400 vmf->pgoff < shmem_falloc->next) {
1401 wait_queue_head_t *shmem_falloc_waitq;
1402 DEFINE_WAIT_FUNC(shmem_fault_wait, synchronous_wake_function);
1403
1404 ret = VM_FAULT_NOPAGE;
1405 if ((vmf->flags & FAULT_FLAG_ALLOW_RETRY) &&
1406 !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
1407
1408 up_read(&vma->vm_mm->mmap_sem);
1409 ret = VM_FAULT_RETRY;
1410 }
1411
1412 shmem_falloc_waitq = shmem_falloc->waitq;
1413 prepare_to_wait(shmem_falloc_waitq, &shmem_fault_wait,
1414 TASK_UNINTERRUPTIBLE);
1415 spin_unlock(&inode->i_lock);
1416 schedule();
1417
1418
1419
1420
1421
1422
1423
1424
1425 spin_lock(&inode->i_lock);
1426 finish_wait(shmem_falloc_waitq, &shmem_fault_wait);
1427 spin_unlock(&inode->i_lock);
1428 return ret;
1429 }
1430 spin_unlock(&inode->i_lock);
1431 }
1432
1433 error = shmem_getpage_gfp(inode, vmf->pgoff, &vmf->page, SGP_CACHE,
1434 gfp, vma, vmf, &ret);
1435 if (error)
1436 return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS);
1437
1438 if (ret & VM_FAULT_MAJOR) {
1439 count_vm_event(PGMAJFAULT);
1440 mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
1441 }
1442 return ret;
1443}
1444
1445#ifdef CONFIG_NUMA
1446static int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *mpol)
1447{
1448 struct inode *inode = file_inode(vma->vm_file);
1449 return mpol_set_shared_policy(&SHMEM_I(inode)->policy, vma, mpol);
1450}
1451
1452static struct mempolicy *shmem_get_policy(struct vm_area_struct *vma,
1453 unsigned long addr)
1454{
1455 struct inode *inode = file_inode(vma->vm_file);
1456 pgoff_t index;
1457
1458 index = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
1459 return mpol_shared_policy_lookup(&SHMEM_I(inode)->policy, index);
1460}
1461#endif
1462
1463int shmem_lock(struct file *file, int lock, struct user_struct *user)
1464{
1465 struct inode *inode = file_inode(file);
1466 struct shmem_inode_info *info = SHMEM_I(inode);
1467 int retval = -ENOMEM;
1468
1469 spin_lock(&info->lock);
1470 if (lock && !(info->flags & VM_LOCKED)) {
1471 if (!user_shm_lock(inode->i_size, user))
1472 goto out_nomem;
1473 info->flags |= VM_LOCKED;
1474 mapping_set_unevictable(file->f_mapping);
1475 }
1476 if (!lock && (info->flags & VM_LOCKED) && user) {
1477 user_shm_unlock(inode->i_size, user);
1478 info->flags &= ~VM_LOCKED;
1479 mapping_clear_unevictable(file->f_mapping);
1480 }
1481 retval = 0;
1482
1483out_nomem:
1484 spin_unlock(&info->lock);
1485 return retval;
1486}
1487
1488static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
1489{
1490 file_accessed(file);
1491 vma->vm_ops = &shmem_vm_ops;
1492 return 0;
1493}
1494
1495static struct inode *shmem_get_inode(struct super_block *sb, const struct inode *dir,
1496 umode_t mode, dev_t dev, unsigned long flags)
1497{
1498 struct inode *inode;
1499 struct shmem_inode_info *info;
1500 struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
1501
1502 if (shmem_reserve_inode(sb))
1503 return NULL;
1504
1505 inode = new_inode(sb);
1506 if (inode) {
1507 inode->i_ino = get_next_ino();
1508 inode_init_owner(inode, dir, mode);
1509 inode->i_blocks = 0;
1510 inode->i_mapping->backing_dev_info = &shmem_backing_dev_info;
1511 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
1512 inode->i_generation = get_seconds();
1513 info = SHMEM_I(inode);
1514 memset(info, 0, (char *)inode - (char *)info);
1515 spin_lock_init(&info->lock);
1516 info->seals = F_SEAL_SEAL;
1517 info->flags = flags & VM_NORESERVE;
1518 INIT_LIST_HEAD(&info->swaplist);
1519 simple_xattrs_init(&info->xattrs);
1520 cache_no_acl(inode);
1521
1522 switch (mode & S_IFMT) {
1523 default:
1524 inode->i_op = &shmem_special_inode_operations;
1525 init_special_inode(inode, mode, dev);
1526 break;
1527 case S_IFREG:
1528 inode->i_mapping->a_ops = &shmem_aops;
1529 inode->i_op = &shmem_inode_operations;
1530 inode->i_fop = &shmem_file_operations;
1531 mpol_shared_policy_init(&info->policy,
1532 shmem_get_sbmpol(sbinfo));
1533 break;
1534 case S_IFDIR:
1535 inc_nlink(inode);
1536
1537 inode->i_size = 2 * BOGO_DIRENT_SIZE;
1538 inode->i_op = &shmem_dir_inode_operations.ops;
1539 inode->i_fop = &simple_dir_operations;
1540 inode->i_flags |= S_IOPS_WRAPPER;
1541 break;
1542 case S_IFLNK:
1543
1544
1545
1546
1547 mpol_shared_policy_init(&info->policy, NULL);
1548 break;
1549 }
1550 } else
1551 shmem_free_inode(sb);
1552 return inode;
1553}
1554
1555bool shmem_mapping(struct address_space *mapping)
1556{
1557 return mapping->backing_dev_info == &shmem_backing_dev_info;
1558}
1559
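/*
 * Worker for userfaultfd page installation into a shmem mapping:
 * allocate a page, fill it from @src_addr (or clear it when @zeropage),
 * add it to the page cache and map it at @dst_addr.  If the copy from
 * user space faults, -ENOENT is returned with *pagep set so the caller
 * can retry after faulting in the source.
 */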
1560static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
1561 pmd_t *dst_pmd,
1562 struct vm_area_struct *dst_vma,
1563 unsigned long dst_addr,
1564 unsigned long src_addr,
1565 bool zeropage,
1566 struct page **pagep)
1567{
1568 struct inode *inode = file_inode(dst_vma->vm_file);
1569 struct shmem_inode_info *info = SHMEM_I(inode);
1570 struct address_space *mapping = inode->i_mapping;
1571 gfp_t gfp = mapping_gfp_mask(mapping);
1572 pgoff_t pgoff = linear_page_index(dst_vma, dst_addr);
1573 spinlock_t *ptl;
1574 void *page_kaddr;
1575 struct page *page;
1576 pte_t _dst_pte, *dst_pte;
1577 int ret;
1578 pgoff_t offset, max_off;
1579
1580 ret = -ENOMEM;
1581 if (!shmem_inode_acct_block(inode, 1))
1582 goto out;
1583
1584 if (!*pagep) {
1585 page = shmem_alloc_page(gfp, info, pgoff);
1586 if (!page)
1587 goto out_unacct_blocks;
1588
1589 if (!zeropage) {
1590 page_kaddr = kmap_atomic(page);
1591 ret = copy_from_user(page_kaddr,
1592 (const void __user *)src_addr,
1593 PAGE_SIZE);
1594 kunmap_atomic(page_kaddr);
1595
1596
1597 if (unlikely(ret)) {
1598 *pagep = page;
1599 shmem_inode_unacct_blocks(inode, 1);
1600
1601 return -ENOENT;
1602 }
1603 } else {
1604 clear_highpage(page);
1605 }
1606 } else {
1607 page = *pagep;
1608 *pagep = NULL;
1609 }
1610
1611 VM_BUG_ON(PageLocked(page) || PageSwapBacked(page));
1612 __set_page_locked(page);
1613 __SetPageSwapBacked(page);
1614 __SetPageUptodate(page);
1615
1616 ret = -EFAULT;
1617 offset = linear_page_index(dst_vma, dst_addr);
1618 max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
1619 if (unlikely(offset >= max_off))
1620 goto out_release;
1621
1622 ret = mem_cgroup_cache_charge(page, dst_mm,
1623 gfp & GFP_RECLAIM_MASK);
1624 if (ret)
1625 goto out_release;
1626
1627 ret = radix_tree_maybe_preload(gfp & GFP_RECLAIM_MASK);
1628 if (!ret) {
1629 ret = shmem_add_to_page_cache(page, mapping, pgoff, gfp, NULL);
1630 radix_tree_preload_end();
1631 }
1632 if (ret)
1633 goto out_release_uncharge;
1634
1635 _dst_pte = mk_pte(page, dst_vma->vm_page_prot);
1636 if (dst_vma->vm_flags & VM_WRITE)
1637 _dst_pte = pte_mkwrite(pte_mkdirty(_dst_pte));
1638 else {
1639
1640
1641
1642
1643
1644
1645
1646 set_page_dirty(page);
1647 }
1648
1649 dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
1650
1651 ret = -EFAULT;
1652 max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
1653 if (unlikely(offset >= max_off))
1654 goto out_release_uncharge_unlock;
1655
1656 ret = -EEXIST;
1657 if (!pte_none(*dst_pte))
1658 goto out_release_uncharge_unlock;
1659
1660 lru_cache_add_anon(page);
1661
1662 spin_lock(&info->lock);
1663 info->alloced++;
1664 inode->i_blocks += BLOCKS_PER_PAGE;
1665 shmem_recalc_inode(inode);
1666 spin_unlock(&info->lock);
1667
1668 inc_mm_counter(dst_mm, mm_counter_file(page));
1669 page_add_file_rmap(page);
1670 set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
1671
1672
1673 update_mmu_cache(dst_vma, dst_addr, dst_pte);
1674 pte_unmap_unlock(dst_pte, ptl);
1675 unlock_page(page);
1676 ret = 0;
1677out:
1678 return ret;
1679out_release_uncharge_unlock:
1680 pte_unmap_unlock(dst_pte, ptl);
1681 ClearPageDirty(page);
1682 delete_from_page_cache(page);
1683out_release_uncharge:
1684 mem_cgroup_uncharge_cache_page(page);
1685out_release:
1686 unlock_page(page);
1687 put_page(page);
1688out_unacct_blocks:
1689 shmem_inode_unacct_blocks(inode, 1);
1690 goto out;
1691}
1692
1693int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm,
1694 pmd_t *dst_pmd,
1695 struct vm_area_struct *dst_vma,
1696 unsigned long dst_addr,
1697 unsigned long src_addr,
1698 struct page **pagep)
1699{
1700 return shmem_mfill_atomic_pte(dst_mm, dst_pmd, dst_vma,
1701 dst_addr, src_addr, false, pagep);
1702}
1703
1704int shmem_mfill_zeropage_pte(struct mm_struct *dst_mm,
1705 pmd_t *dst_pmd,
1706 struct vm_area_struct *dst_vma,
1707 unsigned long dst_addr)
1708{
1709 struct page *page = NULL;
1710
1711 return shmem_mfill_atomic_pte(dst_mm, dst_pmd, dst_vma,
1712 dst_addr, 0, true, &page);
1713}
1714
1715#ifdef CONFIG_TMPFS
1716static const struct inode_operations shmem_symlink_inode_operations;
1717static const struct inode_operations shmem_short_symlink_operations;
1718
1719#ifdef CONFIG_TMPFS_XATTR
1720static int shmem_initxattrs(struct inode *, const struct xattr *, void *);
1721#else
1722#define shmem_initxattrs NULL
1723#endif
1724
1725static int
1726shmem_write_begin(struct file *file, struct address_space *mapping,
1727 loff_t pos, unsigned len, unsigned flags,
1728 struct page **pagep, void **fsdata)
1729{
1730 struct inode *inode = mapping->host;
1731 struct shmem_inode_info *info = SHMEM_I(inode);
1732 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
1733
1734
1735 if (unlikely(info->seals)) {
1736 if (info->seals & F_SEAL_WRITE)
1737 return -EPERM;
1738 if ((info->seals & F_SEAL_GROW) && pos + len > inode->i_size)
1739 return -EPERM;
1740 }
1741
1742 return shmem_getpage(inode, index, pagep, SGP_WRITE, NULL);
1743}
1744
1745static int
1746shmem_write_end(struct file *file, struct address_space *mapping,
1747 loff_t pos, unsigned len, unsigned copied,
1748 struct page *page, void *fsdata)
1749{
1750 struct inode *inode = mapping->host;
1751
1752 if (pos + copied > inode->i_size)
1753 i_size_write(inode, pos + copied);
1754
1755 if (!PageUptodate(page)) {
1756 if (copied < PAGE_CACHE_SIZE) {
1757 unsigned from = pos & (PAGE_CACHE_SIZE - 1);
1758 zero_user_segments(page, 0, from,
1759 from + copied, PAGE_CACHE_SIZE);
1760 }
1761 SetPageUptodate(page);
1762 }
1763 set_page_dirty(page);
1764 unlock_page(page);
1765 page_cache_release(page);
1766
1767 return copied;
1768}
1769
1770static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_t *desc, read_actor_t actor)
1771{
1772 struct inode *inode = file_inode(filp);
1773 struct address_space *mapping = inode->i_mapping;
1774 pgoff_t index;
1775 unsigned long offset;
1776 enum sgp_type sgp = SGP_READ;
1777
1778
1779
1780
1781
1782
1783 if (segment_eq(get_fs(), KERNEL_DS))
1784 sgp = SGP_DIRTY;
1785
1786 index = *ppos >> PAGE_CACHE_SHIFT;
1787 offset = *ppos & ~PAGE_CACHE_MASK;
1788
1789 for (;;) {
1790 struct page *page = NULL;
1791 pgoff_t end_index;
1792 unsigned long nr, ret;
1793 loff_t i_size = i_size_read(inode);
1794
1795 end_index = i_size >> PAGE_CACHE_SHIFT;
1796 if (index > end_index)
1797 break;
1798 if (index == end_index) {
1799 nr = i_size & ~PAGE_CACHE_MASK;
1800 if (nr <= offset)
1801 break;
1802 }
1803
1804 desc->error = shmem_getpage(inode, index, &page, sgp, NULL);
1805 if (desc->error) {
1806 if (desc->error == -EINVAL)
1807 desc->error = 0;
1808 break;
1809 }
1810 if (page)
1811 unlock_page(page);
1812
1813
1814
1815
1816
1817 nr = PAGE_CACHE_SIZE;
1818 i_size = i_size_read(inode);
1819 end_index = i_size >> PAGE_CACHE_SHIFT;
1820 if (index == end_index) {
1821 nr = i_size & ~PAGE_CACHE_MASK;
1822 if (nr <= offset) {
1823 if (page)
1824 page_cache_release(page);
1825 break;
1826 }
1827 }
1828 nr -= offset;
1829
1830 if (page) {
1831
1832
1833
1834
1835
1836 if (mapping_writably_mapped(mapping))
1837 flush_dcache_page(page);
1838
1839
1840
1841 if (!offset)
1842 mark_page_accessed(page);
1843 } else {
1844 page = ZERO_PAGE(0);
1845 page_cache_get(page);
1846 }
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858 ret = actor(desc, page, offset, nr);
1859 offset += ret;
1860 index += offset >> PAGE_CACHE_SHIFT;
1861 offset &= ~PAGE_CACHE_MASK;
1862
1863 page_cache_release(page);
1864 if (ret != nr || !desc->count)
1865 break;
1866
1867 cond_resched();
1868 }
1869
1870 *ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
1871 file_accessed(filp);
1872}
1873
1874static ssize_t shmem_file_aio_read(struct kiocb *iocb,
1875 const struct iovec *iov, unsigned long nr_segs, loff_t pos)
1876{
1877 struct file *filp = iocb->ki_filp;
1878 ssize_t retval;
1879 unsigned long seg;
1880 size_t count;
1881 loff_t *ppos = &iocb->ki_pos;
1882
1883 retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
1884 if (retval)
1885 return retval;
1886
1887 for (seg = 0; seg < nr_segs; seg++) {
1888 read_descriptor_t desc;
1889
1890 desc.written = 0;
1891 desc.arg.buf = iov[seg].iov_base;
1892 desc.count = iov[seg].iov_len;
1893 if (desc.count == 0)
1894 continue;
1895 desc.error = 0;
1896 do_shmem_file_read(filp, ppos, &desc, file_read_actor);
1897 retval += desc.written;
1898 if (desc.error) {
1899 retval = retval ?: desc.error;
1900 break;
1901 }
1902 if (desc.count > 0)
1903 break;
1904 }
1905 return retval;
1906}
1907
1908static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
1909 struct pipe_inode_info *pipe, size_t len,
1910 unsigned int flags)
1911{
1912 struct address_space *mapping = in->f_mapping;
1913 struct inode *inode = mapping->host;
1914 unsigned int loff, nr_pages, req_pages;
1915 struct page *pages[PIPE_DEF_BUFFERS];
1916 struct partial_page partial[PIPE_DEF_BUFFERS];
1917 struct page *page;
1918 pgoff_t index, end_index;
1919 loff_t isize, left;
1920 int error, page_nr;
1921 struct splice_pipe_desc spd = {
1922 .pages = pages,
1923 .partial = partial,
1924 .nr_pages_max = PIPE_DEF_BUFFERS,
1925 .flags = flags,
1926 .ops = &page_cache_pipe_buf_ops,
1927 .spd_release = spd_release_page,
1928 };
1929
1930 isize = i_size_read(inode);
1931 if (unlikely(*ppos >= isize))
1932 return 0;
1933
1934 left = isize - *ppos;
1935 if (unlikely(left < len))
1936 len = left;
1937
1938 if (splice_grow_spd(pipe, &spd))
1939 return -ENOMEM;
1940
1941 index = *ppos >> PAGE_CACHE_SHIFT;
1942 loff = *ppos & ~PAGE_CACHE_MASK;
1943 req_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
1944 nr_pages = min(req_pages, pipe->buffers);
1945
1946 spd.nr_pages = find_get_pages_contig(mapping, index,
1947 nr_pages, spd.pages);
1948 index += spd.nr_pages;
1949 error = 0;
1950
1951 while (spd.nr_pages < nr_pages) {
1952 error = shmem_getpage(inode, index, &page, SGP_CACHE, NULL);
1953 if (error)
1954 break;
1955 unlock_page(page);
1956 spd.pages[spd.nr_pages++] = page;
1957 index++;
1958 }
1959
1960 index = *ppos >> PAGE_CACHE_SHIFT;
1961 nr_pages = spd.nr_pages;
1962 spd.nr_pages = 0;
1963
1964 for (page_nr = 0; page_nr < nr_pages; page_nr++) {
1965 unsigned int this_len;
1966
1967 if (!len)
1968 break;
1969
1970 this_len = min_t(unsigned long, len, PAGE_CACHE_SIZE - loff);
1971 page = spd.pages[page_nr];
1972
1973 if (!PageUptodate(page) || page->mapping != mapping) {
1974 error = shmem_getpage(inode, index, &page,
1975 SGP_CACHE, NULL);
1976 if (error)
1977 break;
1978 unlock_page(page);
1979 page_cache_release(spd.pages[page_nr]);
1980 spd.pages[page_nr] = page;
1981 }
1982
1983 isize = i_size_read(inode);
1984 end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
1985 if (unlikely(!isize || index > end_index))
1986 break;
1987
1988 if (end_index == index) {
1989 unsigned int plen;
1990
1991 plen = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
1992 if (plen <= loff)
1993 break;
1994
1995 this_len = min(this_len, plen - loff);
1996 len = this_len;
1997 }
1998
1999 spd.partial[page_nr].offset = loff;
2000 spd.partial[page_nr].len = this_len;
2001 len -= this_len;
2002 loff = 0;
2003 spd.nr_pages++;
2004 index++;
2005 }
2006
2007 while (page_nr < nr_pages)
2008 page_cache_release(spd.pages[page_nr++]);
2009
2010 if (spd.nr_pages)
2011 error = splice_to_pipe(pipe, &spd);
2012
2013 splice_shrink_spd(&spd);
2014
2015 if (error > 0) {
2016 *ppos += error;
2017 file_accessed(in);
2018 }
2019 return error;
2020}
2021
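/*
 * llseek SEEK_DATA or SEEK_HOLE through the radix tree: return the index
 * of the first present, Uptodate page (SEEK_DATA) or the first gap
 * (SEEK_HOLE) at or after @index, limited by @end.
 */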
2025static pgoff_t shmem_seek_hole_data(struct address_space *mapping,
2026 pgoff_t index, pgoff_t end, int whence)
2027{
2028 struct page *page;
2029 struct pagevec pvec;
2030 pgoff_t indices[PAGEVEC_SIZE];
2031 bool done = false;
2032 int i;
2033
2034 pagevec_init(&pvec, 0);
2035 pvec.nr = 1;
2036 while (!done) {
2037 pvec.nr = __find_get_pages(mapping, index,
2038 pvec.nr, pvec.pages, indices);
2039 if (!pvec.nr) {
2040 if (whence == SEEK_DATA)
2041 index = end;
2042 break;
2043 }
2044 for (i = 0; i < pvec.nr; i++, index++) {
2045 if (index < indices[i]) {
2046 if (whence == SEEK_HOLE) {
2047 done = true;
2048 break;
2049 }
2050 index = indices[i];
2051 }
2052 page = pvec.pages[i];
2053 if (page && !radix_tree_exceptional_entry(page)) {
2054 if (!PageUptodate(page))
2055 page = NULL;
2056 }
2057 if (index >= end ||
2058 (page && whence == SEEK_DATA) ||
2059 (!page && whence == SEEK_HOLE)) {
2060 done = true;
2061 break;
2062 }
2063 }
2064 pagevec_remove_exceptionals(&pvec);
2065 pagevec_release(&pvec);
2066 pvec.nr = PAGEVEC_SIZE;
2067 cond_resched();
2068 }
2069 return index;
2070}
2071
2072static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence)
2073{
2074 struct address_space *mapping = file->f_mapping;
2075 struct inode *inode = mapping->host;
2076 pgoff_t start, end;
2077 loff_t new_offset;
2078
2079 if (whence != SEEK_DATA && whence != SEEK_HOLE)
2080 return generic_file_llseek_size(file, offset, whence,
2081 MAX_LFS_FILESIZE, i_size_read(inode));
2082 mutex_lock(&inode->i_mutex);
2083
2084
2085 if (offset < 0)
2086 offset = -EINVAL;
2087 else if (offset >= inode->i_size)
2088 offset = -ENXIO;
2089 else {
2090 start = offset >> PAGE_CACHE_SHIFT;
2091 end = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
2092 new_offset = shmem_seek_hole_data(mapping, start, end, whence);
2093 new_offset <<= PAGE_CACHE_SHIFT;
2094 if (new_offset > offset) {
2095 if (new_offset < inode->i_size)
2096 offset = new_offset;
2097 else if (whence == SEEK_DATA)
2098 offset = -ENXIO;
2099 else
2100 offset = inode->i_size;
2101 }
2102 }
2103
2104 if (offset >= 0)
2105 offset = vfs_setpos(file, offset, MAX_LFS_FILESIZE);
2106 mutex_unlock(&inode->i_mutex);
2107 return offset;
2108}
2109
2114#define SHMEM_TAG_PINNED PAGECACHE_TAG_TOWRITE
2115#define LAST_SCAN 4
2116
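/*
 * Tag every page whose refcount exceeds its mapcount by more than one,
 * i.e. pages with extra references (for example pending AIO or
 * get_user_pages() users), so that shmem_wait_for_pins() can recheck
 * them before F_SEAL_WRITE is granted.
 */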
2117static void shmem_tag_pins(struct address_space *mapping)
2118{
2119 struct radix_tree_iter iter;
2120 void **slot;
2121 pgoff_t start;
2122 struct page *page;
2123
2124 lru_add_drain();
2125 start = 0;
2126 rcu_read_lock();
2127
2128 radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
2129 page = radix_tree_deref_slot(slot);
2130 if (!page || radix_tree_exception(page)) {
2131 if (radix_tree_deref_retry(page)) {
2132 slot = radix_tree_iter_retry(&iter);
2133 continue;
2134 }
2135 } else if (page_count(page) - page_mapcount(page) > 1) {
2136 spin_lock_irq(&mapping->tree_lock);
2137 radix_tree_tag_set(&mapping->page_tree, iter.index,
2138 SHMEM_TAG_PINNED);
2139 spin_unlock_irq(&mapping->tree_lock);
2140 }
2141
2142 if (need_resched()) {
2143 cond_resched_rcu();
2144 slot = radix_tree_iter_next(&iter);
2145 }
2146 }
2147 rcu_read_unlock();
2148}
2149
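/*
 * Rescan the tagged pages up to LAST_SCAN times, draining the per-cpu
 * LRU caches and backing off between scans; return -EBUSY if pinned
 * pages remain, 0 once the mapping is quiescent enough to seal writes.
 */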
2159static int shmem_wait_for_pins(struct address_space *mapping)
2160{
2161 struct radix_tree_iter iter;
2162 void **slot;
2163 pgoff_t start;
2164 struct page *page;
2165 int error, scan;
2166
2167 shmem_tag_pins(mapping);
2168
2169 error = 0;
2170 for (scan = 0; scan <= LAST_SCAN; scan++) {
2171 if (!radix_tree_tagged(&mapping->page_tree, SHMEM_TAG_PINNED))
2172 break;
2173
2174 if (!scan)
2175 lru_add_drain_all();
2176 else if (schedule_timeout_killable((HZ << scan) / 200))
2177 scan = LAST_SCAN;
2178
2179 start = 0;
2180 rcu_read_lock();
2181 radix_tree_for_each_tagged(slot, &mapping->page_tree, &iter,
2182 start, SHMEM_TAG_PINNED) {
2183
2184 page = radix_tree_deref_slot(slot);
2185 if (radix_tree_exception(page)) {
2186 if (radix_tree_deref_retry(page)) {
2187 slot = radix_tree_iter_retry(&iter);
2188 continue;
2189 }
2190
2191 page = NULL;
2192 }
2193
2194 if (page &&
2195 page_count(page) - page_mapcount(page) != 1) {
2196 if (scan < LAST_SCAN)
2197 goto continue_resched;
2198
2199
2200
2201
2202
2203
2204 error = -EBUSY;
2205 }
2206
2207 spin_lock_irq(&mapping->tree_lock);
2208 radix_tree_tag_clear(&mapping->page_tree,
2209 iter.index, SHMEM_TAG_PINNED);
2210 spin_unlock_irq(&mapping->tree_lock);
2211continue_resched:
2212 if (need_resched()) {
2213 cond_resched_rcu();
2214 slot = radix_tree_iter_next(&iter);
2215 }
2216 }
2217 rcu_read_unlock();
2218 }
2219
2220 return error;
2221}
2222
2223#define F_ALL_SEALS (F_SEAL_SEAL | \
2224 F_SEAL_SHRINK | \
2225 F_SEAL_GROW | \
2226 F_SEAL_WRITE)
2227
2228int shmem_add_seals(struct file *file, unsigned int seals)
2229{
2230 struct inode *inode = file_inode(file);
2231 struct shmem_inode_info *info = SHMEM_I(inode);
2232 int error;
2233
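	/*
	 * Seals restrict what can still be done to a shmem/memfd file once
	 * other parties hold references to it: F_SEAL_SEAL forbids adding
	 * further seals, F_SEAL_SHRINK and F_SEAL_GROW forbid changing the
	 * size in the respective direction, and F_SEAL_WRITE forbids any
	 * further writes (including through existing shared mappings, hence
	 * the wait for page pins below).  Seals can only be added, never
	 * removed, and only through a writable file description.
	 *
	 * A minimal userspace sketch (assuming fd came from memfd_create()):
	 *
	 *	fcntl(fd, F_ADD_SEALS, F_SEAL_SHRINK | F_SEAL_SEAL);
	 *	seals = fcntl(fd, F_GET_SEALS);
	 */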
2264 if (file->f_op != &shmem_file_operations)
2265 return -EINVAL;
2266 if (!(file->f_mode & FMODE_WRITE))
2267 return -EPERM;
2268 if (seals & ~(unsigned int)F_ALL_SEALS)
2269 return -EINVAL;
2270
2271 mutex_lock(&inode->i_mutex);
2272
2273 if (info->seals & F_SEAL_SEAL) {
2274 error = -EPERM;
2275 goto unlock;
2276 }
2277
2278 if ((seals & F_SEAL_WRITE) && !(info->seals & F_SEAL_WRITE)) {
2279 error = mapping_deny_writable(file->f_mapping);
2280 if (error)
2281 goto unlock;
2282
2283 error = shmem_wait_for_pins(file->f_mapping);
2284 if (error) {
2285 mapping_allow_writable(file->f_mapping);
2286 goto unlock;
2287 }
2288 }
2289
2290 info->seals |= seals;
2291 error = 0;
2292
2293unlock:
2294 mutex_unlock(&inode->i_mutex);
2295 return error;
2296}
2297EXPORT_SYMBOL_GPL(shmem_add_seals);
2298
2299int shmem_get_seals(struct file *file)
2300{
2301 if (file->f_op != &shmem_file_operations)
2302 return -EINVAL;
2303
2304 return SHMEM_I(file_inode(file))->seals;
2305}
2306EXPORT_SYMBOL_GPL(shmem_get_seals);
2307
2308long shmem_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
2309{
2310 long error;
2311
2312 switch (cmd) {
2313 case F_ADD_SEALS:
2314
2315 if (arg > UINT_MAX)
2316 return -EINVAL;
2317
2318 error = shmem_add_seals(file, arg);
2319 break;
2320 case F_GET_SEALS:
2321 error = shmem_get_seals(file);
2322 break;
2323 default:
2324 error = -EINVAL;
2325 break;
2326 }
2327
2328 return error;
2329}
2330
2331static long shmem_fallocate(struct file *file, int mode, loff_t offset,
2332 loff_t len)
2333{
2334 struct inode *inode = file_inode(file);
2335 struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
2336 struct shmem_inode_info *info = SHMEM_I(inode);
2337 struct shmem_falloc shmem_falloc;
2338 pgoff_t start, index, end;
2339 int error;
2340
2341 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
2342 return -EOPNOTSUPP;
2343
2344 mutex_lock(&inode->i_mutex);
2345
2346 if (mode & FALLOC_FL_PUNCH_HOLE) {
2347 struct address_space *mapping = file->f_mapping;
2348 loff_t unmap_start = round_up(offset, PAGE_SIZE);
2349 loff_t unmap_end = round_down(offset + len, PAGE_SIZE) - 1;
2350 DECLARE_WAIT_QUEUE_HEAD_ONSTACK(shmem_falloc_waitq);
2351
2352
2353 if (info->seals & F_SEAL_WRITE) {
2354 error = -EPERM;
2355 goto out;
2356 }
2357
2358 shmem_falloc.waitq = &shmem_falloc_waitq;
2359 shmem_falloc.start = unmap_start >> PAGE_SHIFT;
2360 shmem_falloc.next = (unmap_end + 1) >> PAGE_SHIFT;
2361 spin_lock(&inode->i_lock);
2362 inode->i_private = &shmem_falloc;
2363 spin_unlock(&inode->i_lock);
2364
2365 if ((u64)unmap_end > (u64)unmap_start)
2366 unmap_mapping_range(mapping, unmap_start,
2367 1 + unmap_end - unmap_start, 0);
2368 shmem_truncate_range(inode, offset, offset + len - 1);
2369
2370
2371 spin_lock(&inode->i_lock);
2372 inode->i_private = NULL;
2373 wake_up_all(&shmem_falloc_waitq);
2374 WARN_ON_ONCE(!list_empty(&shmem_falloc_waitq.task_list));
2375 spin_unlock(&inode->i_lock);
2376 error = 0;
2377 goto out;
2378 }
2379
2380
2381 error = inode_newsize_ok(inode, offset + len);
2382 if (error)
2383 goto out;
2384
2385 if ((info->seals & F_SEAL_GROW) && offset + len > inode->i_size) {
2386 error = -EPERM;
2387 goto out;
2388 }
2389
2390 start = offset >> PAGE_CACHE_SHIFT;
2391 end = (offset + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
2392
2393 if (sbinfo->max_blocks && end - start > sbinfo->max_blocks) {
2394 error = -ENOSPC;
2395 goto out;
2396 }
2397
2398 shmem_falloc.waitq = NULL;
2399 shmem_falloc.start = start;
2400 shmem_falloc.next = start;
2401 shmem_falloc.nr_falloced = 0;
2402 shmem_falloc.nr_unswapped = 0;
2403 spin_lock(&inode->i_lock);
2404 inode->i_private = &shmem_falloc;
2405 spin_unlock(&inode->i_lock);
2406
2407 for (index = start; index < end; index++) {
2408 struct page *page;
2409
2410
2411
2412
2413
2414 if (signal_pending(current))
2415 error = -EINTR;
2416 else if (shmem_falloc.nr_unswapped > shmem_falloc.nr_falloced)
2417 error = -ENOMEM;
2418 else
2419 error = shmem_getpage(inode, index, &page, SGP_FALLOC,
2420 NULL);
2421 if (error) {
2422
2423 shmem_undo_range(inode,
2424 (loff_t)start << PAGE_CACHE_SHIFT,
2425 (loff_t)index << PAGE_CACHE_SHIFT, true);
2426 goto undone;
2427 }
2428
2429
2430
2431
2432
2433 shmem_falloc.next++;
2434 if (!PageUptodate(page))
2435 shmem_falloc.nr_falloced++;
2436
2437
2438
2439
2440
2441
2442
2443
2444 set_page_dirty(page);
2445 unlock_page(page);
2446 page_cache_release(page);
2447 cond_resched();
2448 }
2449
2450 if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size)
2451 i_size_write(inode, offset + len);
2452 inode->i_ctime = CURRENT_TIME;
2453undone:
2454 spin_lock(&inode->i_lock);
2455 inode->i_private = NULL;
2456 spin_unlock(&inode->i_lock);
2457out:
2458 mutex_unlock(&inode->i_mutex);
2459 return error;
2460}
2461
2462static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
2463{
2464 struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb);
2465
2466 buf->f_type = TMPFS_MAGIC;
2467 buf->f_bsize = PAGE_CACHE_SIZE;
2468 buf->f_namelen = NAME_MAX;
2469 if (sbinfo->max_blocks) {
2470 buf->f_blocks = sbinfo->max_blocks;
2471 buf->f_bavail =
2472 buf->f_bfree = sbinfo->max_blocks -
2473 percpu_counter_sum(&sbinfo->used_blocks);
2474 }
2475 if (sbinfo->max_inodes) {
2476 buf->f_files = sbinfo->max_inodes;
2477 buf->f_ffree = sbinfo->free_inodes;
2478 }
2479
2480 return 0;
2481}
2482
2483
2484
2485
2486static int
2487shmem_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
2488{
2489 struct inode *inode;
2490 int error = -ENOSPC;
2491
2492 inode = shmem_get_inode(dir->i_sb, dir, mode, dev, VM_NORESERVE);
2493 if (inode) {
2494 error = security_inode_init_security(inode, dir,
2495 &dentry->d_name,
2496 shmem_initxattrs, NULL);
2497 if (error) {
2498 if (error != -EOPNOTSUPP) {
2499 iput(inode);
2500 return error;
2501 }
2502 }
2503#ifdef CONFIG_TMPFS_POSIX_ACL
2504 error = generic_acl_init(inode, dir);
2505 if (error) {
2506 iput(inode);
2507 return error;
2508 }
2509#else
2510 error = 0;
2511#endif
2512 dir->i_size += BOGO_DIRENT_SIZE;
2513 dir->i_ctime = dir->i_mtime = CURRENT_TIME;
2514 d_instantiate(dentry, inode);
2515 dget(dentry);
2516 }
2517 return error;
2518}
2519
2520static int
2521shmem_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
2522{
2523 struct inode *inode;
2524 int error = -ENOSPC;
2525
2526 inode = shmem_get_inode(dir->i_sb, dir, mode, 0, VM_NORESERVE);
2527 if (inode) {
2528 error = security_inode_init_security(inode, dir,
2529 NULL,
2530 shmem_initxattrs, NULL);
2531 if (error) {
2532 if (error != -EOPNOTSUPP) {
2533 iput(inode);
2534 return error;
2535 }
2536 }
2537#ifdef CONFIG_TMPFS_POSIX_ACL
2538 error = generic_acl_init(inode, dir);
2539 if (error) {
2540 iput(inode);
2541 return error;
2542 }
2543#else
2544 error = 0;
2545#endif
2546 d_tmpfile(dentry, inode);
2547 }
2548 return error;
2549}
2550
2551static int shmem_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
2552{
2553 int error;
2554
2555 if ((error = shmem_mknod(dir, dentry, mode | S_IFDIR, 0)))
2556 return error;
2557 inc_nlink(dir);
2558 return 0;
2559}
2560
2561static int shmem_create(struct inode *dir, struct dentry *dentry, umode_t mode,
2562 bool excl)
2563{
2564 return shmem_mknod(dir, dentry, mode | S_IFREG, 0);
2565}
2566
2567
2568/* Link a file: a new dentry referencing an existing inode. */
2569
2570static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
2571{
2572 struct inode *inode = old_dentry->d_inode;
2573 int ret;
2574
2575 /*
2576 * Each new link needs a new dentry, pinning lowmem, and tmpfs
2577 * dentries cannot be pruned until they are unlinked; so charge a
2578 * link against the inode limit via shmem_reserve_inode().
2579 */
2580 ret = shmem_reserve_inode(inode->i_sb);
2581 if (ret)
2582 goto out;
2583
2584 dir->i_size += BOGO_DIRENT_SIZE;
2585 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
2586 inc_nlink(inode);
2587 ihold(inode);
2588 dget(dentry);
2589 d_instantiate(dentry, inode);
2590out:
2591 return ret;
2592}
2593
2594static int shmem_unlink(struct inode *dir, struct dentry *dentry)
2595{
2596 struct inode *inode = dentry->d_inode;
2597
2598 if (inode->i_nlink > 1 && !S_ISDIR(inode->i_mode))
2599 shmem_free_inode(inode->i_sb);
2600
2601 dir->i_size -= BOGO_DIRENT_SIZE;
2602 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
2603 drop_nlink(inode);
2604 dput(dentry);
2605 return 0;
2606}
2607
2608static int shmem_rmdir(struct inode *dir, struct dentry *dentry)
2609{
2610 if (!simple_empty(dentry))
2611 return -ENOTEMPTY;
2612
2613 drop_nlink(dentry->d_inode);
2614 drop_nlink(dir);
2615 return shmem_unlink(dir, dentry);
2616}
2617
2618static int shmem_exchange(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry)
2619{
2620 bool old_is_dir = S_ISDIR(old_dentry->d_inode->i_mode);
2621 bool new_is_dir = S_ISDIR(new_dentry->d_inode->i_mode);
2622
2623 if (old_dir != new_dir && old_is_dir != new_is_dir) {
2624 if (old_is_dir) {
2625 drop_nlink(old_dir);
2626 inc_nlink(new_dir);
2627 } else {
2628 drop_nlink(new_dir);
2629 inc_nlink(old_dir);
2630 }
2631 }
2632 old_dir->i_ctime = old_dir->i_mtime =
2633 new_dir->i_ctime = new_dir->i_mtime =
2634 old_dentry->d_inode->i_ctime =
2635 new_dentry->d_inode->i_ctime = CURRENT_TIME;
2636
2637 return 0;
2638}
2639
2640static int shmem_whiteout(struct inode *old_dir, struct dentry *old_dentry)
2641{
2642 struct dentry *whiteout;
2643 int error;
2644
2645 whiteout = d_alloc(old_dentry->d_parent, &old_dentry->d_name);
2646 if (!whiteout)
2647 return -ENOMEM;
2648
2649 error = shmem_mknod(old_dir, whiteout,
2650 S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV);
2651 dput(whiteout);
2652 if (error)
2653 return error;
2654
2655 /*
2656 * Cheat and hash the whiteout while the old dentry is still in
2657 * place, instead of playing games with FS_RENAME_DOES_D_MOVE.
2658 *
2659 * d_lookup() will consistently find one of them at this point,
2660 * not sure which one, but that isn't even important.
2661 */
2662 d_rehash(whiteout);
2663 return 0;
2664}
2665
2666/*
2667 * The VFS layer already does all the dentry work for rename; we just
2668 * have to decrement the usage count for the target if it exists, so
2669 * that the VFS layer correctly frees it when it gets overwritten.
2670 * RENAME_EXCHANGE and RENAME_WHITEOUT are handled explicitly below.
2671 */
2672static int shmem_rename2(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags)
2673{
2674 struct inode *inode = old_dentry->d_inode;
2675 int they_are_dirs = S_ISDIR(inode->i_mode);
2676
2677 if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
2678 return -EINVAL;
2679
2680 if (flags & RENAME_EXCHANGE)
2681 return shmem_exchange(old_dir, old_dentry, new_dir, new_dentry);
2682
2683 if (!simple_empty(new_dentry))
2684 return -ENOTEMPTY;
2685
2686 if (flags & RENAME_WHITEOUT) {
2687 int error;
2688
2689 error = shmem_whiteout(old_dir, old_dentry);
2690 if (error)
2691 return error;
2692 }
2693
2694 if (new_dentry->d_inode) {
2695 (void) shmem_unlink(new_dir, new_dentry);
2696 if (they_are_dirs)
2697 drop_nlink(old_dir);
2698 } else if (they_are_dirs) {
2699 drop_nlink(old_dir);
2700 inc_nlink(new_dir);
2701 }
2702
2703 old_dir->i_size -= BOGO_DIRENT_SIZE;
2704 new_dir->i_size += BOGO_DIRENT_SIZE;
2705 old_dir->i_ctime = old_dir->i_mtime =
2706 new_dir->i_ctime = new_dir->i_mtime =
2707 inode->i_ctime = CURRENT_TIME;
2708 return 0;
2709}
2710
2711static int shmem_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry)
2712{
2713 return shmem_rename2(old_dir, old_dentry, new_dir, new_dentry, 0);
2714}
2715
2716static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
2717{
2718 int error;
2719 int len;
2720 struct inode *inode;
2721 struct page *page;
2722 char *kaddr;
2723 struct shmem_inode_info *info;
2724
2725 len = strlen(symname) + 1;
2726 if (len > PAGE_CACHE_SIZE)
2727 return -ENAMETOOLONG;
2728
2729 inode = shmem_get_inode(dir->i_sb, dir, S_IFLNK|S_IRWXUGO, 0, VM_NORESERVE);
2730 if (!inode)
2731 return -ENOSPC;
2732
2733 error = security_inode_init_security(inode, dir, &dentry->d_name,
2734 shmem_initxattrs, NULL);
2735 if (error) {
2736 if (error != -EOPNOTSUPP) {
2737 iput(inode);
2738 return error;
2739 }
2740 error = 0;
2741 }
2742
2743 info = SHMEM_I(inode);
2744 inode->i_size = len-1;
2745 if (len <= SHORT_SYMLINK_LEN) {
2746 info->symlink = kmemdup(symname, len, GFP_KERNEL);
2747 if (!info->symlink) {
2748 iput(inode);
2749 return -ENOMEM;
2750 }
2751 inode->i_op = &shmem_short_symlink_operations;
2752 } else {
2753 error = shmem_getpage(inode, 0, &page, SGP_WRITE, NULL);
2754 if (error) {
2755 iput(inode);
2756 return error;
2757 }
2758 inode->i_mapping->a_ops = &shmem_aops;
2759 inode->i_op = &shmem_symlink_inode_operations;
2760 kaddr = kmap_atomic(page);
2761 memcpy(kaddr, symname, len);
2762 kunmap_atomic(kaddr);
2763 SetPageUptodate(page);
2764 set_page_dirty(page);
2765 unlock_page(page);
2766 page_cache_release(page);
2767 }
2768 dir->i_size += BOGO_DIRENT_SIZE;
2769 dir->i_ctime = dir->i_mtime = CURRENT_TIME;
2770 d_instantiate(dentry, inode);
2771 dget(dentry);
2772 return 0;
2773}
2774
2775static void *shmem_follow_short_symlink(struct dentry *dentry, struct nameidata *nd)
2776{
2777 nd_set_link(nd, SHMEM_I(dentry->d_inode)->symlink);
2778 return NULL;
2779}
2780
2781static void *shmem_follow_link(struct dentry *dentry, struct nameidata *nd)
2782{
2783 struct page *page = NULL;
2784 int error = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ, NULL);
2785 nd_set_link(nd, error ? ERR_PTR(error) : kmap(page));
2786 if (page)
2787 unlock_page(page);
2788 return page;
2789}
2790
2791static void shmem_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie)
2792{
2793 if (!IS_ERR(nd_get_link(nd))) {
2794 struct page *page = cookie;
2795 kunmap(page);
2796 mark_page_accessed(page);
2797 page_cache_release(page);
2798 }
2799}
2800
2801#ifdef CONFIG_TMPFS_XATTR
2802
2803/*
2804 * Superblocks without xattr inode operations may get some security.*
2805 * xattr support from the LSM "for free"; once other xattrs such as
2806 * ACLs are supported, the security.* handlers live here instead.
2807 */
2808
2809/*
2810 * Callback for security_inode_init_security() for acquiring xattrs.
2811 */
2812static int shmem_initxattrs(struct inode *inode,
2813 const struct xattr *xattr_array,
2814 void *fs_info)
2815{
2816 struct shmem_inode_info *info = SHMEM_I(inode);
2817 const struct xattr *xattr;
2818 struct simple_xattr *new_xattr;
2819 size_t len;
2820
2821 for (xattr = xattr_array; xattr->name != NULL; xattr++) {
2822 new_xattr = simple_xattr_alloc(xattr->value, xattr->value_len);
2823 if (!new_xattr)
2824 return -ENOMEM;
2825
2826 len = strlen(xattr->name) + 1;
2827 new_xattr->name = kmalloc(XATTR_SECURITY_PREFIX_LEN + len,
2828 GFP_KERNEL);
2829 if (!new_xattr->name) {
2830 kfree(new_xattr);
2831 return -ENOMEM;
2832 }
2833
2834 memcpy(new_xattr->name, XATTR_SECURITY_PREFIX,
2835 XATTR_SECURITY_PREFIX_LEN);
2836 memcpy(new_xattr->name + XATTR_SECURITY_PREFIX_LEN,
2837 xattr->name, len);
2838
2839 simple_xattr_list_add(&info->xattrs, new_xattr);
2840 }
2841
2842 return 0;
2843}
2844
2845static const struct xattr_handler *shmem_xattr_handlers[] = {
2846#ifdef CONFIG_TMPFS_POSIX_ACL
2847 &generic_acl_access_handler,
2848 &generic_acl_default_handler,
2849#endif
2850 NULL
2851};
2852
2853static int shmem_xattr_validate(const char *name)
2854{
2855 struct { const char *prefix; size_t len; } arr[] = {
2856 { XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN },
2857 { XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN }
2858 };
2859 int i;
2860
2861 for (i = 0; i < ARRAY_SIZE(arr); i++) {
2862 size_t preflen = arr[i].len;
2863 if (strncmp(name, arr[i].prefix, preflen) == 0) {
2864 if (!name[preflen])
2865 return -EINVAL;
2866 return 0;
2867 }
2868 }
2869 return -EOPNOTSUPP;
2870}
2871
2872static ssize_t shmem_getxattr(struct dentry *dentry, const char *name,
2873 void *buffer, size_t size)
2874{
2875 struct shmem_inode_info *info = SHMEM_I(dentry->d_inode);
2876 int err;
2877
2878 /*
2879 * If this is a request for a synthetic attribute in the system.*
2880 * namespace, use the generic infrastructure to resolve a handler
2881 * for it via sb->s_xattr.
2882 */
2883 if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
2884 return generic_getxattr(dentry, name, buffer, size);
2885
2886 err = shmem_xattr_validate(name);
2887 if (err)
2888 return err;
2889
2890 return simple_xattr_get(&info->xattrs, name, buffer, size);
2891}
2892
2893static int shmem_setxattr(struct dentry *dentry, const char *name,
2894 const void *value, size_t size, int flags)
2895{
2896 struct shmem_inode_info *info = SHMEM_I(dentry->d_inode);
2897 int err;
2898
2899 /*
2900 * If this is a request for a synthetic attribute in the system.*
2901 * namespace, use the generic infrastructure to resolve a handler
2902 * for it via sb->s_xattr.
2903 */
2904 if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
2905 return generic_setxattr(dentry, name, value, size, flags);
2906
2907 err = shmem_xattr_validate(name);
2908 if (err)
2909 return err;
2910
2911 return simple_xattr_set(&info->xattrs, name, value, size, flags);
2912}
2913
2914static int shmem_removexattr(struct dentry *dentry, const char *name)
2915{
2916 struct shmem_inode_info *info = SHMEM_I(dentry->d_inode);
2917 int err;
2918
2919 /*
2920 * If this is a request for a synthetic attribute in the system.*
2921 * namespace, use the generic infrastructure to resolve a handler
2922 * for it via sb->s_xattr.
2923 */
2924 if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
2925 return generic_removexattr(dentry, name);
2926
2927 err = shmem_xattr_validate(name);
2928 if (err)
2929 return err;
2930
2931 return simple_xattr_remove(&info->xattrs, name);
2932}
2933
2934static ssize_t shmem_listxattr(struct dentry *dentry, char *buffer, size_t size)
2935{
2936 struct shmem_inode_info *info = SHMEM_I(dentry->d_inode);
2937 return simple_xattr_list(&info->xattrs, buffer, size);
2938}
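/*
 * Aside, not kernel code: shmem_xattr_validate() above accepts only the
 * "security." and "trusted." prefixes (with "system." attributes routed to
 * the generic handlers), so "user." attributes fail on tmpfs here. A hedged
 * user-space sketch; the path and attribute names are made up, and setting
 * "trusted." names requires CAP_SYS_ADMIN.
 *
 *	#include <sys/xattr.h>
 *
 *	static int tag_tmpfs_file(const char *path)
 *	{
 *		// Accepted (given privilege): a trusted.* attribute.
 *		if (setxattr(path, "trusted.example", "1", 1, 0) < 0)
 *			return -1;
 *		// Rejected with EOPNOTSUPP: user.* is not in the prefix list.
 *		return setxattr(path, "user.example", "1", 1, 0);
 *	}
 */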
2939#endif /* CONFIG_TMPFS_XATTR */
2940
2941static const struct inode_operations shmem_short_symlink_operations = {
2942 .readlink = generic_readlink,
2943 .follow_link = shmem_follow_short_symlink,
2944#ifdef CONFIG_TMPFS_XATTR
2945 .setxattr = shmem_setxattr,
2946 .getxattr = shmem_getxattr,
2947 .listxattr = shmem_listxattr,
2948 .removexattr = shmem_removexattr,
2949#endif
2950};
2951
2952static const struct inode_operations shmem_symlink_inode_operations = {
2953 .readlink = generic_readlink,
2954 .follow_link = shmem_follow_link,
2955 .put_link = shmem_put_link,
2956#ifdef CONFIG_TMPFS_XATTR
2957 .setxattr = shmem_setxattr,
2958 .getxattr = shmem_getxattr,
2959 .listxattr = shmem_listxattr,
2960 .removexattr = shmem_removexattr,
2961#endif
2962};
2963
2964static struct dentry *shmem_get_parent(struct dentry *child)
2965{
2966 return ERR_PTR(-ESTALE);
2967}
2968
2969static int shmem_match(struct inode *ino, void *vfh)
2970{
2971 __u32 *fh = vfh;
2972 __u64 inum = fh[2];
2973 inum = (inum << 32) | fh[1];
2974 return ino->i_ino == inum && fh[0] == ino->i_generation;
2975}
2976
2977static struct dentry *shmem_fh_to_dentry(struct super_block *sb,
2978 struct fid *fid, int fh_len, int fh_type)
2979{
2980 struct inode *inode;
2981 struct dentry *dentry = NULL;
2982 u64 inum;
2983
2984 if (fh_len < 3)
2985 return NULL;
2986
2987 inum = fid->raw[2];
2988 inum = (inum << 32) | fid->raw[1];
2989
2990 inode = ilookup5(sb, (unsigned long)(inum + fid->raw[0]),
2991 shmem_match, fid->raw);
2992 if (inode) {
2993 dentry = d_find_alias(inode);
2994 iput(inode);
2995 }
2996
2997 return dentry;
2998}
2999
3000static int shmem_encode_fh(struct inode *inode, __u32 *fh, int *len,
3001 struct inode *parent)
3002{
3003 if (*len < 3) {
3004 *len = 3;
3005 return FILEID_INVALID;
3006 }
3007
3008 if (inode_unhashed(inode)) {
3009 /* __insert_inode_hash() is not idempotent, and we hash lazily
3010 * here rather than at inode creation time, so take a lock to
3011 * make sure each inode only gets hashed once, even if two
3012 * encode_fh calls race.
3013 */
3014 static DEFINE_SPINLOCK(lock);
3015 spin_lock(&lock);
3016 if (inode_unhashed(inode))
3017 __insert_inode_hash(inode,
3018 inode->i_ino + inode->i_generation);
3019 spin_unlock(&lock);
3020 }
3021
3022 fh[0] = inode->i_generation;
3023 fh[1] = inode->i_ino;
3024 fh[2] = ((__u64)inode->i_ino) >> 32;
3025
3026 *len = 3;
3027 return 1;
3028}
3029
3030static const struct export_operations shmem_export_ops = {
3031 .get_parent = shmem_get_parent,
3032 .encode_fh = shmem_encode_fh,
3033 .fh_to_dentry = shmem_fh_to_dentry,
3034};
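/*
 * Aside, not kernel code: the export operations above encode a three-word
 * handle (i_generation, then the low and high halves of i_ino). A hedged
 * user-space sketch of round-tripping it with name_to_handle_at() and
 * open_by_handle_at(); the paths and mount fd are assumptions, and the
 * caller needs CAP_DAC_READ_SEARCH.
 *
 *	#define _GNU_SOURCE
 *	#include <fcntl.h>
 *	#include <stdlib.h>
 *
 *	static int reopen_by_handle(int mount_fd, const char *path)
 *	{
 *		struct file_handle *fh;
 *		int mount_id;
 *
 *		fh = malloc(sizeof(*fh) + MAX_HANDLE_SZ);
 *		if (!fh)
 *			return -1;
 *		fh->handle_bytes = MAX_HANDLE_SZ;
 *		if (name_to_handle_at(AT_FDCWD, path, fh, &mount_id, 0) < 0)
 *			return -1;
 *		return open_by_handle_at(mount_fd, fh, O_RDONLY);
 *	}
 */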
3035
3036static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo,
3037 bool remount)
3038{
3039 char *this_char, *value, *rest;
3040 struct mempolicy *mpol = NULL;
3041 uid_t uid;
3042 gid_t gid;
3043
3044 while (options != NULL) {
3045 this_char = options;
3046 for (;;) {
3047 /*
3048 * NUL-terminate this option: unfortunately, mount options form a
3049 * comma-separated list, but mpol's nodelist may also contain
3050 * commas, so don't split on a comma followed by a digit.
3051 */
3052 options = strchr(options, ',');
3053 if (options == NULL)
3054 break;
3055 options++;
3056 if (!isdigit(*options)) {
3057 options[-1] = '\0';
3058 break;
3059 }
3060 }
3061 if (!*this_char)
3062 continue;
3063 if ((value = strchr(this_char,'=')) != NULL) {
3064 *value++ = 0;
3065 } else {
3066 printk(KERN_ERR
3067 "tmpfs: No value for mount option '%s'\n",
3068 this_char);
3069 goto error;
3070 }
3071
3072 if (!strcmp(this_char,"size")) {
3073 unsigned long long size;
3074 size = memparse(value,&rest);
3075 if (*rest == '%') {
3076 size <<= PAGE_SHIFT;
3077 size *= totalram_pages;
3078 do_div(size, 100);
3079 rest++;
3080 }
3081 if (*rest)
3082 goto bad_val;
3083 sbinfo->max_blocks =
3084 DIV_ROUND_UP(size, PAGE_CACHE_SIZE);
3085 } else if (!strcmp(this_char,"nr_blocks")) {
3086 sbinfo->max_blocks = memparse(value, &rest);
3087 if (*rest)
3088 goto bad_val;
3089 } else if (!strcmp(this_char,"nr_inodes")) {
3090 sbinfo->max_inodes = memparse(value, &rest);
3091 if (*rest)
3092 goto bad_val;
3093 } else if (!strcmp(this_char,"mode")) {
3094 if (remount)
3095 continue;
3096 sbinfo->mode = simple_strtoul(value, &rest, 8) & 07777;
3097 if (*rest)
3098 goto bad_val;
3099 } else if (!strcmp(this_char,"uid")) {
3100 if (remount)
3101 continue;
3102 uid = simple_strtoul(value, &rest, 0);
3103 if (*rest)
3104 goto bad_val;
3105 sbinfo->uid = make_kuid(current_user_ns(), uid);
3106 if (!uid_valid(sbinfo->uid))
3107 goto bad_val;
3108 } else if (!strcmp(this_char,"gid")) {
3109 if (remount)
3110 continue;
3111 gid = simple_strtoul(value, &rest, 0);
3112 if (*rest)
3113 goto bad_val;
3114 sbinfo->gid = make_kgid(current_user_ns(), gid);
3115 if (!gid_valid(sbinfo->gid))
3116 goto bad_val;
3117 } else if (!strcmp(this_char,"mpol")) {
3118 mpol_put(mpol);
3119 mpol = NULL;
3120 if (mpol_parse_str(value, &mpol))
3121 goto bad_val;
3122 } else {
3123 printk(KERN_ERR "tmpfs: Bad mount option %s\n",
3124 this_char);
3125 goto error;
3126 }
3127 }
3128 sbinfo->mpol = mpol;
3129 return 0;
3130
3131bad_val:
3132 printk(KERN_ERR "tmpfs: Bad value '%s' for mount option '%s'\n",
3133 value, this_char);
3134error:
3135 mpol_put(mpol);
3136 return 1;
3137
3138}
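/*
 * Aside: the option string parsed above is the "data" argument of mount(2).
 * A hedged user-space sketch of a mount using several of the options; the
 * target directory and the chosen limits are assumptions.
 *
 *	#include <sys/mount.h>
 *
 *	static int mount_scratch(void)
 *	{
 *		// 64 MiB and 4096 inodes instead of the defaults,
 *		// mode 1777 as shmem_show_options() would then report.
 *		return mount("tmpfs", "/mnt/scratch", "tmpfs", 0,
 *			     "size=64m,nr_inodes=4096,mode=1777");
 *	}
 */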
3139
3140static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
3141{
3142 struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
3143 struct shmem_sb_info config = *sbinfo;
3144 unsigned long inodes;
3145 int error = -EINVAL;
3146
3147 config.mpol = NULL;
3148 if (shmem_parse_options(data, &config, true))
3149 return error;
3150
3151 spin_lock(&sbinfo->stat_lock);
3152 inodes = sbinfo->max_inodes - sbinfo->free_inodes;
3153 if (percpu_counter_compare(&sbinfo->used_blocks, config.max_blocks) > 0)
3154 goto out;
3155 if (config.max_inodes < inodes)
3156 goto out;
3157
3158 /*
3159 * Disallow going from unlimited to limited: while unlimited, no
3160 * usage has been tracked, so a new limit could not be enforced.
3161 */
3162 if (config.max_blocks && !sbinfo->max_blocks)
3163 goto out;
3164 if (config.max_inodes && !sbinfo->max_inodes)
3165 goto out;
3166
3167 error = 0;
3168 sbinfo->max_blocks = config.max_blocks;
3169 sbinfo->max_inodes = config.max_inodes;
3170 sbinfo->free_inodes = config.max_inodes - inodes;
3171
3172 /*
3173 * Preserve the previous mempolicy unless the mpol remount option was given.
3174 */
3175 if (config.mpol) {
3176 mpol_put(sbinfo->mpol);
3177 sbinfo->mpol = config.mpol;
3178 }
3179out:
3180 spin_unlock(&sbinfo->stat_lock);
3181 return error;
3182}
3183
3184static int shmem_show_options(struct seq_file *seq, struct dentry *root)
3185{
3186 struct shmem_sb_info *sbinfo = SHMEM_SB(root->d_sb);
3187
3188 if (sbinfo->max_blocks != shmem_default_max_blocks())
3189 seq_printf(seq, ",size=%luk",
3190 sbinfo->max_blocks << (PAGE_CACHE_SHIFT - 10));
3191 if (sbinfo->max_inodes != shmem_default_max_inodes())
3192 seq_printf(seq, ",nr_inodes=%lu", sbinfo->max_inodes);
3193 if (sbinfo->mode != (S_IRWXUGO | S_ISVTX))
3194 seq_printf(seq, ",mode=%03ho", sbinfo->mode);
3195 if (!uid_eq(sbinfo->uid, GLOBAL_ROOT_UID))
3196 seq_printf(seq, ",uid=%u",
3197 from_kuid_munged(&init_user_ns, sbinfo->uid));
3198 if (!gid_eq(sbinfo->gid, GLOBAL_ROOT_GID))
3199 seq_printf(seq, ",gid=%u",
3200 from_kgid_munged(&init_user_ns, sbinfo->gid));
3201 shmem_show_mpol(seq, sbinfo->mpol);
3202 return 0;
3203}
3204
3205#define MFD_NAME_PREFIX "memfd:"
3206#define MFD_NAME_PREFIX_LEN (sizeof(MFD_NAME_PREFIX) - 1)
3207#define MFD_NAME_MAX_LEN (NAME_MAX - MFD_NAME_PREFIX_LEN)
3208
3209#define MFD_ALL_FLAGS (MFD_CLOEXEC | MFD_ALLOW_SEALING)
3210
3211SYSCALL_DEFINE2(memfd_create,
3212 const char __user *, uname,
3213 unsigned int, flags)
3214{
3215 struct shmem_inode_info *info;
3216 struct file *file;
3217 int fd, error;
3218 char *name;
3219 long len;
3220
3221 if (flags & ~(unsigned int)MFD_ALL_FLAGS)
3222 return -EINVAL;
3223
3224 /* length includes the terminating NUL */
3225 len = strnlen_user(uname, MFD_NAME_MAX_LEN + 1);
3226 if (len <= 0)
3227 return -EFAULT;
3228 if (len > MFD_NAME_MAX_LEN + 1)
3229 return -EINVAL;
3230
3231 name = kmalloc(len + MFD_NAME_PREFIX_LEN, GFP_TEMPORARY);
3232 if (!name)
3233 return -ENOMEM;
3234
3235 strcpy(name, MFD_NAME_PREFIX);
3236 if (copy_from_user(&name[MFD_NAME_PREFIX_LEN], uname, len)) {
3237 error = -EFAULT;
3238 goto err_name;
3239 }
3240
3241 /* the terminating NUL may have changed since strnlen_user() returned */
3242 if (name[len + MFD_NAME_PREFIX_LEN - 1]) {
3243 error = -EFAULT;
3244 goto err_name;
3245 }
3246
3247 fd = get_unused_fd_flags((flags & MFD_CLOEXEC) ? O_CLOEXEC : 0);
3248 if (fd < 0) {
3249 error = fd;
3250 goto err_name;
3251 }
3252
3253 file = shmem_file_setup(name, 0, VM_NORESERVE);
3254 if (IS_ERR(file)) {
3255 error = PTR_ERR(file);
3256 goto err_fd;
3257 }
3258 info = SHMEM_I(file_inode(file));
3259 file->f_mode |= FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE;
3260 file->f_flags |= O_RDWR | O_LARGEFILE;
3261 if (flags & MFD_ALLOW_SEALING)
3262 info->seals &= ~F_SEAL_SEAL;
3263
3264 fd_install(fd, file);
3265 kfree(name);
3266 return fd;
3267
3268err_fd:
3269 put_unused_fd(fd);
3270err_name:
3271 kfree(name);
3272 return error;
3273}
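/*
 * Aside, not kernel code: a hedged user-space sketch of the syscall defined
 * above, creating a sealable memfd and then forbidding resizing. Older libcs
 * have no memfd_create() wrapper, hence syscall(); __NR_memfd_create and the
 * F_SEAL_* constants (from <fcntl.h> with _GNU_SOURCE on recent glibc) are
 * assumed to be available.
 *
 *	#define _GNU_SOURCE
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *	#include <sys/syscall.h>
 *	#include <linux/memfd.h>
 *
 *	static int make_sealed_buf(size_t size)
 *	{
 *		int fd = syscall(__NR_memfd_create, "buf",
 *				 MFD_CLOEXEC | MFD_ALLOW_SEALING);
 *
 *		if (fd < 0)
 *			return -1;
 *		ftruncate(fd, size);
 *		// Resizing now fails with EPERM; writes within size still work.
 *		fcntl(fd, F_ADD_SEALS, F_SEAL_SHRINK | F_SEAL_GROW);
 *		return fd;
 *	}
 */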
3274
3275#endif /* CONFIG_TMPFS */
3276
3277static void shmem_put_super(struct super_block *sb)
3278{
3279 struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
3280
3281 percpu_counter_destroy(&sbinfo->used_blocks);
3282 mpol_put(sbinfo->mpol);
3283 kfree(sbinfo);
3284 sb->s_fs_info = NULL;
3285}
3286
3287int shmem_fill_super(struct super_block *sb, void *data, int silent)
3288{
3289 struct inode *inode;
3290 struct shmem_sb_info *sbinfo;
3291 int err = -ENOMEM;
3292
3293 /* Round up to L1_CACHE_BYTES to resist false sharing */
3294 sbinfo = kzalloc(max((int)sizeof(struct shmem_sb_info),
3295 L1_CACHE_BYTES), GFP_KERNEL);
3296 if (!sbinfo)
3297 return -ENOMEM;
3298
3299 sbinfo->mode = S_IRWXUGO | S_ISVTX;
3300 sbinfo->uid = current_fsuid();
3301 sbinfo->gid = current_fsgid();
3302 sb->s_fs_info = sbinfo;
3303
3304#ifdef CONFIG_TMPFS
3305 /*
3306 * By default, allow each tmpfs instance only half of physical RAM
3307 * and one inode per page of lowmem; the internal kernel mount
3308 * (MS_KERNMOUNT) is left unlimited.
3309 */
3310 if (!(sb->s_flags & MS_KERNMOUNT)) {
3311 sbinfo->max_blocks = shmem_default_max_blocks();
3312 sbinfo->max_inodes = shmem_default_max_inodes();
3313 if (shmem_parse_options(data, sbinfo, false)) {
3314 err = -EINVAL;
3315 goto failed;
3316 }
3317 } else {
3318 sb->s_flags |= MS_NOUSER;
3319 }
3320 sb->s_export_op = &shmem_export_ops;
3321 sb->s_flags |= MS_NOSEC;
3322#else
3323 sb->s_flags |= MS_NOUSER;
3324#endif
3325
3326 spin_lock_init(&sbinfo->stat_lock);
3327 if (percpu_counter_init(&sbinfo->used_blocks, 0, GFP_KERNEL))
3328 goto failed;
3329 sbinfo->free_inodes = sbinfo->max_inodes;
3330
3331 sb->s_maxbytes = MAX_LFS_FILESIZE;
3332 sb->s_blocksize = PAGE_CACHE_SIZE;
3333 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
3334 sb->s_magic = TMPFS_MAGIC;
3335 sb->s_op = &shmem_ops;
3336 sb->s_time_gran = 1;
3337#ifdef CONFIG_TMPFS_XATTR
3338 sb->s_xattr = shmem_xattr_handlers;
3339#endif
3340#ifdef CONFIG_TMPFS_POSIX_ACL
3341 sb->s_flags |= MS_POSIXACL;
3342#endif
3343 uuid_be_gen((uuid_be *) &sb->s_uuid);
3344
3345 inode = shmem_get_inode(sb, NULL, S_IFDIR | sbinfo->mode, 0, VM_NORESERVE);
3346 if (!inode)
3347 goto failed;
3348 inode->i_uid = sbinfo->uid;
3349 inode->i_gid = sbinfo->gid;
3350 sb->s_root = d_make_root(inode);
3351 if (!sb->s_root)
3352 goto failed;
3353 return 0;
3354
3355failed:
3356 shmem_put_super(sb);
3357 return err;
3358}
3359
3360static struct kmem_cache *shmem_inode_cachep;
3361
3362static struct inode *shmem_alloc_inode(struct super_block *sb)
3363{
3364 struct shmem_inode_info *info;
3365 info = kmem_cache_alloc(shmem_inode_cachep, GFP_KERNEL);
3366 if (!info)
3367 return NULL;
3368 return &info->vfs_inode;
3369}
3370
3371static void shmem_destroy_callback(struct rcu_head *head)
3372{
3373 struct inode *inode = container_of(head, struct inode, i_rcu);
3374 kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode));
3375}
3376
3377static void shmem_destroy_inode(struct inode *inode)
3378{
3379 if (S_ISREG(inode->i_mode))
3380 mpol_free_shared_policy(&SHMEM_I(inode)->policy);
3381 call_rcu(&inode->i_rcu, shmem_destroy_callback);
3382}
3383
3384static void shmem_init_inode(void *foo)
3385{
3386 struct shmem_inode_info *info = foo;
3387 inode_init_once(&info->vfs_inode);
3388}
3389
3390static int shmem_init_inodecache(void)
3391{
3392 shmem_inode_cachep = kmem_cache_create("shmem_inode_cache",
3393 sizeof(struct shmem_inode_info),
3394 0, SLAB_PANIC|SLAB_ACCOUNT, shmem_init_inode);
3395 return 0;
3396}
3397
3398static void shmem_destroy_inodecache(void)
3399{
3400 kmem_cache_destroy(shmem_inode_cachep);
3401}
3402
3403static const struct address_space_operations shmem_aops = {
3404 .writepage = shmem_writepage,
3405 .set_page_dirty = __set_page_dirty_no_writeback,
3406#ifdef CONFIG_TMPFS
3407 .write_begin = shmem_write_begin,
3408 .write_end = shmem_write_end,
3409#endif
3410 .migratepage = migrate_page,
3411 .error_remove_page = generic_error_remove_page,
3412};
3413
3414static const struct file_operations shmem_file_operations = {
3415 .mmap = shmem_mmap,
3416#ifdef CONFIG_TMPFS
3417 .llseek = shmem_file_llseek,
3418 .read = do_sync_read,
3419 .write = do_sync_write,
3420 .aio_read = shmem_file_aio_read,
3421 .aio_write = generic_file_aio_write,
3422 .fsync = noop_fsync,
3423 .splice_read = shmem_file_splice_read,
3424 .splice_write = generic_file_splice_write,
3425 .fallocate = shmem_fallocate,
3426#endif
3427};
3428
3429static const struct inode_operations shmem_inode_operations = {
3430 .setattr = shmem_setattr,
3431#ifdef CONFIG_TMPFS_XATTR
3432 .setxattr = shmem_setxattr,
3433 .getxattr = shmem_getxattr,
3434 .listxattr = shmem_listxattr,
3435 .removexattr = shmem_removexattr,
3436#endif
3437};
3438
3439static const struct inode_operations_wrapper shmem_dir_inode_operations = {
3440 .ops = {
3441#ifdef CONFIG_TMPFS
3442 .create = shmem_create,
3443 .lookup = simple_lookup,
3444 .link = shmem_link,
3445 .unlink = shmem_unlink,
3446 .symlink = shmem_symlink,
3447 .mkdir = shmem_mkdir,
3448 .rmdir = shmem_rmdir,
3449 .mknod = shmem_mknod,
3450 .rename = shmem_rename,
3451#endif
3452#ifdef CONFIG_TMPFS_XATTR
3453 .setxattr = shmem_setxattr,
3454 .getxattr = shmem_getxattr,
3455 .listxattr = shmem_listxattr,
3456 .removexattr = shmem_removexattr,
3457#endif
3458#ifdef CONFIG_TMPFS_POSIX_ACL
3459 .setattr = shmem_setattr,
3460#endif
3461 },
3462#ifdef CONFIG_TMPFS
3463 .rename2 = shmem_rename2,
3464 .tmpfile = shmem_tmpfile,
3465#endif
3466};
3467
3468static const struct inode_operations shmem_special_inode_operations = {
3469#ifdef CONFIG_TMPFS_XATTR
3470 .setxattr = shmem_setxattr,
3471 .getxattr = shmem_getxattr,
3472 .listxattr = shmem_listxattr,
3473 .removexattr = shmem_removexattr,
3474#endif
3475#ifdef CONFIG_TMPFS_POSIX_ACL
3476 .setattr = shmem_setattr,
3477#endif
3478};
3479
3480static const struct super_operations shmem_ops = {
3481 .alloc_inode = shmem_alloc_inode,
3482 .destroy_inode = shmem_destroy_inode,
3483#ifdef CONFIG_TMPFS
3484 .statfs = shmem_statfs,
3485 .remount_fs = shmem_remount_fs,
3486 .show_options = shmem_show_options,
3487#endif
3488 .evict_inode = shmem_evict_inode,
3489 .drop_inode = generic_delete_inode,
3490 .put_super = shmem_put_super,
3491};
3492
3493static const struct vm_operations_struct shmem_vm_ops = {
3494 .fault = shmem_fault,
3495#ifdef CONFIG_NUMA
3496 .set_policy = shmem_set_policy,
3497 .get_policy = shmem_get_policy,
3498#endif
3499 .remap_pages = generic_file_remap_pages,
3500};
3501
3502static struct dentry *shmem_mount(struct file_system_type *fs_type,
3503 int flags, const char *dev_name, void *data)
3504{
3505 return mount_nodev(fs_type, flags, data, shmem_fill_super);
3506}
3507
3508static struct file_system_type shmem_fs_type = {
3509 .owner = THIS_MODULE,
3510 .name = "tmpfs",
3511 .mount = shmem_mount,
3512 .kill_sb = kill_litter_super,
3513 .fs_flags = FS_USERNS_MOUNT,
3514};
3515
3516int __init shmem_init(void)
3517{
3518 int error;
3519
3520 /* Already set up (e.g. when called early for rootfs)? Then nothing to do. */
3521 if (shmem_inode_cachep)
3522 return 0;
3523
3524 error = bdi_init(&shmem_backing_dev_info);
3525 if (error)
3526 goto out4;
3527
3528 error = shmem_init_inodecache();
3529 if (error)
3530 goto out3;
3531
3532 error = register_filesystem(&shmem_fs_type);
3533 if (error) {
3534 printk(KERN_ERR "Could not register tmpfs\n");
3535 goto out2;
3536 }
3537
3538 shm_mnt = kern_mount(&shmem_fs_type);
3539 if (IS_ERR(shm_mnt)) {
3540 error = PTR_ERR(shm_mnt);
3541 printk(KERN_ERR "Could not kern_mount tmpfs\n");
3542 goto out1;
3543 }
3544 return 0;
3545
3546out1:
3547 unregister_filesystem(&shmem_fs_type);
3548out2:
3549 shmem_destroy_inodecache();
3550out3:
3551 bdi_destroy(&shmem_backing_dev_info);
3552out4:
3553 shm_mnt = ERR_PTR(error);
3554 return error;
3555}
3556
3557#else /* !CONFIG_SHMEM */
3558
3559/*
3560 * tiny-shmem: simple shmemfs and tmpfs using the ramfs code.
3561 *
3562 * This is intended for small systems where the benefits of the full
3563 * shmem code (swap-backed and resource-limited) are outweighed by
3564 * their complexity. On systems without swap this code should be
3565 * effectively equivalent, but much lighter weight.
3566 */
3567
3568static struct file_system_type shmem_fs_type = {
3569 .name = "tmpfs",
3570 .mount = ramfs_mount,
3571 .kill_sb = kill_litter_super,
3572 .fs_flags = FS_USERNS_MOUNT,
3573};
3574
3575int __init shmem_init(void)
3576{
3577 BUG_ON(register_filesystem(&shmem_fs_type) != 0);
3578
3579 shm_mnt = kern_mount(&shmem_fs_type);
3580 BUG_ON(IS_ERR(shm_mnt));
3581
3582 return 0;
3583}
3584
3585int shmem_unuse(swp_entry_t swap, struct page *page)
3586{
3587 return 0;
3588}
3589
3590int shmem_lock(struct file *file, int lock, struct user_struct *user)
3591{
3592 return 0;
3593}
3594
3595void shmem_unlock_mapping(struct address_space *mapping)
3596{
3597}
3598
3599void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
3600{
3601 truncate_inode_pages_range(inode->i_mapping, lstart, lend);
3602}
3603EXPORT_SYMBOL_GPL(shmem_truncate_range);
3604
3605#define shmem_vm_ops generic_file_vm_ops
3606#define shmem_file_operations ramfs_file_operations
3607#define shmem_get_inode(sb, dir, mode, dev, flags) ramfs_get_inode(sb, dir, mode, dev)
3608#define shmem_acct_size(flags, size) 0
3609#define shmem_unacct_size(flags, size) do {} while (0)
3610
3611#endif /* CONFIG_SHMEM */
3612
3613/* common code, built whether or not CONFIG_SHMEM is enabled */
3614
3615static struct dentry_operations anon_ops = {
3616 .d_dname = simple_dname
3617};
3618
3619static struct file *__shmem_file_setup(const char *name, loff_t size,
3620 unsigned long flags, unsigned int i_flags)
3621{
3622 struct file *res;
3623 struct inode *inode;
3624 struct path path;
3625 struct super_block *sb;
3626 struct qstr this;
3627
3628 if (IS_ERR(shm_mnt))
3629 return ERR_CAST(shm_mnt);
3630
3631 if (size < 0 || size > MAX_LFS_FILESIZE)
3632 return ERR_PTR(-EINVAL);
3633
3634 if (shmem_acct_size(flags, size))
3635 return ERR_PTR(-ENOMEM);
3636
3637 res = ERR_PTR(-ENOMEM);
3638 this.name = name;
3639 this.len = strlen(name);
3640 this.hash = 0;
3641 sb = shm_mnt->mnt_sb;
3642 path.dentry = d_alloc_pseudo(sb, &this);
3643 if (!path.dentry)
3644 goto put_memory;
3645 d_set_d_op(path.dentry, &anon_ops);
3646 path.mnt = mntget(shm_mnt);
3647
3648 res = ERR_PTR(-ENOSPC);
3649 inode = shmem_get_inode(sb, NULL, S_IFREG | S_IRWXUGO, 0, flags);
3650 if (!inode)
3651 goto put_dentry;
3652
3653 inode->i_flags |= i_flags;
3654 d_instantiate(path.dentry, inode);
3655 inode->i_size = size;
3656 clear_nlink(inode); /* it is unlinked from any directory */
3657 res = ERR_PTR(ramfs_nommu_expand_for_mapping(inode, size));
3658 if (IS_ERR(res))
3659 goto put_dentry;
3660
3661 res = alloc_file(&path, FMODE_WRITE | FMODE_READ,
3662 &shmem_file_operations);
3663 if (IS_ERR(res))
3664 goto put_dentry;
3665
3666 return res;
3667
3668put_dentry:
3669 path_put(&path);
3670put_memory:
3671 shmem_unacct_size(flags, size);
3672 return res;
3673}
3674
3675
3676/**
3677 * shmem_kernel_file_setup - get an unlinked file living in tmpfs, for
3678 * kernel-internal use. There are NO LSM permission checks against the
3679 * underlying inode, so callers must handle authorization (such as path
3680 * permission checks) and other security concerns themselves.
3681 * @name: name for dentry (to be seen in /proc/<pid>/maps)
3682 * @size: size to be set for the file
3683 * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size
3684 */
3685struct file *shmem_kernel_file_setup(const char *name, loff_t size, unsigned long flags)
3686{
3687 return __shmem_file_setup(name, size, flags, S_PRIVATE);
3688}
3689
3690/**
3691 * shmem_file_setup - get an unlinked file living in tmpfs
3692 * @name: name for dentry (to be seen in /proc/<pid>/maps)
3693 * @size: size to be set for the file
3694 * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size
3695 */
3696struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags)
3697{
3698 return __shmem_file_setup(name, size, flags, 0);
3699}
3700EXPORT_SYMBOL_GPL(shmem_file_setup);
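/*
 * A hedged sketch (not taken from any real caller) of how another kernel
 * subsystem might use the exported helper above to back an object with an
 * unlinked tmpfs file; the function name and size are illustrative.
 *
 *	static struct file *demo_alloc_backing(loff_t size)
 *	{
 *		struct file *filp;
 *
 *		filp = shmem_file_setup("demo-backing", size, VM_NORESERVE);
 *		if (IS_ERR(filp))
 *			return filp;
 *		// Pages are allocated lazily, on first use of each index.
 *		return filp;
 *	}
 */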
3701
3702/**
3703 * shmem_zero_setup - set up a shared anonymous mapping
3704 * @vma: the vma to be mmapped is prepared for shared anonymous memory
3705 */
3706int shmem_zero_setup(struct vm_area_struct *vma)
3707{
3708 struct file *file;
3709 loff_t size = vma->vm_end - vma->vm_start;
3710
3711 file = shmem_file_setup("dev/zero", size, vma->vm_flags);
3712 if (IS_ERR(file))
3713 return PTR_ERR(file);
3714
3715 if (vma->vm_file)
3716 fput(vma->vm_file);
3717 vma->vm_file = file;
3718 vma->vm_ops = &shmem_vm_ops;
3719 return 0;
3720}
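/*
 * Aside, not kernel code: shmem_zero_setup() is what backs a user-space
 * MAP_SHARED | MAP_ANONYMOUS mapping. A hedged sketch of the mmap() call
 * that ends up here; the length is arbitrary.
 *
 *	#include <sys/mman.h>
 *
 *	static void *shared_zero_region(size_t len)
 *	{
 *		// Shows up in /proc/<pid>/maps as "/dev/zero (deleted)".
 *		return mmap(NULL, len, PROT_READ | PROT_WRITE,
 *			    MAP_SHARED | MAP_ANONYMOUS, -1, 0);
 *	}
 */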
3721
3722
3723/**
3724 * shmem_read_mapping_page_gfp - read into page cache, using specified page
3725 * allocation flags.
3726 * @mapping: the page's address_space
3727 * @index: the page index
3728 * @gfp: the page allocator flags to use if allocating
3729 *
3730 * This behaves as a tmpfs "read_cache_page_gfp(mapping, index, gfp)", with
3731 * any new page allocations done using the specified allocation flags. But
3732 * read_cache_page_gfp() uses the ->readpage() method, which does not suit
3733 * tmpfs, since it may have pages in swapcache and needs to find those for
3734 * itself; GPU drivers in particular rely on this helper, typically mixing
3735 * __GFP_NORETRY | __GFP_NOWARN into the mapping's gfp mask.
3736 */
3737struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
3738 pgoff_t index, gfp_t gfp)
3739{
3740#ifdef CONFIG_SHMEM
3741 struct inode *inode = mapping->host;
3742 struct page *page;
3743 int error;
3744
3745 BUG_ON(mapping->a_ops != &shmem_aops);
3746 error = shmem_getpage_gfp(inode, index, &page, SGP_CACHE,
3747 gfp, NULL, NULL, NULL);
3748 if (error)
3749 page = ERR_PTR(error);
3750 else
3751 unlock_page(page);
3752 return page;
3753#else
3754 /*
3755 * The tiny !CONFIG_SHMEM case uses ramfs without swap.
3756 */
3757 return read_cache_page_gfp(mapping, index, gfp);
3758#endif
3759}
3760EXPORT_SYMBOL_GPL(shmem_read_mapping_page_gfp);
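/*
 * A hedged sketch of a caller of the helper above, in the style of the GPU
 * drivers that use it: fetch (or allocate) one page of a tmpfs-backed object
 * with relaxed allocation flags. The function name is illustrative only.
 *
 *	static struct page *demo_get_obj_page(struct file *filp, pgoff_t index)
 *	{
 *		gfp_t gfp = mapping_gfp_mask(filp->f_mapping);
 *
 *		// Prefer failing this allocation to invoking the OOM killer.
 *		gfp |= __GFP_NORETRY | __GFP_NOWARN;
 *		return shmem_read_mapping_page_gfp(filp->f_mapping, index, gfp);
 *	}
 */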
3761