/*
 * Resizable virtual memory filesystem for Linux (tmpfs/shmem).
 *
 * This file is released under the GPL.
 */

24#include <linux/fs.h>
25#include <linux/init.h>
26#include <linux/vfs.h>
27#include <linux/mount.h>
28#include <linux/ramfs.h>
29#include <linux/pagemap.h>
30#include <linux/file.h>
31#include <linux/mm.h>
32#include <linux/export.h>
33#include <linux/swap.h>
34#include <linux/aio.h>
35#include <linux/syscalls.h>
36#include <uapi/linux/memfd.h>
37
38static struct vfsmount *shm_mnt;
39
40#ifdef CONFIG_SHMEM
/*
 * This virtual memory filesystem is heavily based on the ramfs. It
 * extends ramfs by the ability to use swap and honor resource limits
 * which makes it a completely usable filesystem.
 */

47#include <linux/xattr.h>
48#include <linux/exportfs.h>
49#include <linux/posix_acl.h>
50#include <linux/generic_acl.h>
51#include <linux/mman.h>
52#include <linux/string.h>
53#include <linux/slab.h>
54#include <linux/backing-dev.h>
55#include <linux/shmem_fs.h>
56#include <linux/writeback.h>
57#include <linux/blkdev.h>
58#include <linux/pagevec.h>
59#include <linux/percpu_counter.h>
60#include <linux/falloc.h>
61#include <linux/splice.h>
62#include <linux/security.h>
63#include <linux/swapops.h>
64#include <linux/mempolicy.h>
65#include <linux/namei.h>
66#include <linux/ctype.h>
67#include <linux/migrate.h>
68#include <linux/highmem.h>
69#include <linux/seq_file.h>
70#include <linux/magic.h>
71#include <linux/fcntl.h>
72#include <linux/userfaultfd_k.h>
73#include <linux/rmap.h>
74
75#include <asm/uaccess.h>
76#include <asm/pgtable.h>
77
78#define BLOCKS_PER_PAGE (PAGE_CACHE_SIZE/512)
79#define VM_ACCT(size) (PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT)

/* Pretend that each entry is of this size in directory's i_size */
#define BOGO_DIRENT_SIZE 20

/* Symlink up to this size is kmalloc'ed instead of using a swappable page */
#define SHORT_SYMLINK_LEN 128

/*
 * shmem_fallocate communicates with shmem_fault or shmem_writepage via
 * inode->i_private (with i_mutex making sure that it has only one user at
 * a time): we would prefer not to enlarge the shmem inode just for that.
 */
struct shmem_falloc {
	wait_queue_head_t *waitq; /* faults into hole wait for punch to end */
	pgoff_t start;		/* start of range currently being fallocated */
	pgoff_t next;		/* the next page offset to be fallocated */
	pgoff_t nr_falloced;	/* how many new pages have been fallocated */
	pgoff_t nr_unswapped;	/* how often writepage refused to swap out */
};

/* Flag allocation requirements to shmem_getpage */
enum sgp_type {
	SGP_READ,	/* don't exceed i_size, don't allocate page */
	SGP_CACHE,	/* don't exceed i_size, may allocate page */
	SGP_DIRTY,	/* like SGP_CACHE, but set new page dirty */
	SGP_WRITE,	/* may exceed i_size, may allocate !Uptodate page */
	SGP_FALLOC,	/* like SGP_WRITE, but make existing page Uptodate */
};
108
109#ifdef CONFIG_TMPFS
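/*
 * Default mount limits: block count defaults to half of RAM; inode count
 * defaults to the number of lowmem pages, capped at half of RAM.
 */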
110static unsigned long shmem_default_max_blocks(void)
111{
112 return totalram_pages / 2;
113}
114
115static unsigned long shmem_default_max_inodes(void)
116{
117 return min(totalram_pages - totalhigh_pages, totalram_pages / 2);
118}
119#endif
120
121static bool shmem_should_replace_page(struct page *page, gfp_t gfp);
122static int shmem_replace_page(struct page **pagep, gfp_t gfp,
123 struct shmem_inode_info *info, pgoff_t index);
124static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
125 struct page **pagep, enum sgp_type sgp,
126 gfp_t gfp, struct vm_area_struct *vma,
127 struct vm_fault *vmf, int *fault_type);
128
129static inline int shmem_getpage(struct inode *inode, pgoff_t index,
130 struct page **pagep, enum sgp_type sgp,
131 int *fault_type)
132{
133 return shmem_getpage_gfp(inode, index, pagep, sgp,
134 mapping_gfp_mask(inode->i_mapping),
135 NULL, NULL, fault_type);
136}
137
138static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb)
139{
140 return sb->s_fs_info;
141}

/*
 * shmem_file_setup pre-accounts the whole fixed size of a VM object,
 * for shared memory and for shared anonymous (/dev/zero) mappings
 * (unless VM_NORESERVE, in which case pages are accounted one by one
 * as they are instantiated: see shmem_acct_block() below).
 */
149static inline int shmem_acct_size(unsigned long flags, loff_t size)
150{
151 return (flags & VM_NORESERVE) ?
152 0 : security_vm_enough_memory_mm(current->mm, VM_ACCT(size));
153}
154
155static inline void shmem_unacct_size(unsigned long flags, loff_t size)
156{
157 if (!(flags & VM_NORESERVE))
158 vm_unacct_memory(VM_ACCT(size));
159}

/*
 * ... whereas tmpfs blocks are accounted incrementally as pages are
 * allocated, only when VM_NORESERVE (i.e. when the full size was not
 * pre-accounted above); this is what allows huge sparse files.
 * shmem_getpage reports shmem_acct_block failure as -ENOSPC not -ENOMEM,
 * so that a failure on a sparse tmpfs mapping will give SIGBUS not OOM.
 */
167static inline int shmem_acct_block(unsigned long flags)
168{
169 return (flags & VM_NORESERVE) ?
170 security_vm_enough_memory_mm(current->mm, VM_ACCT(PAGE_CACHE_SIZE)) : 0;
171}
172
173static inline void shmem_unacct_blocks(unsigned long flags, long pages)
174{
175 if (flags & VM_NORESERVE)
176 vm_unacct_memory(pages * VM_ACCT(PAGE_CACHE_SIZE));
177}
178
179static const struct super_operations shmem_ops;
180static const struct address_space_operations shmem_aops;
181static const struct file_operations shmem_file_operations;
182static const struct inode_operations shmem_inode_operations;
183static const struct inode_operations_wrapper shmem_dir_inode_operations;
184static const struct inode_operations shmem_special_inode_operations;
185static const struct vm_operations_struct shmem_vm_ops;
186
187static struct backing_dev_info shmem_backing_dev_info __read_mostly = {
188 .ra_pages = 0,
189 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_SWAP_BACKED,
190};
191
192bool vma_is_shmem(struct vm_area_struct *vma)
193{
194 return vma->vm_ops == &shmem_vm_ops;
195}
196
197static LIST_HEAD(shmem_swaplist);
198static DEFINE_MUTEX(shmem_swaplist_mutex);
199
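/*
 * Reserve or release one inode against the mount's nr_inodes limit;
 * a max_inodes of zero means the limit is disabled.
 */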
200static int shmem_reserve_inode(struct super_block *sb)
201{
202 struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
203 if (sbinfo->max_inodes) {
204 spin_lock(&sbinfo->stat_lock);
205 if (!sbinfo->free_inodes) {
206 spin_unlock(&sbinfo->stat_lock);
207 return -ENOSPC;
208 }
209 sbinfo->free_inodes--;
210 spin_unlock(&sbinfo->stat_lock);
211 }
212 return 0;
213}
214
215static void shmem_free_inode(struct super_block *sb)
216{
217 struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
218 if (sbinfo->max_inodes) {
219 spin_lock(&sbinfo->stat_lock);
220 sbinfo->free_inodes++;
221 spin_unlock(&sbinfo->stat_lock);
222 }
223}
224
/**
 * shmem_recalc_inode - recalculate the block usage of an inode
 * @inode: inode to recalc
 *
 * We have to calculate the free blocks since the mm can drop
 * undirtied hole pages behind our back.
 *
 * But normally   info->alloced == inode->i_mapping->nrpages + info->swapped
 * So mm freed is info->alloced - (inode->i_mapping->nrpages + info->swapped)
 *
 * It has to be called with the spinlock held.
 */
237static void shmem_recalc_inode(struct inode *inode)
238{
239 struct shmem_inode_info *info = SHMEM_I(inode);
240 long freed;
241
242 freed = info->alloced - info->swapped - inode->i_mapping->nrpages;
243 if (freed > 0) {
244 struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
245 if (sbinfo->max_blocks)
246 percpu_counter_add(&sbinfo->used_blocks, -freed);
247 info->alloced -= freed;
248 inode->i_blocks -= freed * BLOCKS_PER_PAGE;
249 shmem_unacct_blocks(info->flags, freed);
250 }
251}

/*
 * Replace the item expected in the radix tree by a new item, while
 * holding the tree lock.
 */
256static int shmem_radix_tree_replace(struct address_space *mapping,
257 pgoff_t index, void *expected, void *replacement)
258{
259 void **pslot;
260 void *item;
261
262 VM_BUG_ON(!expected);
263 VM_BUG_ON(!replacement);
264 pslot = radix_tree_lookup_slot(&mapping->page_tree, index);
265 if (!pslot)
266 return -ENOENT;
267 item = radix_tree_deref_slot_protected(pslot, &mapping->tree_lock);
268 if (item != expected)
269 return -ENOENT;
270 radix_tree_replace_slot(pslot, replacement);
271 return 0;
272}

/*
 * Sometimes, before we decide whether to proceed or to fail, we must check
 * that an entry was not already brought back from swap by a racing thread.
 *
 * Checking page is not enough: by the time a SwapCache page is locked, it
 * might be reused, and again be SwapCache, using the same swap as before.
 */
281static bool shmem_confirm_swap(struct address_space *mapping,
282 pgoff_t index, swp_entry_t swap)
283{
284 void *item;
285
286 rcu_read_lock();
287 item = radix_tree_lookup(&mapping->page_tree, index);
288 rcu_read_unlock();
289 return item == swp_to_radix_entry(swap);
290}

/*
 * Like add_to_page_cache_locked, but error if expected item has gone.
 */
295static int shmem_add_to_page_cache(struct page *page,
296 struct address_space *mapping,
297 pgoff_t index, gfp_t gfp, void *expected)
298{
299 int error;
300
301 VM_BUG_ON_PAGE(!PageLocked(page), page);
302 VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
303
304 page_cache_get(page);
305 page->mapping = mapping;
306 page->index = index;
307
308 spin_lock_irq(&mapping->tree_lock);
309 if (!expected)
310 error = radix_tree_insert(&mapping->page_tree, index, page);
311 else
312 error = shmem_radix_tree_replace(mapping, index, expected,
313 page);
314 if (!error) {
315 mapping->nrpages++;
316 __inc_zone_page_state(page, NR_FILE_PAGES);
317 __inc_zone_page_state(page, NR_SHMEM);
318 spin_unlock_irq(&mapping->tree_lock);
319 } else {
320 page->mapping = NULL;
321 spin_unlock_irq(&mapping->tree_lock);
322 page_cache_release(page);
323 }
324 return error;
325}

/*
 * Like delete_from_page_cache, but substitutes swap for page.
 */
330static void shmem_delete_from_page_cache(struct page *page, void *radswap)
331{
332 struct address_space *mapping = page->mapping;
333 int error;
334
335 spin_lock_irq(&mapping->tree_lock);
336 error = shmem_radix_tree_replace(mapping, page->index, page, radswap);
337 page->mapping = NULL;
338 mapping->nrpages--;
339 __dec_zone_page_state(page, NR_FILE_PAGES);
340 __dec_zone_page_state(page, NR_SHMEM);
341 spin_unlock_irq(&mapping->tree_lock);
342 page_cache_release(page);
343 BUG_ON(error);
344}

/*
 * Remove swap entry from radix tree, free the swap and its page cache.
 */
349static int shmem_free_swap(struct address_space *mapping,
350 pgoff_t index, void *radswap)
351{
352 void *old;
353
354 spin_lock_irq(&mapping->tree_lock);
355 old = radix_tree_delete_item(&mapping->page_tree, index, radswap);
356 spin_unlock_irq(&mapping->tree_lock);
357 if (old != radswap)
358 return -ENOENT;
359 free_swap_and_cache(radix_to_swp_entry(radswap));
360 return 0;
361}

/*
 * Determine (in bytes) how many of the shmem object's pages mapped by the
 * given offsets are swapped out.
 *
 * This is safe to call without i_mutex or mapping->tree_lock thanks to RCU,
 * as long as the inode doesn't go away and racy results are not a problem.
 */
370unsigned long shmem_partial_swap_usage(struct address_space *mapping,
371 pgoff_t start, pgoff_t end)
372{
373 struct radix_tree_iter iter;
374 void **slot;
375 struct page *page;
376 unsigned long swapped = 0;
377
378 rcu_read_lock();
379
380restart:
381 radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
382 if (iter.index >= end)
383 break;
384
385 page = radix_tree_deref_slot(slot);
386
387
388
389
390
391
392 if (radix_tree_deref_retry(page))
393 goto restart;
394
395 if (radix_tree_exceptional_entry(page))
396 swapped++;
397
398 if (need_resched()) {
399 cond_resched_rcu();
400 start = iter.index + 1;
401 goto restart;
402 }
403 }
404
405 rcu_read_unlock();
406
407 return swapped << PAGE_SHIFT;
408}

/*
 * Determine (in bytes) how many of the shmem object's pages mapped by the
 * given vma are swapped out.
 *
 * This is safe to call without i_mutex or mapping->tree_lock thanks to RCU,
 * as long as the inode doesn't go away and racy results are not a problem.
 */
417unsigned long shmem_swap_usage(struct vm_area_struct *vma)
418{
419 struct inode *inode = file_inode(vma->vm_file);
420 struct shmem_inode_info *info = SHMEM_I(inode);
421 struct address_space *mapping = inode->i_mapping;
422 unsigned long swapped;
423
424
425 swapped = READ_ONCE(info->swapped);
426
427
428
429
430
431
432 if (!swapped)
433 return 0;
434
435 if (!vma->vm_pgoff && vma->vm_end - vma->vm_start >= inode->i_size)
436 return swapped << PAGE_SHIFT;
437
438
439 return shmem_partial_swap_usage(mapping,
440 linear_page_index(vma, vma->vm_start),
441 linear_page_index(vma, vma->vm_end));
442}

/*
 * SysV IPC SHM_UNLOCK restore Unevictable pages to their evictable lists.
 */
447void shmem_unlock_mapping(struct address_space *mapping)
448{
449 struct pagevec pvec;
450 pgoff_t indices[PAGEVEC_SIZE];
451 pgoff_t index = 0;
452
453 pagevec_init(&pvec, 0);
454
455
456
457 while (!mapping_unevictable(mapping)) {
458
459
460
461
462 pvec.nr = __find_get_pages(mapping, index,
463 PAGEVEC_SIZE, pvec.pages, indices);
464 if (!pvec.nr)
465 break;
466 index = indices[pvec.nr - 1] + 1;
467 pagevec_remove_exceptionals(&pvec);
468 check_move_unevictable_pages(pvec.pages, pvec.nr);
469 pagevec_release(&pvec);
470 cond_resched();
471 }
472}

/*
 * Remove range of pages and swap entries from radix tree, and free them.
 * If !unfalloc, truncate or punch hole; if unfalloc, undo failed fallocate.
 */
478static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
479 bool unfalloc)
480{
481 struct address_space *mapping = inode->i_mapping;
482 struct shmem_inode_info *info = SHMEM_I(inode);
483 pgoff_t start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
484 pgoff_t end = (lend + 1) >> PAGE_CACHE_SHIFT;
485 unsigned int partial_start = lstart & (PAGE_CACHE_SIZE - 1);
486 unsigned int partial_end = (lend + 1) & (PAGE_CACHE_SIZE - 1);
487 struct pagevec pvec;
488 pgoff_t indices[PAGEVEC_SIZE];
489 long nr_swaps_freed = 0;
490 pgoff_t index;
491 int i;
492
493 if (lend == -1)
494 end = -1;
495
496 pagevec_init(&pvec, 0);
497 index = start;
498 while (index < end) {
499 pvec.nr = __find_get_pages(mapping, index,
500 min(end - index, (pgoff_t)PAGEVEC_SIZE),
501 pvec.pages, indices);
502 if (!pvec.nr)
503 break;
504 mem_cgroup_uncharge_start();
505 for (i = 0; i < pagevec_count(&pvec); i++) {
506 struct page *page = pvec.pages[i];
507
508 index = indices[i];
509 if (index >= end)
510 break;
511
512 if (radix_tree_exceptional_entry(page)) {
513 if (unfalloc)
514 continue;
515 nr_swaps_freed += !shmem_free_swap(mapping,
516 index, page);
517 continue;
518 }
519
520 if (!trylock_page(page))
521 continue;
522 if (!unfalloc || !PageUptodate(page)) {
523 if (page->mapping == mapping) {
524 VM_BUG_ON_PAGE(PageWriteback(page), page);
525 truncate_inode_page(mapping, page);
526 }
527 }
528 unlock_page(page);
529 }
530 pagevec_remove_exceptionals(&pvec);
531 pagevec_release(&pvec);
532 mem_cgroup_uncharge_end();
533 cond_resched();
534 index++;
535 }
536
537 if (partial_start) {
538 struct page *page = NULL;
539 shmem_getpage(inode, start - 1, &page, SGP_READ, NULL);
540 if (page) {
541 unsigned int top = PAGE_CACHE_SIZE;
542 if (start > end) {
543 top = partial_end;
544 partial_end = 0;
545 }
546 zero_user_segment(page, partial_start, top);
547 set_page_dirty(page);
548 unlock_page(page);
549 page_cache_release(page);
550 }
551 }
552 if (partial_end) {
553 struct page *page = NULL;
554 shmem_getpage(inode, end, &page, SGP_READ, NULL);
555 if (page) {
556 zero_user_segment(page, 0, partial_end);
557 set_page_dirty(page);
558 unlock_page(page);
559 page_cache_release(page);
560 }
561 }
562 if (start >= end)
563 return;
564
565 index = start;
566 while (index < end) {
567 cond_resched();
568
569 pvec.nr = __find_get_pages(mapping, index,
570 min(end - index, (pgoff_t)PAGEVEC_SIZE),
571 pvec.pages, indices);
572 if (!pvec.nr) {
573
574 if (index == start || end != -1)
575 break;
576
577 index = start;
578 continue;
579 }
580 mem_cgroup_uncharge_start();
581 for (i = 0; i < pagevec_count(&pvec); i++) {
582 struct page *page = pvec.pages[i];
583
584 index = indices[i];
585 if (index >= end)
586 break;
587
588 if (radix_tree_exceptional_entry(page)) {
589 if (unfalloc)
590 continue;
591 if (shmem_free_swap(mapping, index, page)) {
592
593 index--;
594 break;
595 }
596 nr_swaps_freed++;
597 continue;
598 }
599
600 lock_page(page);
601 if (!unfalloc || !PageUptodate(page)) {
602 if (page->mapping == mapping) {
603 VM_BUG_ON_PAGE(PageWriteback(page), page);
604 truncate_inode_page(mapping, page);
605 } else {
606
607 unlock_page(page);
608 index--;
609 break;
610 }
611 }
612 unlock_page(page);
613 }
614 pagevec_remove_exceptionals(&pvec);
615 pagevec_release(&pvec);
616 mem_cgroup_uncharge_end();
617 index++;
618 }
619
620 spin_lock(&info->lock);
621 info->swapped -= nr_swaps_freed;
622 shmem_recalc_inode(inode);
623 spin_unlock(&info->lock);
624}
625
626void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
627{
628 shmem_undo_range(inode, lstart, lend, false);
629 inode->i_ctime = inode->i_mtime = CURRENT_TIME;
630}
631EXPORT_SYMBOL_GPL(shmem_truncate_range);
632
633static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
634{
635 struct inode *inode = dentry->d_inode;
636 struct shmem_inode_info *info = SHMEM_I(inode);
637 int error;
638
639 error = inode_change_ok(inode, attr);
640 if (error)
641 return error;
642
643 if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
644 loff_t oldsize = inode->i_size;
645 loff_t newsize = attr->ia_size;
646
647
648 if ((newsize < oldsize && (info->seals & F_SEAL_SHRINK)) ||
649 (newsize > oldsize && (info->seals & F_SEAL_GROW)))
650 return -EPERM;
651
652 if (newsize != oldsize) {
653 i_size_write(inode, newsize);
654 inode->i_ctime = inode->i_mtime = CURRENT_TIME;
655 }
656 if (newsize < oldsize) {
657 loff_t holebegin = round_up(newsize, PAGE_SIZE);
658 unmap_mapping_range(inode->i_mapping, holebegin, 0, 1);
659 shmem_truncate_range(inode, newsize, (loff_t)-1);
660
661 unmap_mapping_range(inode->i_mapping, holebegin, 0, 1);
662 }
663 }
664
665 setattr_copy(inode, attr);
666#ifdef CONFIG_TMPFS_POSIX_ACL
667 if (attr->ia_valid & ATTR_MODE)
668 error = generic_acl_chmod(inode);
669#endif
670 return error;
671}
672
673static void shmem_evict_inode(struct inode *inode)
674{
675 struct shmem_inode_info *info = SHMEM_I(inode);
676
677 if (inode->i_mapping->a_ops == &shmem_aops) {
678 shmem_unacct_size(info->flags, inode->i_size);
679 inode->i_size = 0;
680 shmem_truncate_range(inode, 0, (loff_t)-1);
681 if (!list_empty(&info->swaplist)) {
682 mutex_lock(&shmem_swaplist_mutex);
683 list_del_init(&info->swaplist);
684 mutex_unlock(&shmem_swaplist_mutex);
685 }
686 } else
687 kfree(info->symlink);
688
689 simple_xattrs_free(&info->xattrs);
690 WARN_ON(inode->i_blocks);
691 shmem_free_inode(inode->i_sb);
692 clear_inode(inode);
693}

/*
 * If swap found in inode, free it and move page from swapcache to filecache.
 */
698static int shmem_unuse_inode(struct shmem_inode_info *info,
699 swp_entry_t swap, struct page **pagep)
700{
701 struct address_space *mapping = info->vfs_inode.i_mapping;
702 void *radswap;
703 pgoff_t index;
704 gfp_t gfp;
705 int error = 0;
706
707 radswap = swp_to_radix_entry(swap);
708 index = radix_tree_locate_item(&mapping->page_tree, radswap);
709 if (index == -1)
710 return 0;
711
712
713
714
715
716
717
718 if (shmem_swaplist.next != &info->swaplist)
719 list_move_tail(&shmem_swaplist, &info->swaplist);
720
721 gfp = mapping_gfp_mask(mapping);
722 if (shmem_should_replace_page(*pagep, gfp)) {
723 mutex_unlock(&shmem_swaplist_mutex);
724 error = shmem_replace_page(pagep, gfp, info, index);
725 mutex_lock(&shmem_swaplist_mutex);

		/*
		 * We needed to drop the mutex to make that restrictive page
		 * allocation, but the inode might have been freed while we
		 * dropped it: a racing shmem_evict_inode() cannot complete
		 * without emptying the radix_tree, but our page lock on this
		 * swapcache page is not enough to prevent that.
		 *
		 * We must not proceed to shmem_add_to_page_cache() if the
		 * inode has been freed; checking that our swap entry is
		 * still in use proves the inode is still live, since the
		 * entry cannot have been reused for another page meanwhile.
		 */
744 if (!page_swapcount(*pagep))
745 error = -ENOENT;
746 }
747
748
749
750
751
752
753 if (!error)
754 error = shmem_add_to_page_cache(*pagep, mapping, index,
755 GFP_NOWAIT, radswap);
756 if (error != -ENOMEM) {
757
758
759
760
761 delete_from_swap_cache(*pagep);
762 set_page_dirty(*pagep);
763 if (!error) {
764 spin_lock(&info->lock);
765 info->swapped--;
766 spin_unlock(&info->lock);
767 swap_free(swap);
768 }
769 error = 1;
770 }
771 return error;
772}

/*
 * Search through swapped inodes to find and replace swap by page.
 */
777int shmem_unuse(swp_entry_t swap, struct page *page)
778{
779 struct list_head *this, *next;
780 struct shmem_inode_info *info;
781 int found = 0;
782 int error = 0;
783
784
785
786
787
788 if (unlikely(!PageSwapCache(page) || page_private(page) != swap.val))
789 goto out;
790
791
792
793
794
795
796 error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
797 if (error)
798 goto out;
799
800
801 mutex_lock(&shmem_swaplist_mutex);
802 list_for_each_safe(this, next, &shmem_swaplist) {
803 info = list_entry(this, struct shmem_inode_info, swaplist);
804 if (info->swapped)
805 found = shmem_unuse_inode(info, swap, &page);
806 else
807 list_del_init(&info->swaplist);
808 cond_resched();
809 if (found)
810 break;
811 }
812 mutex_unlock(&shmem_swaplist_mutex);
813
814 if (found < 0)
815 error = found;
816out:
817 unlock_page(page);
818 page_cache_release(page);
819 return error;
820}

/*
 * Move the page from the page cache to the swap cache.
 */
825static int shmem_writepage(struct page *page, struct writeback_control *wbc)
826{
827 struct shmem_inode_info *info;
828 struct address_space *mapping;
829 struct inode *inode;
830 swp_entry_t swap;
831 pgoff_t index;
832
833 BUG_ON(!PageLocked(page));
834 mapping = page->mapping;
835 index = page->index;
836 inode = mapping->host;
837 info = SHMEM_I(inode);
838 if (info->flags & VM_LOCKED)
839 goto redirty;
840 if (!total_swap_pages)
841 goto redirty;

	/*
	 * Our capabilities prevent regular writeback or sync from ever calling
	 * shmem_writepage; but a stacking filesystem might use ->writepage of
	 * its underlying filesystem, in which case tmpfs should write out to
	 * swap only in response to memory pressure, and not for the writeback
	 * threads or sync.
	 */
850 if (!wbc->for_reclaim) {
851 WARN_ON_ONCE(1);
852 goto redirty;
853 }

	/*
	 * This is somewhat ridiculous, but without plumbing a SWAP_MAP_FALLOC
	 * value into swapfile.c, the only way we can correctly account for a
	 * fallocated page arriving here is now to initialize it and write it.
	 *
	 * That's okay for a page already fallocated earlier, but if we have
	 * not yet completed the fallocation, then (a) we want to keep track
	 * of this page in case we have to undo it, and (b) it may not be a
	 * good idea to continue anyway, once we're pushing into swap.  So
	 * reactivate the page, and let shmem_fallocate() quit when too many.
	 */
866 if (!PageUptodate(page)) {
867 if (inode->i_private) {
868 struct shmem_falloc *shmem_falloc;
869 spin_lock(&inode->i_lock);
870 shmem_falloc = inode->i_private;
871 if (shmem_falloc &&
872 !shmem_falloc->waitq &&
873 index >= shmem_falloc->start &&
874 index < shmem_falloc->next)
875 shmem_falloc->nr_unswapped++;
876 else
877 shmem_falloc = NULL;
878 spin_unlock(&inode->i_lock);
879 if (shmem_falloc)
880 goto redirty;
881 }
882 clear_highpage(page);
883 flush_dcache_page(page);
884 SetPageUptodate(page);
885 }
886
887 swap = get_swap_page();
888 if (!swap.val)
889 goto redirty;

	/*
	 * Add inode to shmem_unuse()'s list of swapped-out inodes,
	 * if it's not already there.  Do it now, before the page is
	 * moved to swap cache, when its pagelock no longer protects
	 * the inode from eviction.  But don't unlock the mutex until
	 * we've incremented swapped, because shmem_unuse_inode() will
	 * prune a !swapped inode from the swaplist under this mutex.
	 */
899 mutex_lock(&shmem_swaplist_mutex);
900 if (list_empty(&info->swaplist))
901 list_add_tail(&info->swaplist, &shmem_swaplist);
902
903 if (add_to_swap_cache(page, swap, GFP_ATOMIC) == 0) {
904 swap_shmem_alloc(swap);
905 shmem_delete_from_page_cache(page, swp_to_radix_entry(swap));
906
907 spin_lock(&info->lock);
908 info->swapped++;
909 shmem_recalc_inode(inode);
910 spin_unlock(&info->lock);
911
912 mutex_unlock(&shmem_swaplist_mutex);
913 BUG_ON(page_mapped(page));
914 swap_writepage(page, wbc);
915 return 0;
916 }
917
918 mutex_unlock(&shmem_swaplist_mutex);
919 swapcache_free(swap, NULL);
920redirty:
921 set_page_dirty(page);
922 if (wbc->for_reclaim)
923 return AOP_WRITEPAGE_ACTIVATE;
924 unlock_page(page);
925 return 0;
926}
927
928#ifdef CONFIG_NUMA
929#ifdef CONFIG_TMPFS
930static void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol)
931{
932 char buffer[64];
933
934 if (!mpol || mpol->mode == MPOL_DEFAULT)
935 return;
936
937 mpol_to_str(buffer, sizeof(buffer), mpol);
938
939 seq_printf(seq, ",mpol=%s", buffer);
940}
941
942static struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
943{
944 struct mempolicy *mpol = NULL;
945 if (sbinfo->mpol) {
946 spin_lock(&sbinfo->stat_lock);
947 mpol = sbinfo->mpol;
948 mpol_get(mpol);
949 spin_unlock(&sbinfo->stat_lock);
950 }
951 return mpol;
952}
953#endif
954
955static struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp,
956 struct shmem_inode_info *info, pgoff_t index)
957{
958 struct vm_area_struct pvma;
959 struct page *page;
960
961
962 pvma.vm_start = 0;
963
964 pvma.vm_pgoff = index + info->vfs_inode.i_ino;
965 pvma.vm_ops = NULL;
966 pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, index);
967
968 page = swapin_readahead(swap, gfp, &pvma, 0);
969
970
971 mpol_cond_put(pvma.vm_policy);
972
973 return page;
974}
975
976static struct page *shmem_alloc_page(gfp_t gfp,
977 struct shmem_inode_info *info, pgoff_t index)
978{
979 struct vm_area_struct pvma;
980 struct page *page;
981
982
983 pvma.vm_start = 0;
984
985 pvma.vm_pgoff = index + info->vfs_inode.i_ino;
986 pvma.vm_ops = NULL;
987 pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, index);
988
989 page = alloc_page_vma(gfp, &pvma, 0);
990
991
992 mpol_cond_put(pvma.vm_policy);
993
994 return page;
995}
996#else
997#ifdef CONFIG_TMPFS
998static inline void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol)
999{
1000}
1001#endif
1002
1003static inline struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp,
1004 struct shmem_inode_info *info, pgoff_t index)
1005{
1006 return swapin_readahead(swap, gfp, NULL, 0);
1007}
1008
1009static inline struct page *shmem_alloc_page(gfp_t gfp,
1010 struct shmem_inode_info *info, pgoff_t index)
1011{
1012 return alloc_page(gfp);
1013}
1014#endif
1015
1016#if !defined(CONFIG_NUMA) || !defined(CONFIG_TMPFS)
1017static inline struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
1018{
1019 return NULL;
1020}
1021#endif

/*
 * When a page is moved from swapcache to shmem filecache (either by the
 * usual swapin of shmem_getpage_gfp(), or by the less common swapoff of
 * shmem_unuse_inode()), it may have been read in earlier from swap, in
 * ignorance of the mapping it belongs to.  If that mapping has special
 * constraints (like the gma500 GEM driver, which requires RAM below 4GB),
 * we may need to copy to a suitable page before moving to filecache.
 *
 * In a future release, this may well be extended to respect cpuset and
 * NUMA mempolicy, and applied also to anonymous pages in do_swap_page();
 * but for now it is a simple matter of zone.
 */
1035static bool shmem_should_replace_page(struct page *page, gfp_t gfp)
1036{
1037 return page_zonenum(page) > gfp_zone(gfp);
1038}
1039
1040static int shmem_replace_page(struct page **pagep, gfp_t gfp,
1041 struct shmem_inode_info *info, pgoff_t index)
1042{
1043 struct page *oldpage, *newpage;
1044 struct address_space *swap_mapping;
1045 pgoff_t swap_index;
1046 int error;
1047
1048 oldpage = *pagep;
1049 swap_index = page_private(oldpage);
1050 swap_mapping = page_mapping(oldpage);
1051
1052
1053
1054
1055
1056 gfp &= ~GFP_CONSTRAINT_MASK;
1057 newpage = shmem_alloc_page(gfp, info, index);
1058 if (!newpage)
1059 return -ENOMEM;
1060
1061 page_cache_get(newpage);
1062 copy_highpage(newpage, oldpage);
1063 flush_dcache_page(newpage);
1064
1065 __set_page_locked(newpage);
1066 SetPageUptodate(newpage);
1067 SetPageSwapBacked(newpage);
1068 set_page_private(newpage, swap_index);
1069 SetPageSwapCache(newpage);
1070
1071
1072
1073
1074
1075 spin_lock_irq(&swap_mapping->tree_lock);
1076 error = shmem_radix_tree_replace(swap_mapping, swap_index, oldpage,
1077 newpage);
1078 if (!error) {
1079 __inc_zone_page_state(newpage, NR_FILE_PAGES);
1080 __dec_zone_page_state(oldpage, NR_FILE_PAGES);
1081 }
1082 spin_unlock_irq(&swap_mapping->tree_lock);
1083
1084 if (unlikely(error)) {
1085
1086
1087
1088
1089
1090 oldpage = newpage;
1091 } else {
1092 mem_cgroup_replace_page_cache(oldpage, newpage);
1093 lru_cache_add_anon(newpage);
1094 *pagep = newpage;
1095 }
1096
1097 ClearPageSwapCache(oldpage);
1098 set_page_private(oldpage, 0);
1099
1100 unlock_page(oldpage);
1101 page_cache_release(oldpage);
1102 page_cache_release(oldpage);
1103 return error;
1104}

/*
 * shmem_getpage_gfp - find page in cache, or get from swap, or allocate
 *
 * If we allocate a new one we do not mark it dirty. That's up to the
 * vm. If we swap it in we mark it dirty since we also free the swap
 * entry since a page cannot live in both the swap and page cache.
 *
 * vma, vmf and fault_type are only supplied by shmem_fault; otherwise
 * they are NULL.
 */
1113static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
1114 struct page **pagep, enum sgp_type sgp, gfp_t gfp,
1115 struct vm_area_struct *vma, struct vm_fault *vmf,
1116 int *fault_type)
1117{
1118 struct address_space *mapping = inode->i_mapping;
1119 struct shmem_inode_info *info;
1120 struct shmem_sb_info *sbinfo;
1121 struct page *page;
1122 swp_entry_t swap;
1123 int error;
1124 int once = 0;
1125 int alloced = 0;
1126
1127 if (index > (MAX_LFS_FILESIZE >> PAGE_CACHE_SHIFT))
1128 return -EFBIG;
1129repeat:
1130 swap.val = 0;
1131 page = __find_lock_page(mapping, index);
1132 if (radix_tree_exceptional_entry(page)) {
1133 swap = radix_to_swp_entry(page);
1134 page = NULL;
1135 }
1136
1137 if (sgp != SGP_WRITE && sgp != SGP_FALLOC &&
1138 ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
1139 error = -EINVAL;
1140 goto failed;
1141 }
1142
1143
1144 if (page && !PageUptodate(page)) {
1145 if (sgp != SGP_READ)
1146 goto clear;
1147 unlock_page(page);
1148 page_cache_release(page);
1149 page = NULL;
1150 }
1151 if (page || (sgp == SGP_READ && !swap.val)) {
1152 *pagep = page;
1153 return 0;
1154 }

	/*
	 * Fast cache lookup did not find it:
	 * bring it back from swap or allocate.
	 */
1160 info = SHMEM_I(inode);
1161 sbinfo = SHMEM_SB(inode->i_sb);
1162
1163 if (swap.val) {
1164
1165 page = lookup_swap_cache(swap);
1166 if (!page) {
1167
1168 if (fault_type)
1169 *fault_type |= VM_FAULT_MAJOR;
1170 page = shmem_swapin(swap, gfp, info, index);
1171 if (!page) {
1172 error = -ENOMEM;
1173 goto failed;
1174 }
1175 }
1176
1177
1178 lock_page(page);
1179 if (!PageSwapCache(page) || page_private(page) != swap.val ||
1180 !shmem_confirm_swap(mapping, index, swap)) {
1181 error = -EEXIST;
1182 goto unlock;
1183 }
1184 if (!PageUptodate(page)) {
1185 error = -EIO;
1186 goto failed;
1187 }
1188 wait_on_page_writeback(page);
1189
1190 if (shmem_should_replace_page(page, gfp)) {
1191 error = shmem_replace_page(&page, gfp, info, index);
1192 if (error)
1193 goto failed;
1194 }
1195
1196 error = mem_cgroup_cache_charge(page, current->mm,
1197 gfp & GFP_RECLAIM_MASK);
1198 if (!error) {
1199 error = shmem_add_to_page_cache(page, mapping, index,
1200 gfp, swp_to_radix_entry(swap));
			/*
			 * We already confirmed swap under page lock, and make
			 * no memory allocation here, so usually no possibility
			 * of error; but free_swap_and_cache() only trylocks a
			 * page, so it is just possible that the entry has been
			 * truncated or holepunched since swap was confirmed.
			 * shmem_undo_range() will have done some of the
			 * unaccounting, now delete_from_swap_cache() will do
			 * the rest.
			 * Reset swap.val? No, leave it so "failed" goes back to
			 * "repeat": reading a hole and writing should succeed.
			 */
1213 if (error)
1214 delete_from_swap_cache(page);
1215 }
1216 if (error)
1217 goto failed;
1218
1219 spin_lock(&info->lock);
1220 info->swapped--;
1221 shmem_recalc_inode(inode);
1222 spin_unlock(&info->lock);
1223
1224 delete_from_swap_cache(page);
1225 set_page_dirty(page);
1226 swap_free(swap);
1227
1228 } else {
1229 if (vma && userfaultfd_missing(vma)) {
1230 *fault_type = handle_userfault(vma,
1231 (unsigned long)
1232 vmf->virtual_address,
1233 vmf->flags,
1234 VM_UFFD_MISSING);
1235 return 0;
1236 }
1237 if (shmem_acct_block(info->flags)) {
1238 error = -ENOSPC;
1239
1240 goto failed;
1241 }
1242 if (sbinfo->max_blocks) {
1243 if (percpu_counter_compare(&sbinfo->used_blocks,
1244 sbinfo->max_blocks) >= 0) {
1245 error = -ENOSPC;
1246 goto unacct;
1247 }
1248 percpu_counter_inc(&sbinfo->used_blocks);
1249 }
1250
1251 page = shmem_alloc_page(gfp, info, index);
1252 if (!page) {
1253 error = -ENOMEM;
1254 goto decused;
1255 }
1256
1257 SetPageSwapBacked(page);
1258 __set_page_locked(page);
1259 error = mem_cgroup_cache_charge(page, current->mm,
1260 gfp & GFP_RECLAIM_MASK);
1261 if (error)
1262 goto decused;
1263 error = radix_tree_maybe_preload(gfp & GFP_RECLAIM_MASK);
1264 if (!error) {
1265 error = shmem_add_to_page_cache(page, mapping, index,
1266 gfp, NULL);
1267 radix_tree_preload_end();
1268 }
1269 if (error) {
1270 mem_cgroup_uncharge_cache_page(page);
1271 goto decused;
1272 }
1273 lru_cache_add_anon(page);
1274
1275 spin_lock(&info->lock);
1276 info->alloced++;
1277 inode->i_blocks += BLOCKS_PER_PAGE;
1278 shmem_recalc_inode(inode);
1279 spin_unlock(&info->lock);
1280 alloced = true;
1281
1282
1283
1284
1285 if (sgp == SGP_FALLOC)
1286 sgp = SGP_WRITE;
1287clear:
		/*
		 * Let SGP_WRITE caller clear ends if write does not fill page;
		 * but SGP_FALLOC on a page fallocated earlier must initialize
		 * it now, lest undo on failure cancel our earlier guarantee.
		 */
1293 if (sgp != SGP_WRITE) {
1294 clear_highpage(page);
1295 flush_dcache_page(page);
1296 SetPageUptodate(page);
1297 }
1298 if (sgp == SGP_DIRTY)
1299 set_page_dirty(page);
1300 }
1301
1302
1303 if (sgp != SGP_WRITE && sgp != SGP_FALLOC &&
1304 ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
1305 error = -EINVAL;
1306 if (alloced)
1307 goto trunc;
1308 else
1309 goto failed;
1310 }
1311 *pagep = page;
1312 return 0;

	/*
	 * Error recovery.
	 */
1317trunc:
1318 info = SHMEM_I(inode);
1319 ClearPageDirty(page);
1320 delete_from_page_cache(page);
1321 spin_lock(&info->lock);
1322 info->alloced--;
1323 inode->i_blocks -= BLOCKS_PER_PAGE;
1324 spin_unlock(&info->lock);
1325decused:
1326 sbinfo = SHMEM_SB(inode->i_sb);
1327 if (sbinfo->max_blocks)
1328 percpu_counter_add(&sbinfo->used_blocks, -1);
1329unacct:
1330 shmem_unacct_blocks(info->flags, 1);
1331failed:
1332 if (swap.val && error != -EINVAL &&
1333 !shmem_confirm_swap(mapping, index, swap))
1334 error = -EEXIST;
1335unlock:
1336 if (page) {
1337 unlock_page(page);
1338 page_cache_release(page);
1339 }
1340 if (error == -ENOSPC && !once++) {
1341 info = SHMEM_I(inode);
1342 spin_lock(&info->lock);
1343 shmem_recalc_inode(inode);
1344 spin_unlock(&info->lock);
1345 goto repeat;
1346 }
1347 if (error == -EEXIST)
1348 goto repeat;
1349 return error;
1350}

/*
 * This is like autoremove_wake_function, but it removes the wait queue
 * entry unconditionally - even if something else had already woken the
 * target.
 */
1357static int synchronous_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
1358{
1359 int ret = default_wake_function(wait, mode, sync, key);
1360 list_del_init(&wait->task_list);
1361 return ret;
1362}
1363
1364static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1365{
1366 struct inode *inode = file_inode(vma->vm_file);
1367 gfp_t gfp = mapping_gfp_mask(inode->i_mapping);
1368 int error;
1369 int ret = VM_FAULT_LOCKED;

	/*
	 * Trinity finds that probing a hole which tmpfs is punching can
	 * prevent the hole-punch from ever completing: which in turn
	 * locks writers out with its hold on i_mutex.  So refrain from
	 * faulting pages into the hole while it's being punched.  Although
	 * shmem_undo_range() does remove the additions, it may be unable to
	 * keep up, as each new page needs its own unmap_mapping_range() call,
	 * and the i_mmap tree grows ever slower to scan if new vmas are added.
	 *
	 * It does not matter if we sometimes reach this check just before the
	 * hole-punch begins, so that one fault then races with the punch:
	 * we just need to make racing faults a rare case.
	 */
1388 if (unlikely(inode->i_private)) {
1389 struct shmem_falloc *shmem_falloc;
1390
1391 spin_lock(&inode->i_lock);
1392 shmem_falloc = inode->i_private;
1393 if (shmem_falloc &&
1394 shmem_falloc->waitq &&
1395 vmf->pgoff >= shmem_falloc->start &&
1396 vmf->pgoff < shmem_falloc->next) {
1397 wait_queue_head_t *shmem_falloc_waitq;
1398 DEFINE_WAIT_FUNC(shmem_fault_wait, synchronous_wake_function);
1399
1400 ret = VM_FAULT_NOPAGE;
1401 if ((vmf->flags & FAULT_FLAG_ALLOW_RETRY) &&
1402 !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
1403
1404 up_read(&vma->vm_mm->mmap_sem);
1405 ret = VM_FAULT_RETRY;
1406 }
1407
1408 shmem_falloc_waitq = shmem_falloc->waitq;
1409 prepare_to_wait(shmem_falloc_waitq, &shmem_fault_wait,
1410 TASK_UNINTERRUPTIBLE);
1411 spin_unlock(&inode->i_lock);
1412 schedule();

			/*
			 * shmem_falloc_waitq points into the shmem_fallocate()
			 * stack of the hole-punching task; it is usually
			 * invalid by the time we reach here, but finish_wait()
			 * does not dereference it in that case; though i_lock
			 * is needed lest we race with wake_up_all().
			 */
1421 spin_lock(&inode->i_lock);
1422 finish_wait(shmem_falloc_waitq, &shmem_fault_wait);
1423 spin_unlock(&inode->i_lock);
1424 return ret;
1425 }
1426 spin_unlock(&inode->i_lock);
1427 }
1428
1429 error = shmem_getpage_gfp(inode, vmf->pgoff, &vmf->page, SGP_CACHE,
1430 gfp, vma, vmf, &ret);
1431 if (error)
1432 return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS);
1433
1434 if (ret & VM_FAULT_MAJOR) {
1435 count_vm_event(PGMAJFAULT);
1436 mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
1437 }
1438 return ret;
1439}
1440
1441#ifdef CONFIG_NUMA
1442static int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *mpol)
1443{
1444 struct inode *inode = file_inode(vma->vm_file);
1445 return mpol_set_shared_policy(&SHMEM_I(inode)->policy, vma, mpol);
1446}
1447
1448static struct mempolicy *shmem_get_policy(struct vm_area_struct *vma,
1449 unsigned long addr)
1450{
1451 struct inode *inode = file_inode(vma->vm_file);
1452 pgoff_t index;
1453
1454 index = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
1455 return mpol_shared_policy_lookup(&SHMEM_I(inode)->policy, index);
1456}
1457#endif
1458
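/*
 * SysV SHM_LOCK / SHM_UNLOCK: charge the locked size against the user's
 * RLIMIT_MEMLOCK and mark the whole mapping unevictable (or undo that).
 */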
1459int shmem_lock(struct file *file, int lock, struct user_struct *user)
1460{
1461 struct inode *inode = file_inode(file);
1462 struct shmem_inode_info *info = SHMEM_I(inode);
1463 int retval = -ENOMEM;
1464
1465 spin_lock(&info->lock);
1466 if (lock && !(info->flags & VM_LOCKED)) {
1467 if (!user_shm_lock(inode->i_size, user))
1468 goto out_nomem;
1469 info->flags |= VM_LOCKED;
1470 mapping_set_unevictable(file->f_mapping);
1471 }
1472 if (!lock && (info->flags & VM_LOCKED) && user) {
1473 user_shm_unlock(inode->i_size, user);
1474 info->flags &= ~VM_LOCKED;
1475 mapping_clear_unevictable(file->f_mapping);
1476 }
1477 retval = 0;
1478
1479out_nomem:
1480 spin_unlock(&info->lock);
1481 return retval;
1482}
1483
1484static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
1485{
1486 file_accessed(file);
1487 vma->vm_ops = &shmem_vm_ops;
1488 return 0;
1489}
1490
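/*
 * Allocate and initialise a new shmem inode; returns NULL (rather than an
 * ERR_PTR) if the inode or its reservation cannot be obtained.
 */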
1491static struct inode *shmem_get_inode(struct super_block *sb, const struct inode *dir,
1492 umode_t mode, dev_t dev, unsigned long flags)
1493{
1494 struct inode *inode;
1495 struct shmem_inode_info *info;
1496 struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
1497
1498 if (shmem_reserve_inode(sb))
1499 return NULL;
1500
1501 inode = new_inode(sb);
1502 if (inode) {
1503 inode->i_ino = get_next_ino();
1504 inode_init_owner(inode, dir, mode);
1505 inode->i_blocks = 0;
1506 inode->i_mapping->backing_dev_info = &shmem_backing_dev_info;
1507 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
1508 inode->i_generation = get_seconds();
1509 info = SHMEM_I(inode);
1510 memset(info, 0, (char *)inode - (char *)info);
1511 spin_lock_init(&info->lock);
1512 info->seals = F_SEAL_SEAL;
1513 info->flags = flags & VM_NORESERVE;
1514 INIT_LIST_HEAD(&info->swaplist);
1515 simple_xattrs_init(&info->xattrs);
1516 cache_no_acl(inode);
1517
1518 switch (mode & S_IFMT) {
1519 default:
1520 inode->i_op = &shmem_special_inode_operations;
1521 init_special_inode(inode, mode, dev);
1522 break;
1523 case S_IFREG:
1524 inode->i_mapping->a_ops = &shmem_aops;
1525 inode->i_op = &shmem_inode_operations;
1526 inode->i_fop = &shmem_file_operations;
1527 mpol_shared_policy_init(&info->policy,
1528 shmem_get_sbmpol(sbinfo));
1529 break;
1530 case S_IFDIR:
1531 inc_nlink(inode);
1532
1533 inode->i_size = 2 * BOGO_DIRENT_SIZE;
1534 inode->i_op = &shmem_dir_inode_operations.ops;
1535 inode->i_fop = &simple_dir_operations;
1536 inode->i_flags |= S_IOPS_WRAPPER;
1537 break;
1538 case S_IFLNK:
1539
1540
1541
1542
1543 mpol_shared_policy_init(&info->policy, NULL);
1544 break;
1545 }
1546 } else
1547 shmem_free_inode(sb);
1548 return inode;
1549}
1550
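/* True if this address_space belongs to a shmem/tmpfs inode */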
1551bool shmem_mapping(struct address_space *mapping)
1552{
1553 return mapping->backing_dev_info == &shmem_backing_dev_info;
1554}
1555
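/*
 * userfaultfd UFFDIO_COPY for shmem: allocate a page, copy user data from
 * src_addr into it, add it to the file's page cache and map it at dst_addr.
 */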
1556int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm,
1557 pmd_t *dst_pmd,
1558 struct vm_area_struct *dst_vma,
1559 unsigned long dst_addr,
1560 unsigned long src_addr,
1561 struct page **pagep)
1562{
1563 struct inode *inode = file_inode(dst_vma->vm_file);
1564 struct shmem_inode_info *info = SHMEM_I(inode);
1565 struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
1566 struct address_space *mapping = inode->i_mapping;
1567 gfp_t gfp = mapping_gfp_mask(mapping);
1568 pgoff_t pgoff = linear_page_index(dst_vma, dst_addr);
1569 spinlock_t *ptl;
1570 void *page_kaddr;
1571 struct page *page;
1572 pte_t _dst_pte, *dst_pte;
1573 int ret;
1574
1575 ret = -ENOMEM;
1576 if (shmem_acct_block(info->flags))
1577 goto out;
1578 if (sbinfo->max_blocks) {
1579 if (percpu_counter_compare(&sbinfo->used_blocks,
1580 sbinfo->max_blocks) >= 0)
1581 goto out_unacct_blocks;
1582 percpu_counter_inc(&sbinfo->used_blocks);
1583 }
1584
1585 if (!*pagep) {
1586 page = shmem_alloc_page(gfp, info, pgoff);
1587 if (!page)
1588 goto out_dec_used_blocks;
1589
1590 page_kaddr = kmap_atomic(page);
1591 ret = copy_from_user(page_kaddr, (const void __user *)src_addr,
1592 PAGE_SIZE);
1593 kunmap_atomic(page_kaddr);
1594
1595
1596 if (unlikely(ret)) {
1597 *pagep = page;
1598 if (sbinfo->max_blocks)
1599 percpu_counter_add(&sbinfo->used_blocks, -1);
1600 shmem_unacct_blocks(info->flags, 1);
1601
1602 return -EFAULT;
1603 }
1604 } else {
1605 page = *pagep;
1606 *pagep = NULL;
1607 }
1608
1609 VM_BUG_ON(PageLocked(page) || PageSwapBacked(page));
1610 __set_page_locked(page);
1611 __SetPageSwapBacked(page);
1612 __SetPageUptodate(page);
1613
1614 ret = mem_cgroup_cache_charge(page, dst_mm,
1615 gfp & GFP_RECLAIM_MASK);
1616 if (ret)
1617 goto out_release;
1618
1619 ret = radix_tree_maybe_preload(gfp & GFP_RECLAIM_MASK);
1620 if (!ret) {
1621 ret = shmem_add_to_page_cache(page, mapping, pgoff, gfp, NULL);
1622 radix_tree_preload_end();
1623 }
1624 if (ret)
1625 goto out_release_uncharge;
1626
1627 _dst_pte = mk_pte(page, dst_vma->vm_page_prot);
1628 if (dst_vma->vm_flags & VM_WRITE)
1629 _dst_pte = pte_mkwrite(pte_mkdirty(_dst_pte));
1630
1631 ret = -EEXIST;
1632 dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
1633 if (!pte_none(*dst_pte))
1634 goto out_release_uncharge_unlock;
1635
1636 lru_cache_add_anon(page);
1637
1638 spin_lock(&info->lock);
1639 info->alloced++;
1640 inode->i_blocks += BLOCKS_PER_PAGE;
1641 shmem_recalc_inode(inode);
1642 spin_unlock(&info->lock);
1643
1644 inc_mm_counter(dst_mm, mm_counter_file(page));
1645 page_add_file_rmap(page);
1646 set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
1647
1648
1649 update_mmu_cache(dst_vma, dst_addr, dst_pte);
1650 unlock_page(page);
1651 pte_unmap_unlock(dst_pte, ptl);
1652 ret = 0;
1653out:
1654 return ret;
1655out_release_uncharge_unlock:
1656 pte_unmap_unlock(dst_pte, ptl);
1657out_release_uncharge:
1658 mem_cgroup_uncharge_cache_page(page);
1659out_release:
1660 unlock_page(page);
1661 put_page(page);
1662out_dec_used_blocks:
1663 if (sbinfo->max_blocks)
1664 percpu_counter_add(&sbinfo->used_blocks, -1);
1665out_unacct_blocks:
1666 shmem_unacct_blocks(info->flags, 1);
1667 goto out;
1668}
1669
1670#ifdef CONFIG_TMPFS
1671static const struct inode_operations shmem_symlink_inode_operations;
1672static const struct inode_operations shmem_short_symlink_operations;
1673
1674#ifdef CONFIG_TMPFS_XATTR
1675static int shmem_initxattrs(struct inode *, const struct xattr *, void *);
1676#else
1677#define shmem_initxattrs NULL
1678#endif
1679
1680static int
1681shmem_write_begin(struct file *file, struct address_space *mapping,
1682 loff_t pos, unsigned len, unsigned flags,
1683 struct page **pagep, void **fsdata)
1684{
1685 struct inode *inode = mapping->host;
1686 struct shmem_inode_info *info = SHMEM_I(inode);
1687 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
1688
1689
1690 if (unlikely(info->seals)) {
1691 if (info->seals & F_SEAL_WRITE)
1692 return -EPERM;
1693 if ((info->seals & F_SEAL_GROW) && pos + len > inode->i_size)
1694 return -EPERM;
1695 }
1696
1697 return shmem_getpage(inode, index, pagep, SGP_WRITE, NULL);
1698}
1699
1700static int
1701shmem_write_end(struct file *file, struct address_space *mapping,
1702 loff_t pos, unsigned len, unsigned copied,
1703 struct page *page, void *fsdata)
1704{
1705 struct inode *inode = mapping->host;
1706
1707 if (pos + copied > inode->i_size)
1708 i_size_write(inode, pos + copied);
1709
1710 if (!PageUptodate(page)) {
1711 if (copied < PAGE_CACHE_SIZE) {
1712 unsigned from = pos & (PAGE_CACHE_SIZE - 1);
1713 zero_user_segments(page, 0, from,
1714 from + copied, PAGE_CACHE_SIZE);
1715 }
1716 SetPageUptodate(page);
1717 }
1718 set_page_dirty(page);
1719 unlock_page(page);
1720 page_cache_release(page);
1721
1722 return copied;
1723}
1724
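/*
 * Read as many bytes as possible from *ppos, feeding each page (or zeros,
 * for holes) to @actor; used by shmem_file_aio_read() below.
 */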
1725static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_t *desc, read_actor_t actor)
1726{
1727 struct inode *inode = file_inode(filp);
1728 struct address_space *mapping = inode->i_mapping;
1729 pgoff_t index;
1730 unsigned long offset;
1731 enum sgp_type sgp = SGP_READ;

	/*
	 * Might this read be for a stacking filesystem?  Then when reading
	 * holes of a sparse file, we actually need to fill those holes with
	 * zeroed, dirty pages, rather than mapping in the common ZERO_PAGE.
	 */
1738 if (segment_eq(get_fs(), KERNEL_DS))
1739 sgp = SGP_DIRTY;
1740
1741 index = *ppos >> PAGE_CACHE_SHIFT;
1742 offset = *ppos & ~PAGE_CACHE_MASK;
1743
1744 for (;;) {
1745 struct page *page = NULL;
1746 pgoff_t end_index;
1747 unsigned long nr, ret;
1748 loff_t i_size = i_size_read(inode);
1749
1750 end_index = i_size >> PAGE_CACHE_SHIFT;
1751 if (index > end_index)
1752 break;
1753 if (index == end_index) {
1754 nr = i_size & ~PAGE_CACHE_MASK;
1755 if (nr <= offset)
1756 break;
1757 }
1758
1759 desc->error = shmem_getpage(inode, index, &page, sgp, NULL);
1760 if (desc->error) {
1761 if (desc->error == -EINVAL)
1762 desc->error = 0;
1763 break;
1764 }
1765 if (page)
1766 unlock_page(page);
1767
1768
1769
1770
1771
1772 nr = PAGE_CACHE_SIZE;
1773 i_size = i_size_read(inode);
1774 end_index = i_size >> PAGE_CACHE_SHIFT;
1775 if (index == end_index) {
1776 nr = i_size & ~PAGE_CACHE_MASK;
1777 if (nr <= offset) {
1778 if (page)
1779 page_cache_release(page);
1780 break;
1781 }
1782 }
1783 nr -= offset;
1784
1785 if (page) {
1786
1787
1788
1789
1790
1791 if (mapping_writably_mapped(mapping))
1792 flush_dcache_page(page);
1793
1794
1795
1796 if (!offset)
1797 mark_page_accessed(page);
1798 } else {
1799 page = ZERO_PAGE(0);
1800 page_cache_get(page);
1801 }

		/*
		 * Ok, we have the page, and it's up-to-date, so
		 * now we can copy it to user space...
		 *
		 * The actor routine returns how many bytes were actually used..
		 * NOTE! This may not be the same as how much of a user buffer
		 * we filled up (we may be padding etc), so we can only update
		 * "pos" here (the actor routine has to update the user buffer
		 * pointers and the remaining count).
		 */
1813 ret = actor(desc, page, offset, nr);
1814 offset += ret;
1815 index += offset >> PAGE_CACHE_SHIFT;
1816 offset &= ~PAGE_CACHE_MASK;
1817
1818 page_cache_release(page);
1819 if (ret != nr || !desc->count)
1820 break;
1821
1822 cond_resched();
1823 }
1824
1825 *ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
1826 file_accessed(filp);
1827}
1828
1829static ssize_t shmem_file_aio_read(struct kiocb *iocb,
1830 const struct iovec *iov, unsigned long nr_segs, loff_t pos)
1831{
1832 struct file *filp = iocb->ki_filp;
1833 ssize_t retval;
1834 unsigned long seg;
1835 size_t count;
1836 loff_t *ppos = &iocb->ki_pos;
1837
1838 retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
1839 if (retval)
1840 return retval;
1841
1842 for (seg = 0; seg < nr_segs; seg++) {
1843 read_descriptor_t desc;
1844
1845 desc.written = 0;
1846 desc.arg.buf = iov[seg].iov_base;
1847 desc.count = iov[seg].iov_len;
1848 if (desc.count == 0)
1849 continue;
1850 desc.error = 0;
1851 do_shmem_file_read(filp, ppos, &desc, file_read_actor);
1852 retval += desc.written;
1853 if (desc.error) {
1854 retval = retval ?: desc.error;
1855 break;
1856 }
1857 if (desc.count > 0)
1858 break;
1859 }
1860 return retval;
1861}
1862
1863static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
1864 struct pipe_inode_info *pipe, size_t len,
1865 unsigned int flags)
1866{
1867 struct address_space *mapping = in->f_mapping;
1868 struct inode *inode = mapping->host;
1869 unsigned int loff, nr_pages, req_pages;
1870 struct page *pages[PIPE_DEF_BUFFERS];
1871 struct partial_page partial[PIPE_DEF_BUFFERS];
1872 struct page *page;
1873 pgoff_t index, end_index;
1874 loff_t isize, left;
1875 int error, page_nr;
1876 struct splice_pipe_desc spd = {
1877 .pages = pages,
1878 .partial = partial,
1879 .nr_pages_max = PIPE_DEF_BUFFERS,
1880 .flags = flags,
1881 .ops = &page_cache_pipe_buf_ops,
1882 .spd_release = spd_release_page,
1883 };
1884
1885 isize = i_size_read(inode);
1886 if (unlikely(*ppos >= isize))
1887 return 0;
1888
1889 left = isize - *ppos;
1890 if (unlikely(left < len))
1891 len = left;
1892
1893 if (splice_grow_spd(pipe, &spd))
1894 return -ENOMEM;
1895
1896 index = *ppos >> PAGE_CACHE_SHIFT;
1897 loff = *ppos & ~PAGE_CACHE_MASK;
1898 req_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
1899 nr_pages = min(req_pages, pipe->buffers);
1900
1901 spd.nr_pages = find_get_pages_contig(mapping, index,
1902 nr_pages, spd.pages);
1903 index += spd.nr_pages;
1904 error = 0;
1905
1906 while (spd.nr_pages < nr_pages) {
1907 error = shmem_getpage(inode, index, &page, SGP_CACHE, NULL);
1908 if (error)
1909 break;
1910 unlock_page(page);
1911 spd.pages[spd.nr_pages++] = page;
1912 index++;
1913 }
1914
1915 index = *ppos >> PAGE_CACHE_SHIFT;
1916 nr_pages = spd.nr_pages;
1917 spd.nr_pages = 0;
1918
1919 for (page_nr = 0; page_nr < nr_pages; page_nr++) {
1920 unsigned int this_len;
1921
1922 if (!len)
1923 break;
1924
1925 this_len = min_t(unsigned long, len, PAGE_CACHE_SIZE - loff);
1926 page = spd.pages[page_nr];
1927
1928 if (!PageUptodate(page) || page->mapping != mapping) {
1929 error = shmem_getpage(inode, index, &page,
1930 SGP_CACHE, NULL);
1931 if (error)
1932 break;
1933 unlock_page(page);
1934 page_cache_release(spd.pages[page_nr]);
1935 spd.pages[page_nr] = page;
1936 }
1937
1938 isize = i_size_read(inode);
1939 end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
1940 if (unlikely(!isize || index > end_index))
1941 break;
1942
1943 if (end_index == index) {
1944 unsigned int plen;
1945
1946 plen = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
1947 if (plen <= loff)
1948 break;
1949
1950 this_len = min(this_len, plen - loff);
1951 len = this_len;
1952 }
1953
1954 spd.partial[page_nr].offset = loff;
1955 spd.partial[page_nr].len = this_len;
1956 len -= this_len;
1957 loff = 0;
1958 spd.nr_pages++;
1959 index++;
1960 }
1961
1962 while (page_nr < nr_pages)
1963 page_cache_release(spd.pages[page_nr++]);
1964
1965 if (spd.nr_pages)
1966 error = splice_to_pipe(pipe, &spd);
1967
1968 splice_shrink_spd(&spd);
1969
1970 if (error > 0) {
1971 *ppos += error;
1972 file_accessed(in);
1973 }
1974 return error;
1975}

/*
 * llseek SEEK_DATA or SEEK_HOLE through the radix tree.
 */
1980static pgoff_t shmem_seek_hole_data(struct address_space *mapping,
1981 pgoff_t index, pgoff_t end, int whence)
1982{
1983 struct page *page;
1984 struct pagevec pvec;
1985 pgoff_t indices[PAGEVEC_SIZE];
1986 bool done = false;
1987 int i;
1988
1989 pagevec_init(&pvec, 0);
1990 pvec.nr = 1;
1991 while (!done) {
1992 pvec.nr = __find_get_pages(mapping, index,
1993 pvec.nr, pvec.pages, indices);
1994 if (!pvec.nr) {
1995 if (whence == SEEK_DATA)
1996 index = end;
1997 break;
1998 }
1999 for (i = 0; i < pvec.nr; i++, index++) {
2000 if (index < indices[i]) {
2001 if (whence == SEEK_HOLE) {
2002 done = true;
2003 break;
2004 }
2005 index = indices[i];
2006 }
2007 page = pvec.pages[i];
2008 if (page && !radix_tree_exceptional_entry(page)) {
2009 if (!PageUptodate(page))
2010 page = NULL;
2011 }
2012 if (index >= end ||
2013 (page && whence == SEEK_DATA) ||
2014 (!page && whence == SEEK_HOLE)) {
2015 done = true;
2016 break;
2017 }
2018 }
2019 pagevec_remove_exceptionals(&pvec);
2020 pagevec_release(&pvec);
2021 pvec.nr = PAGEVEC_SIZE;
2022 cond_resched();
2023 }
2024 return index;
2025}
2026
2027static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence)
2028{
2029 struct address_space *mapping = file->f_mapping;
2030 struct inode *inode = mapping->host;
2031 pgoff_t start, end;
2032 loff_t new_offset;
2033
2034 if (whence != SEEK_DATA && whence != SEEK_HOLE)
2035 return generic_file_llseek_size(file, offset, whence,
2036 MAX_LFS_FILESIZE, i_size_read(inode));
2037 mutex_lock(&inode->i_mutex);
2038
2039
2040 if (offset < 0)
2041 offset = -EINVAL;
2042 else if (offset >= inode->i_size)
2043 offset = -ENXIO;
2044 else {
2045 start = offset >> PAGE_CACHE_SHIFT;
2046 end = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
2047 new_offset = shmem_seek_hole_data(mapping, start, end, whence);
2048 new_offset <<= PAGE_CACHE_SHIFT;
2049 if (new_offset > offset) {
2050 if (new_offset < inode->i_size)
2051 offset = new_offset;
2052 else if (whence == SEEK_DATA)
2053 offset = -ENXIO;
2054 else
2055 offset = inode->i_size;
2056 }
2057 }
2058
2059 if (offset >= 0)
2060 offset = vfs_setpos(file, offset, MAX_LFS_FILESIZE);
2061 mutex_unlock(&inode->i_mutex);
2062 return offset;
2063}
2064
/*
 * We need a tag: a new tag would expand every radix_tree_node by 8 bytes,
 * so reuse a tag which we firmly believe is never set or cleared on shmem.
 */
2069#define SHMEM_TAG_PINNED PAGECACHE_TAG_TOWRITE
2070#define LAST_SCAN 4
2071
2072static void shmem_tag_pins(struct address_space *mapping)
2073{
2074 struct radix_tree_iter iter;
2075 void **slot;
2076 pgoff_t start;
2077 struct page *page;
2078
2079 lru_add_drain();
2080 start = 0;
2081 rcu_read_lock();
2082
2083restart:
2084 radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
2085 page = radix_tree_deref_slot(slot);
2086 if (!page || radix_tree_exception(page)) {
2087 if (radix_tree_deref_retry(page))
2088 goto restart;
2089 } else if (page_count(page) - page_mapcount(page) > 1) {
2090 spin_lock_irq(&mapping->tree_lock);
2091 radix_tree_tag_set(&mapping->page_tree, iter.index,
2092 SHMEM_TAG_PINNED);
2093 spin_unlock_irq(&mapping->tree_lock);
2094 }
2095
2096 if (need_resched()) {
2097 cond_resched_rcu();
2098 start = iter.index + 1;
2099 goto restart;
2100 }
2101 }
2102 rcu_read_unlock();
2103}
2104
/*
 * Setting SEAL_WRITE requires us to verify there's no pending writer. However,
 * via get_user_pages(), drivers might have some pending I/O without any active
 * user-space mappings (eg., direct-IO, AIO). Therefore, we look at all pages
 * and see whether they have an elevated refcount; if so, we tag them and wait
 * for them to be dropped.
 * The caller must guarantee that no new user will acquire writable references
 * to those pages to avoid races.
 */
2114static int shmem_wait_for_pins(struct address_space *mapping)
2115{
2116 struct radix_tree_iter iter;
2117 void **slot;
2118 pgoff_t start;
2119 struct page *page;
2120 int error, scan;
2121
2122 shmem_tag_pins(mapping);
2123
2124 error = 0;
2125 for (scan = 0; scan <= LAST_SCAN; scan++) {
2126 if (!radix_tree_tagged(&mapping->page_tree, SHMEM_TAG_PINNED))
2127 break;
2128
2129 if (!scan)
2130 lru_add_drain_all();
2131 else if (schedule_timeout_killable((HZ << scan) / 200))
2132 scan = LAST_SCAN;
2133
2134 start = 0;
2135 rcu_read_lock();
2136restart:
2137 radix_tree_for_each_tagged(slot, &mapping->page_tree, &iter,
2138 start, SHMEM_TAG_PINNED) {
2139
2140 page = radix_tree_deref_slot(slot);
2141 if (radix_tree_exception(page)) {
2142 if (radix_tree_deref_retry(page))
2143 goto restart;
2144
2145 page = NULL;
2146 }
2147
2148 if (page &&
2149 page_count(page) - page_mapcount(page) != 1) {
2150 if (scan < LAST_SCAN)
2151 goto continue_resched;
2152
2153
2154
2155
2156
2157
2158 error = -EBUSY;
2159 }
2160
2161 spin_lock_irq(&mapping->tree_lock);
2162 radix_tree_tag_clear(&mapping->page_tree,
2163 iter.index, SHMEM_TAG_PINNED);
2164 spin_unlock_irq(&mapping->tree_lock);
2165continue_resched:
2166 if (need_resched()) {
2167 cond_resched_rcu();
2168 start = iter.index + 1;
2169 goto restart;
2170 }
2171 }
2172 rcu_read_unlock();
2173 }
2174
2175 return error;
2176}
2177
2178#define F_ALL_SEALS (F_SEAL_SEAL | \
2179 F_SEAL_SHRINK | \
2180 F_SEAL_GROW | \
2181 F_SEAL_WRITE)
2182
2183int shmem_add_seals(struct file *file, unsigned int seals)
2184{
2185 struct inode *inode = file_inode(file);
2186 struct shmem_inode_info *info = SHMEM_I(inode);
2187 int error;
2188
	/*
	 * Sealing allows multiple parties to share a shmem-file but restrict
	 * access to a specific subset of file operations. Seals can only be
	 * added, but never removed. This way, mutually untrusted parties can
	 * share common memory regions with a well-defined policy.
	 *
	 * Seals are only supported on special shmem-files and always affect
	 * the whole underlying inode. Once a seal is set, it may prevent some
	 * kinds of access to the file. Currently, the following seals are
	 * defined:
	 *   SEAL_SEAL: Prevent further seals from being set on this file
	 *   SEAL_SHRINK: Prevent the file from shrinking
	 *   SEAL_GROW: Prevent the file from growing
	 *   SEAL_WRITE: Prevent write access to the file
	 *
	 * As we don't require any trust relationship between two parties, we
	 * must prevent seals from being removed. Therefore, sealing a file
	 * only adds a given set of seals to the file, it never touches
	 * existing seals. Furthermore, the "setting seals" operation can be
	 * sealed itself, which basically prevents any further seal from
	 * being set.
	 */
2219 if (file->f_op != &shmem_file_operations)
2220 return -EINVAL;
2221 if (!(file->f_mode & FMODE_WRITE))
2222 return -EPERM;
2223 if (seals & ~(unsigned int)F_ALL_SEALS)
2224 return -EINVAL;
2225
2226 mutex_lock(&inode->i_mutex);
2227
2228 if (info->seals & F_SEAL_SEAL) {
2229 error = -EPERM;
2230 goto unlock;
2231 }
2232
2233 if ((seals & F_SEAL_WRITE) && !(info->seals & F_SEAL_WRITE)) {
2234 error = mapping_deny_writable(file->f_mapping);
2235 if (error)
2236 goto unlock;
2237
2238 error = shmem_wait_for_pins(file->f_mapping);
2239 if (error) {
2240 mapping_allow_writable(file->f_mapping);
2241 goto unlock;
2242 }
2243 }
2244
2245 info->seals |= seals;
2246 error = 0;
2247
2248unlock:
2249 mutex_unlock(&inode->i_mutex);
2250 return error;
2251}
2252EXPORT_SYMBOL_GPL(shmem_add_seals);
2253
2254int shmem_get_seals(struct file *file)
2255{
2256 if (file->f_op != &shmem_file_operations)
2257 return -EINVAL;
2258
2259 return SHMEM_I(file_inode(file))->seals;
2260}
2261EXPORT_SYMBOL_GPL(shmem_get_seals);
2262
2263long shmem_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
2264{
2265 long error;
2266
2267 switch (cmd) {
2268 case F_ADD_SEALS:
2269
2270 if (arg > UINT_MAX)
2271 return -EINVAL;
2272
2273 error = shmem_add_seals(file, arg);
2274 break;
2275 case F_GET_SEALS:
2276 error = shmem_get_seals(file);
2277 break;
2278 default:
2279 error = -EINVAL;
2280 break;
2281 }
2282
2283 return error;
2284}
2285
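/*
 * Preallocate pages (the default mode) or punch a hole (FALLOC_FL_PUNCH_HOLE)
 * over the given range of a tmpfs file.
 */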
2286static long shmem_fallocate(struct file *file, int mode, loff_t offset,
2287 loff_t len)
2288{
2289 struct inode *inode = file_inode(file);
2290 struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
2291 struct shmem_inode_info *info = SHMEM_I(inode);
2292 struct shmem_falloc shmem_falloc;
2293 pgoff_t start, index, end;
2294 int error;
2295
2296 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
2297 return -EOPNOTSUPP;
2298
2299 mutex_lock(&inode->i_mutex);
2300
2301 if (mode & FALLOC_FL_PUNCH_HOLE) {
2302 struct address_space *mapping = file->f_mapping;
2303 loff_t unmap_start = round_up(offset, PAGE_SIZE);
2304 loff_t unmap_end = round_down(offset + len, PAGE_SIZE) - 1;
2305 DECLARE_WAIT_QUEUE_HEAD_ONSTACK(shmem_falloc_waitq);
2306
2307
2308 if (info->seals & F_SEAL_WRITE) {
2309 error = -EPERM;
2310 goto out;
2311 }
2312
2313 shmem_falloc.waitq = &shmem_falloc_waitq;
2314 shmem_falloc.start = unmap_start >> PAGE_SHIFT;
2315 shmem_falloc.next = (unmap_end + 1) >> PAGE_SHIFT;
2316 spin_lock(&inode->i_lock);
2317 inode->i_private = &shmem_falloc;
2318 spin_unlock(&inode->i_lock);
2319
2320 if ((u64)unmap_end > (u64)unmap_start)
2321 unmap_mapping_range(mapping, unmap_start,
2322 1 + unmap_end - unmap_start, 0);
2323 shmem_truncate_range(inode, offset, offset + len - 1);
2324
2325
2326 spin_lock(&inode->i_lock);
2327 inode->i_private = NULL;
2328 wake_up_all(&shmem_falloc_waitq);
2329 WARN_ON_ONCE(!list_empty(&shmem_falloc_waitq.task_list));
2330 spin_unlock(&inode->i_lock);
2331 error = 0;
2332 goto out;
2333 }
2334
2335
2336 error = inode_newsize_ok(inode, offset + len);
2337 if (error)
2338 goto out;
2339
2340 if ((info->seals & F_SEAL_GROW) && offset + len > inode->i_size) {
2341 error = -EPERM;
2342 goto out;
2343 }
2344
2345 start = offset >> PAGE_CACHE_SHIFT;
2346 end = (offset + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
2347
2348 if (sbinfo->max_blocks && end - start > sbinfo->max_blocks) {
2349 error = -ENOSPC;
2350 goto out;
2351 }
2352
2353 shmem_falloc.waitq = NULL;
2354 shmem_falloc.start = start;
2355 shmem_falloc.next = start;
2356 shmem_falloc.nr_falloced = 0;
2357 shmem_falloc.nr_unswapped = 0;
2358 spin_lock(&inode->i_lock);
2359 inode->i_private = &shmem_falloc;
2360 spin_unlock(&inode->i_lock);
2361
2362 for (index = start; index < end; index++) {
2363 struct page *page;
2364
2365
2366
2367
2368
2369 if (signal_pending(current))
2370 error = -EINTR;
2371 else if (shmem_falloc.nr_unswapped > shmem_falloc.nr_falloced)
2372 error = -ENOMEM;
2373 else
2374 error = shmem_getpage(inode, index, &page, SGP_FALLOC,
2375 NULL);
2376 if (error) {
2377
2378 shmem_undo_range(inode,
2379 (loff_t)start << PAGE_CACHE_SHIFT,
2380 (loff_t)index << PAGE_CACHE_SHIFT, true);
2381 goto undone;
2382 }

		/*
		 * Inform shmem_writepage() how far we have reached.
		 * No need for lock or barrier: we have the page lock.
		 */
2388 shmem_falloc.next++;
2389 if (!PageUptodate(page))
2390 shmem_falloc.nr_falloced++;

		/*
		 * If !PageUptodate, leave it that way so that freeable pages
		 * can be recognized if we need to rollback on error later.
		 * But set_page_dirty so that memory pressure will swap rather
		 * than free the pages we are allocating (and SGP_CACHE pages
		 * might still be clean: we now need to mark those dirty too).
		 */
2399 set_page_dirty(page);
2400 unlock_page(page);
2401 page_cache_release(page);
2402 cond_resched();
2403 }
2404
2405 if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size)
2406 i_size_write(inode, offset + len);
2407 inode->i_ctime = CURRENT_TIME;
2408undone:
2409 spin_lock(&inode->i_lock);
2410 inode->i_private = NULL;
2411 spin_unlock(&inode->i_lock);
2412out:
2413 mutex_unlock(&inode->i_mutex);
2414 return error;
2415}
2416
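/* Report block and inode limits/usage for statfs(2); zero fields mean "unlimited" */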
2417static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
2418{
2419 struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb);
2420
2421 buf->f_type = TMPFS_MAGIC;
2422 buf->f_bsize = PAGE_CACHE_SIZE;
2423 buf->f_namelen = NAME_MAX;
2424 if (sbinfo->max_blocks) {
2425 buf->f_blocks = sbinfo->max_blocks;
2426 buf->f_bavail =
2427 buf->f_bfree = sbinfo->max_blocks -
2428 percpu_counter_sum(&sbinfo->used_blocks);
2429 }
2430 if (sbinfo->max_inodes) {
2431 buf->f_files = sbinfo->max_inodes;
2432 buf->f_ffree = sbinfo->free_inodes;
2433 }
2434
2435 return 0;
2436}

/*
 * File creation. Allocate an inode, and we're done..
 */
2441static int
2442shmem_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
2443{
2444 struct inode *inode;
2445 int error = -ENOSPC;
2446
2447 inode = shmem_get_inode(dir->i_sb, dir, mode, dev, VM_NORESERVE);
2448 if (inode) {
2449 error = security_inode_init_security(inode, dir,
2450 &dentry->d_name,
2451 shmem_initxattrs, NULL);
2452 if (error) {
2453 if (error != -EOPNOTSUPP) {
2454 iput(inode);
2455 return error;
2456 }
2457 }
2458#ifdef CONFIG_TMPFS_POSIX_ACL
2459 error = generic_acl_init(inode, dir);
2460 if (error) {
2461 iput(inode);
2462 return error;
2463 }
2464#else
2465 error = 0;
2466#endif
2467 dir->i_size += BOGO_DIRENT_SIZE;
2468 dir->i_ctime = dir->i_mtime = CURRENT_TIME;
2469 d_instantiate(dentry, inode);
2470 dget(dentry);
2471 }
2472 return error;
2473}
2474
2475static int shmem_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
2476{
2477 int error;
2478
2479 if ((error = shmem_mknod(dir, dentry, mode | S_IFDIR, 0)))
2480 return error;
2481 inc_nlink(dir);
2482 return 0;
2483}
2484
2485static int shmem_create(struct inode *dir, struct dentry *dentry, umode_t mode,
2486 bool excl)
2487{
2488 return shmem_mknod(dir, dentry, mode | S_IFREG, 0);
2489}
2490
2491
2492
2493
2494static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
2495{
2496 struct inode *inode = old_dentry->d_inode;
2497 int ret;
2498
2499
2500
2501
2502
2503
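	/*
	 * No ordinary (disk based) filesystem counts links as inodes;
	 * but each new link needs a new dentry, pinning lowmem, and
	 * tmpfs dentries cannot be pruned until they are unlinked.
	 */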
2504 ret = shmem_reserve_inode(inode->i_sb);
2505 if (ret)
2506 goto out;
2507
2508 dir->i_size += BOGO_DIRENT_SIZE;
2509 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
2510 inc_nlink(inode);
2511 ihold(inode);
2512 dget(dentry);
2513 d_instantiate(dentry, inode);
2514out:
2515 return ret;
2516}
2517
2518static int shmem_unlink(struct inode *dir, struct dentry *dentry)
2519{
2520 struct inode *inode = dentry->d_inode;
2521
2522 if (inode->i_nlink > 1 && !S_ISDIR(inode->i_mode))
2523 shmem_free_inode(inode->i_sb);
2524
2525 dir->i_size -= BOGO_DIRENT_SIZE;
2526 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
2527 drop_nlink(inode);
2528 dput(dentry);
2529 return 0;
2530}
2531
2532static int shmem_rmdir(struct inode *dir, struct dentry *dentry)
2533{
2534 if (!simple_empty(dentry))
2535 return -ENOTEMPTY;
2536
2537 drop_nlink(dentry->d_inode);
2538 drop_nlink(dir);
2539 return shmem_unlink(dir, dentry);
2540}
2541
2542static int shmem_exchange(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry)
2543{
2544 bool old_is_dir = S_ISDIR(old_dentry->d_inode->i_mode);
2545 bool new_is_dir = S_ISDIR(new_dentry->d_inode->i_mode);
2546
2547 if (old_dir != new_dir && old_is_dir != new_is_dir) {
2548 if (old_is_dir) {
2549 drop_nlink(old_dir);
2550 inc_nlink(new_dir);
2551 } else {
2552 drop_nlink(new_dir);
2553 inc_nlink(old_dir);
2554 }
2555 }
2556 old_dir->i_ctime = old_dir->i_mtime =
2557 new_dir->i_ctime = new_dir->i_mtime =
2558 old_dentry->d_inode->i_ctime =
2559 new_dentry->d_inode->i_ctime = CURRENT_TIME;
2560
2561 return 0;
2562}
2563
2564static int shmem_whiteout(struct inode *old_dir, struct dentry *old_dentry)
2565{
2566 struct dentry *whiteout;
2567 int error;
2568
2569 whiteout = d_alloc(old_dentry->d_parent, &old_dentry->d_name);
2570 if (!whiteout)
2571 return -ENOMEM;
2572
2573 error = shmem_mknod(old_dir, whiteout,
2574 S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV);
2575 dput(whiteout);
2576 if (error)
2577 return error;
2578
2579
2580
2581
2582
2583
2584
2585
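	/*
	 * Cheat and hash the whiteout while the old dentry is still in
	 * place, instead of playing games with FS_RENAME_DOES_D_MOVE.
	 *
	 * d_lookup() will consistently find one of them at this point,
	 * not sure which one, but that isn't even important.
	 */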
2586 d_rehash(whiteout);
2587 return 0;
2588}
2589
2590
2591
2592
2593
2594
2595
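/*
 * The VFS layer already does all the dentry stuff for rename; we just
 * have to decrement the usage count for the target if it exists, so
 * that the VFS layer correctly frees it when it gets overwritten.
 */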
2596static int shmem_rename2(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags)
2597{
2598 struct inode *inode = old_dentry->d_inode;
2599 int they_are_dirs = S_ISDIR(inode->i_mode);
2600
2601 if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
2602 return -EINVAL;
2603
2604 if (flags & RENAME_EXCHANGE)
2605 return shmem_exchange(old_dir, old_dentry, new_dir, new_dentry);
2606
2607 if (!simple_empty(new_dentry))
2608 return -ENOTEMPTY;
2609
2610 if (flags & RENAME_WHITEOUT) {
2611 int error;
2612
2613 error = shmem_whiteout(old_dir, old_dentry);
2614 if (error)
2615 return error;
2616 }
2617
2618 if (new_dentry->d_inode) {
2619 (void) shmem_unlink(new_dir, new_dentry);
2620 if (they_are_dirs)
2621 drop_nlink(old_dir);
2622 } else if (they_are_dirs) {
2623 drop_nlink(old_dir);
2624 inc_nlink(new_dir);
2625 }
2626
2627 old_dir->i_size -= BOGO_DIRENT_SIZE;
2628 new_dir->i_size += BOGO_DIRENT_SIZE;
2629 old_dir->i_ctime = old_dir->i_mtime =
2630 new_dir->i_ctime = new_dir->i_mtime =
2631 inode->i_ctime = CURRENT_TIME;
2632 return 0;
2633}
2634
2635static int shmem_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry)
2636{
2637 return shmem_rename2(old_dir, old_dentry, new_dir, new_dentry, 0);
2638}
2639
2640static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
2641{
2642 int error;
2643 int len;
2644 struct inode *inode;
2645 struct page *page;
2646 char *kaddr;
2647 struct shmem_inode_info *info;
2648
2649 len = strlen(symname) + 1;
2650 if (len > PAGE_CACHE_SIZE)
2651 return -ENAMETOOLONG;
2652
2653 inode = shmem_get_inode(dir->i_sb, dir, S_IFLNK|S_IRWXUGO, 0, VM_NORESERVE);
2654 if (!inode)
2655 return -ENOSPC;
2656
2657 error = security_inode_init_security(inode, dir, &dentry->d_name,
2658 shmem_initxattrs, NULL);
2659 if (error) {
2660 if (error != -EOPNOTSUPP) {
2661 iput(inode);
2662 return error;
2663 }
2664 error = 0;
2665 }
2666
2667 info = SHMEM_I(inode);
2668 inode->i_size = len-1;
2669 if (len <= SHORT_SYMLINK_LEN) {
2670 info->symlink = kmemdup(symname, len, GFP_KERNEL);
2671 if (!info->symlink) {
2672 iput(inode);
2673 return -ENOMEM;
2674 }
2675 inode->i_op = &shmem_short_symlink_operations;
2676 } else {
2677 error = shmem_getpage(inode, 0, &page, SGP_WRITE, NULL);
2678 if (error) {
2679 iput(inode);
2680 return error;
2681 }
2682 inode->i_mapping->a_ops = &shmem_aops;
2683 inode->i_op = &shmem_symlink_inode_operations;
2684 kaddr = kmap_atomic(page);
2685 memcpy(kaddr, symname, len);
2686 kunmap_atomic(kaddr);
2687 SetPageUptodate(page);
2688 set_page_dirty(page);
2689 unlock_page(page);
2690 page_cache_release(page);
2691 }
2692 dir->i_size += BOGO_DIRENT_SIZE;
2693 dir->i_ctime = dir->i_mtime = CURRENT_TIME;
2694 d_instantiate(dentry, inode);
2695 dget(dentry);
2696 return 0;
2697}
2698
2699static void *shmem_follow_short_symlink(struct dentry *dentry, struct nameidata *nd)
2700{
2701 nd_set_link(nd, SHMEM_I(dentry->d_inode)->symlink);
2702 return NULL;
2703}
2704
2705static void *shmem_follow_link(struct dentry *dentry, struct nameidata *nd)
2706{
2707 struct page *page = NULL;
2708 int error = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ, NULL);
2709 nd_set_link(nd, error ? ERR_PTR(error) : kmap(page));
2710 if (page)
2711 unlock_page(page);
2712 return page;
2713}
2714
2715static void shmem_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie)
2716{
2717 if (!IS_ERR(nd_get_link(nd))) {
2718 struct page *page = cookie;
2719 kunmap(page);
2720 mark_page_accessed(page);
2721 page_cache_release(page);
2722 }
2723}
2724
2725#ifdef CONFIG_TMPFS_XATTR
2726
2727
2728
2729
2730
2731
2732
2733
2734
2735
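/*
 * Callback for security_inode_init_security(): copy the security.*
 * xattrs supplied by the LSM into the inode's in-memory xattr list.
 */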
2736static int shmem_initxattrs(struct inode *inode,
2737 const struct xattr *xattr_array,
2738 void *fs_info)
2739{
2740 struct shmem_inode_info *info = SHMEM_I(inode);
2741 const struct xattr *xattr;
2742 struct simple_xattr *new_xattr;
2743 size_t len;
2744
2745 for (xattr = xattr_array; xattr->name != NULL; xattr++) {
2746 new_xattr = simple_xattr_alloc(xattr->value, xattr->value_len);
2747 if (!new_xattr)
2748 return -ENOMEM;
2749
2750 len = strlen(xattr->name) + 1;
2751 new_xattr->name = kmalloc(XATTR_SECURITY_PREFIX_LEN + len,
2752 GFP_KERNEL);
2753 if (!new_xattr->name) {
2754 kfree(new_xattr);
2755 return -ENOMEM;
2756 }
2757
2758 memcpy(new_xattr->name, XATTR_SECURITY_PREFIX,
2759 XATTR_SECURITY_PREFIX_LEN);
2760 memcpy(new_xattr->name + XATTR_SECURITY_PREFIX_LEN,
2761 xattr->name, len);
2762
2763 simple_xattr_list_add(&info->xattrs, new_xattr);
2764 }
2765
2766 return 0;
2767}
2768
2769static const struct xattr_handler *shmem_xattr_handlers[] = {
2770#ifdef CONFIG_TMPFS_POSIX_ACL
2771 &generic_acl_access_handler,
2772 &generic_acl_default_handler,
2773#endif
2774 NULL
2775};
2776
2777static int shmem_xattr_validate(const char *name)
2778{
2779 struct { const char *prefix; size_t len; } arr[] = {
2780 { XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN },
2781 { XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN }
2782 };
2783 int i;
2784
2785 for (i = 0; i < ARRAY_SIZE(arr); i++) {
2786 size_t preflen = arr[i].len;
2787 if (strncmp(name, arr[i].prefix, preflen) == 0) {
2788 if (!name[preflen])
2789 return -EINVAL;
2790 return 0;
2791 }
2792 }
2793 return -EOPNOTSUPP;
2794}
2795
2796static ssize_t shmem_getxattr(struct dentry *dentry, const char *name,
2797 void *buffer, size_t size)
2798{
2799 struct shmem_inode_info *info = SHMEM_I(dentry->d_inode);
2800 int err;
2801
2802
2803
2804
2805
2806
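	/*
	 * If this is a request for a synthetic attribute in the system.*
	 * namespace use the generic infrastructure to resolve a handler
	 * for it via sb->s_xattr.
	 */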
2807 if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
2808 return generic_getxattr(dentry, name, buffer, size);
2809
2810 err = shmem_xattr_validate(name);
2811 if (err)
2812 return err;
2813
2814 return simple_xattr_get(&info->xattrs, name, buffer, size);
2815}
2816
2817static int shmem_setxattr(struct dentry *dentry, const char *name,
2818 const void *value, size_t size, int flags)
2819{
2820 struct shmem_inode_info *info = SHMEM_I(dentry->d_inode);
2821 int err;
2822
2823
2824
2825
2826
2827
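	/* Route synthetic system.* attributes through sb->s_xattr, as in shmem_getxattr() */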
2828 if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
2829 return generic_setxattr(dentry, name, value, size, flags);
2830
2831 err = shmem_xattr_validate(name);
2832 if (err)
2833 return err;
2834
2835 return simple_xattr_set(&info->xattrs, name, value, size, flags);
2836}
2837
2838static int shmem_removexattr(struct dentry *dentry, const char *name)
2839{
2840 struct shmem_inode_info *info = SHMEM_I(dentry->d_inode);
2841 int err;
2842
2843
2844
2845
2846
2847
2848 if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
2849 return generic_removexattr(dentry, name);
2850
2851 err = shmem_xattr_validate(name);
2852 if (err)
2853 return err;
2854
2855 return simple_xattr_remove(&info->xattrs, name);
2856}
2857
2858static ssize_t shmem_listxattr(struct dentry *dentry, char *buffer, size_t size)
2859{
2860 struct shmem_inode_info *info = SHMEM_I(dentry->d_inode);
2861 return simple_xattr_list(&info->xattrs, buffer, size);
2862}
2863#endif
2864
2865static const struct inode_operations shmem_short_symlink_operations = {
2866 .readlink = generic_readlink,
2867 .follow_link = shmem_follow_short_symlink,
2868#ifdef CONFIG_TMPFS_XATTR
2869 .setxattr = shmem_setxattr,
2870 .getxattr = shmem_getxattr,
2871 .listxattr = shmem_listxattr,
2872 .removexattr = shmem_removexattr,
2873#endif
2874};
2875
2876static const struct inode_operations shmem_symlink_inode_operations = {
2877 .readlink = generic_readlink,
2878 .follow_link = shmem_follow_link,
2879 .put_link = shmem_put_link,
2880#ifdef CONFIG_TMPFS_XATTR
2881 .setxattr = shmem_setxattr,
2882 .getxattr = shmem_getxattr,
2883 .listxattr = shmem_listxattr,
2884 .removexattr = shmem_removexattr,
2885#endif
2886};
2887
2888static struct dentry *shmem_get_parent(struct dentry *child)
2889{
2890 return ERR_PTR(-ESTALE);
2891}
2892
2893static int shmem_match(struct inode *ino, void *vfh)
2894{
2895 __u32 *fh = vfh;
2896 __u64 inum = fh[2];
2897 inum = (inum << 32) | fh[1];
2898 return ino->i_ino == inum && fh[0] == ino->i_generation;
2899}
2900
2901static struct dentry *shmem_fh_to_dentry(struct super_block *sb,
2902 struct fid *fid, int fh_len, int fh_type)
2903{
2904 struct inode *inode;
2905 struct dentry *dentry = NULL;
2906 u64 inum;
2907
2908 if (fh_len < 3)
2909 return NULL;
2910
2911 inum = fid->raw[2];
2912 inum = (inum << 32) | fid->raw[1];
2913
2914 inode = ilookup5(sb, (unsigned long)(inum + fid->raw[0]),
2915 shmem_match, fid->raw);
2916 if (inode) {
2917 dentry = d_find_alias(inode);
2918 iput(inode);
2919 }
2920
2921 return dentry;
2922}
2923
2924static int shmem_encode_fh(struct inode *inode, __u32 *fh, int *len,
2925 struct inode *parent)
2926{
2927 if (*len < 3) {
2928 *len = 3;
2929 return FILEID_INVALID;
2930 }
2931
2932 if (inode_unhashed(inode)) {
2933
2934
2935
2936
2937
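		/*
		 * tmpfs inodes are not hashed at creation time, and
		 * __insert_inode_hash() is not idempotent; take a local
		 * lock so that concurrent callers hash each inode only once.
		 */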
2938 static DEFINE_SPINLOCK(lock);
2939 spin_lock(&lock);
2940 if (inode_unhashed(inode))
2941 __insert_inode_hash(inode,
2942 inode->i_ino + inode->i_generation);
2943 spin_unlock(&lock);
2944 }
2945
2946 fh[0] = inode->i_generation;
2947 fh[1] = inode->i_ino;
2948 fh[2] = ((__u64)inode->i_ino) >> 32;
2949
2950 *len = 3;
2951 return 1;
2952}
2953
2954static const struct export_operations shmem_export_ops = {
2955 .get_parent = shmem_get_parent,
2956 .encode_fh = shmem_encode_fh,
2957 .fh_to_dentry = shmem_fh_to_dentry,
2958};
2959
2960static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo,
2961 bool remount)
2962{
2963 char *this_char, *value, *rest;
2964 struct mempolicy *mpol = NULL;
2965 uid_t uid;
2966 gid_t gid;
2967
2968 while (options != NULL) {
2969 this_char = options;
2970 for (;;) {
2971
2972
2973
2974
2975
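			/*
			 * NUL-terminate this option: unfortunately,
			 * mount options form a comma-separated list,
			 * but mpol's nodelist may also contain commas.
			 */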
2976 options = strchr(options, ',');
2977 if (options == NULL)
2978 break;
2979 options++;
2980 if (!isdigit(*options)) {
2981 options[-1] = '\0';
2982 break;
2983 }
2984 }
2985 if (!*this_char)
2986 continue;
2987 if ((value = strchr(this_char,'=')) != NULL) {
2988 *value++ = 0;
2989 } else {
2990 printk(KERN_ERR
2991 "tmpfs: No value for mount option '%s'\n",
2992 this_char);
2993 goto error;
2994 }
2995
2996 if (!strcmp(this_char,"size")) {
2997 unsigned long long size;
2998 size = memparse(value,&rest);
2999 if (*rest == '%') {
3000 size <<= PAGE_SHIFT;
3001 size *= totalram_pages;
3002 do_div(size, 100);
3003 rest++;
3004 }
3005 if (*rest)
3006 goto bad_val;
3007 sbinfo->max_blocks =
3008 DIV_ROUND_UP(size, PAGE_CACHE_SIZE);
3009 } else if (!strcmp(this_char,"nr_blocks")) {
3010 sbinfo->max_blocks = memparse(value, &rest);
3011 if (*rest)
3012 goto bad_val;
3013 } else if (!strcmp(this_char,"nr_inodes")) {
3014 sbinfo->max_inodes = memparse(value, &rest);
3015 if (*rest)
3016 goto bad_val;
3017 } else if (!strcmp(this_char,"mode")) {
3018 if (remount)
3019 continue;
3020 sbinfo->mode = simple_strtoul(value, &rest, 8) & 07777;
3021 if (*rest)
3022 goto bad_val;
3023 } else if (!strcmp(this_char,"uid")) {
3024 if (remount)
3025 continue;
3026 uid = simple_strtoul(value, &rest, 0);
3027 if (*rest)
3028 goto bad_val;
3029 sbinfo->uid = make_kuid(current_user_ns(), uid);
3030 if (!uid_valid(sbinfo->uid))
3031 goto bad_val;
3032 } else if (!strcmp(this_char,"gid")) {
3033 if (remount)
3034 continue;
3035 gid = simple_strtoul(value, &rest, 0);
3036 if (*rest)
3037 goto bad_val;
3038 sbinfo->gid = make_kgid(current_user_ns(), gid);
3039 if (!gid_valid(sbinfo->gid))
3040 goto bad_val;
3041 } else if (!strcmp(this_char,"mpol")) {
3042 mpol_put(mpol);
3043 mpol = NULL;
3044 if (mpol_parse_str(value, &mpol))
3045 goto bad_val;
3046 } else {
3047 printk(KERN_ERR "tmpfs: Bad mount option %s\n",
3048 this_char);
3049 goto error;
3050 }
3051 }
3052 sbinfo->mpol = mpol;
3053 return 0;
3054
3055bad_val:
3056 printk(KERN_ERR "tmpfs: Bad value '%s' for mount option '%s'\n",
3057 value, this_char);
3058error:
3059 mpol_put(mpol);
3060 return 1;
3061
3062}
3063
3064static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
3065{
3066 struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
3067 struct shmem_sb_info config = *sbinfo;
3068 unsigned long inodes;
3069 int error = -EINVAL;
3070
3071 config.mpol = NULL;
3072 if (shmem_parse_options(data, &config, true))
3073 return error;
3074
3075 spin_lock(&sbinfo->stat_lock);
3076 inodes = sbinfo->max_inodes - sbinfo->free_inodes;
3077 if (percpu_counter_compare(&sbinfo->used_blocks, config.max_blocks) > 0)
3078 goto out;
3079 if (config.max_inodes < inodes)
3080 goto out;
3081
3082
3083
3084
3085
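	/*
	 * The tests above disallow limited->unlimited while any are in use;
	 * but we must separately disallow unlimited->limited, because in
	 * that case we have no record of what was in use before.
	 */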
3086 if (config.max_blocks && !sbinfo->max_blocks)
3087 goto out;
3088 if (config.max_inodes && !sbinfo->max_inodes)
3089 goto out;
3090
3091 error = 0;
3092 sbinfo->max_blocks = config.max_blocks;
3093 sbinfo->max_inodes = config.max_inodes;
3094 sbinfo->free_inodes = config.max_inodes - inodes;
3095
3096
3097
3098
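	/*
	 * Preserve the previous mempolicy unless an mpol remount option was given.
	 */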
3099 if (config.mpol) {
3100 mpol_put(sbinfo->mpol);
3101 sbinfo->mpol = config.mpol;
3102 }
3103out:
3104 spin_unlock(&sbinfo->stat_lock);
3105 return error;
3106}
3107
3108static int shmem_show_options(struct seq_file *seq, struct dentry *root)
3109{
3110 struct shmem_sb_info *sbinfo = SHMEM_SB(root->d_sb);
3111
3112 if (sbinfo->max_blocks != shmem_default_max_blocks())
3113 seq_printf(seq, ",size=%luk",
3114 sbinfo->max_blocks << (PAGE_CACHE_SHIFT - 10));
3115 if (sbinfo->max_inodes != shmem_default_max_inodes())
3116 seq_printf(seq, ",nr_inodes=%lu", sbinfo->max_inodes);
3117 if (sbinfo->mode != (S_IRWXUGO | S_ISVTX))
3118 seq_printf(seq, ",mode=%03ho", sbinfo->mode);
3119 if (!uid_eq(sbinfo->uid, GLOBAL_ROOT_UID))
3120 seq_printf(seq, ",uid=%u",
3121 from_kuid_munged(&init_user_ns, sbinfo->uid));
3122 if (!gid_eq(sbinfo->gid, GLOBAL_ROOT_GID))
3123 seq_printf(seq, ",gid=%u",
3124 from_kgid_munged(&init_user_ns, sbinfo->gid));
3125 shmem_show_mpol(seq, sbinfo->mpol);
3126 return 0;
3127}
3128
3129#define MFD_NAME_PREFIX "memfd:"
3130#define MFD_NAME_PREFIX_LEN (sizeof(MFD_NAME_PREFIX) - 1)
3131#define MFD_NAME_MAX_LEN (NAME_MAX - MFD_NAME_PREFIX_LEN)
3132
3133#define MFD_ALL_FLAGS (MFD_CLOEXEC | MFD_ALLOW_SEALING)
3134
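/*
 * memfd_create() returns a file descriptor for an unlinked tmpfs file.
 * MFD_CLOEXEC sets close-on-exec on the new descriptor; MFD_ALLOW_SEALING
 * clears F_SEAL_SEAL so that seals can be added later with fcntl(F_ADD_SEALS).
 */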
3135SYSCALL_DEFINE2(memfd_create,
3136 const char __user *, uname,
3137 unsigned int, flags)
3138{
3139 struct shmem_inode_info *info;
3140 struct file *file;
3141 int fd, error;
3142 char *name;
3143 long len;
3144
3145 if (flags & ~(unsigned int)MFD_ALL_FLAGS)
3146 return -EINVAL;
3147
3148
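	/* length includes the terminating NUL, and is bounded by MFD_NAME_MAX_LEN */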
3149 len = strnlen_user(uname, MFD_NAME_MAX_LEN + 1);
3150 if (len <= 0)
3151 return -EFAULT;
3152 if (len > MFD_NAME_MAX_LEN + 1)
3153 return -EINVAL;
3154
3155 name = kmalloc(len + MFD_NAME_PREFIX_LEN, GFP_TEMPORARY);
3156 if (!name)
3157 return -ENOMEM;
3158
3159 strcpy(name, MFD_NAME_PREFIX);
3160 if (copy_from_user(&name[MFD_NAME_PREFIX_LEN], uname, len)) {
3161 error = -EFAULT;
3162 goto err_name;
3163 }
3164
3165
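	/* the terminating NUL may have changed since strnlen_user() returned: recheck */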
3166 if (name[len + MFD_NAME_PREFIX_LEN - 1]) {
3167 error = -EFAULT;
3168 goto err_name;
3169 }
3170
3171 fd = get_unused_fd_flags((flags & MFD_CLOEXEC) ? O_CLOEXEC : 0);
3172 if (fd < 0) {
3173 error = fd;
3174 goto err_name;
3175 }
3176
3177 file = shmem_file_setup(name, 0, VM_NORESERVE);
3178 if (IS_ERR(file)) {
3179 error = PTR_ERR(file);
3180 goto err_fd;
3181 }
3182 info = SHMEM_I(file_inode(file));
3183 file->f_mode |= FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE;
3184 file->f_flags |= O_RDWR | O_LARGEFILE;
3185 if (flags & MFD_ALLOW_SEALING)
3186 info->seals &= ~F_SEAL_SEAL;
3187
3188 fd_install(fd, file);
3189 kfree(name);
3190 return fd;
3191
3192err_fd:
3193 put_unused_fd(fd);
3194err_name:
3195 kfree(name);
3196 return error;
3197}
3198
3199#endif
3200
3201static void shmem_put_super(struct super_block *sb)
3202{
3203 struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
3204
3205 percpu_counter_destroy(&sbinfo->used_blocks);
3206 mpol_put(sbinfo->mpol);
3207 kfree(sbinfo);
3208 sb->s_fs_info = NULL;
3209}
3210
3211int shmem_fill_super(struct super_block *sb, void *data, int silent)
3212{
3213 struct inode *inode;
3214 struct shmem_sb_info *sbinfo;
3215 int err = -ENOMEM;
3216
3217
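	/* Round up to L1_CACHE_BYTES to resist false sharing */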
3218 sbinfo = kzalloc(max((int)sizeof(struct shmem_sb_info),
3219 L1_CACHE_BYTES), GFP_KERNEL);
3220 if (!sbinfo)
3221 return -ENOMEM;
3222
3223 sbinfo->mode = S_IRWXUGO | S_ISVTX;
3224 sbinfo->uid = current_fsuid();
3225 sbinfo->gid = current_fsgid();
3226 sb->s_fs_info = sbinfo;
3227
3228#ifdef CONFIG_TMPFS
3229
3230
3231
3232
3233
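	/*
	 * Per default we only allow half of the physical ram per
	 * tmpfs instance, limiting inodes to one per page of lowmem;
	 * but the internal instance is left unlimited.
	 */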
3234 if (!(sb->s_flags & MS_NOUSER)) {
3235 sbinfo->max_blocks = shmem_default_max_blocks();
3236 sbinfo->max_inodes = shmem_default_max_inodes();
3237 if (shmem_parse_options(data, sbinfo, false)) {
3238 err = -EINVAL;
3239 goto failed;
3240 }
3241 }
3242 sb->s_export_op = &shmem_export_ops;
3243 sb->s_flags |= MS_NOSEC;
3244#else
3245 sb->s_flags |= MS_NOUSER;
3246#endif
3247
3248 spin_lock_init(&sbinfo->stat_lock);
3249 if (percpu_counter_init(&sbinfo->used_blocks, 0, GFP_KERNEL))
3250 goto failed;
3251 sbinfo->free_inodes = sbinfo->max_inodes;
3252
3253 sb->s_maxbytes = MAX_LFS_FILESIZE;
3254 sb->s_blocksize = PAGE_CACHE_SIZE;
3255 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
3256 sb->s_magic = TMPFS_MAGIC;
3257 sb->s_op = &shmem_ops;
3258 sb->s_time_gran = 1;
3259#ifdef CONFIG_TMPFS_XATTR
3260 sb->s_xattr = shmem_xattr_handlers;
3261#endif
3262#ifdef CONFIG_TMPFS_POSIX_ACL
3263 sb->s_flags |= MS_POSIXACL;
3264#endif
3265
3266 inode = shmem_get_inode(sb, NULL, S_IFDIR | sbinfo->mode, 0, VM_NORESERVE);
3267 if (!inode)
3268 goto failed;
3269 inode->i_uid = sbinfo->uid;
3270 inode->i_gid = sbinfo->gid;
3271 sb->s_root = d_make_root(inode);
3272 if (!sb->s_root)
3273 goto failed;
3274 return 0;
3275
3276failed:
3277 shmem_put_super(sb);
3278 return err;
3279}
3280
3281static struct kmem_cache *shmem_inode_cachep;
3282
3283static struct inode *shmem_alloc_inode(struct super_block *sb)
3284{
3285 struct shmem_inode_info *info;
3286 info = kmem_cache_alloc(shmem_inode_cachep, GFP_KERNEL);
3287 if (!info)
3288 return NULL;
3289 return &info->vfs_inode;
3290}
3291
3292static void shmem_destroy_callback(struct rcu_head *head)
3293{
3294 struct inode *inode = container_of(head, struct inode, i_rcu);
3295 kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode));
3296}
3297
3298static void shmem_destroy_inode(struct inode *inode)
3299{
3300 if (S_ISREG(inode->i_mode))
3301 mpol_free_shared_policy(&SHMEM_I(inode)->policy);
3302 call_rcu(&inode->i_rcu, shmem_destroy_callback);
3303}
3304
3305static void shmem_init_inode(void *foo)
3306{
3307 struct shmem_inode_info *info = foo;
3308 inode_init_once(&info->vfs_inode);
3309}
3310
3311static int shmem_init_inodecache(void)
3312{
3313 shmem_inode_cachep = kmem_cache_create("shmem_inode_cache",
3314 sizeof(struct shmem_inode_info),
3315 0, SLAB_PANIC, shmem_init_inode);
3316 return 0;
3317}
3318
3319static void shmem_destroy_inodecache(void)
3320{
3321 kmem_cache_destroy(shmem_inode_cachep);
3322}
3323
3324static const struct address_space_operations shmem_aops = {
3325 .writepage = shmem_writepage,
3326 .set_page_dirty = __set_page_dirty_no_writeback,
3327#ifdef CONFIG_TMPFS
3328 .write_begin = shmem_write_begin,
3329 .write_end = shmem_write_end,
3330#endif
3331 .migratepage = migrate_page,
3332 .error_remove_page = generic_error_remove_page,
3333};
3334
3335static const struct file_operations shmem_file_operations = {
3336 .mmap = shmem_mmap,
3337#ifdef CONFIG_TMPFS
3338 .llseek = shmem_file_llseek,
3339 .read = do_sync_read,
3340 .write = do_sync_write,
3341 .aio_read = shmem_file_aio_read,
3342 .aio_write = generic_file_aio_write,
3343 .fsync = noop_fsync,
3344 .splice_read = shmem_file_splice_read,
3345 .splice_write = generic_file_splice_write,
3346 .fallocate = shmem_fallocate,
3347#endif
3348};
3349
3350static const struct inode_operations shmem_inode_operations = {
3351 .setattr = shmem_setattr,
3352#ifdef CONFIG_TMPFS_XATTR
3353 .setxattr = shmem_setxattr,
3354 .getxattr = shmem_getxattr,
3355 .listxattr = shmem_listxattr,
3356 .removexattr = shmem_removexattr,
3357#endif
3358};
3359
3360static const struct inode_operations_wrapper shmem_dir_inode_operations = {
3361 .ops = {
3362#ifdef CONFIG_TMPFS
3363 .create = shmem_create,
3364 .lookup = simple_lookup,
3365 .link = shmem_link,
3366 .unlink = shmem_unlink,
3367 .symlink = shmem_symlink,
3368 .mkdir = shmem_mkdir,
3369 .rmdir = shmem_rmdir,
3370 .mknod = shmem_mknod,
3371 .rename = shmem_rename,
3372#endif
3373#ifdef CONFIG_TMPFS_XATTR
3374 .setxattr = shmem_setxattr,
3375 .getxattr = shmem_getxattr,
3376 .listxattr = shmem_listxattr,
3377 .removexattr = shmem_removexattr,
3378#endif
3379#ifdef CONFIG_TMPFS_POSIX_ACL
3380 .setattr = shmem_setattr,
3381#endif
3382 },
3383#ifdef CONFIG_TMPFS
3384 .rename2 = shmem_rename2,
3385#endif
3386};
3387
3388static const struct inode_operations shmem_special_inode_operations = {
3389#ifdef CONFIG_TMPFS_XATTR
3390 .setxattr = shmem_setxattr,
3391 .getxattr = shmem_getxattr,
3392 .listxattr = shmem_listxattr,
3393 .removexattr = shmem_removexattr,
3394#endif
3395#ifdef CONFIG_TMPFS_POSIX_ACL
3396 .setattr = shmem_setattr,
3397#endif
3398};
3399
3400static const struct super_operations shmem_ops = {
3401 .alloc_inode = shmem_alloc_inode,
3402 .destroy_inode = shmem_destroy_inode,
3403#ifdef CONFIG_TMPFS
3404 .statfs = shmem_statfs,
3405 .remount_fs = shmem_remount_fs,
3406 .show_options = shmem_show_options,
3407#endif
3408 .evict_inode = shmem_evict_inode,
3409 .drop_inode = generic_delete_inode,
3410 .put_super = shmem_put_super,
3411};
3412
3413static const struct vm_operations_struct shmem_vm_ops = {
3414 .fault = shmem_fault,
3415#ifdef CONFIG_NUMA
3416 .set_policy = shmem_set_policy,
3417 .get_policy = shmem_get_policy,
3418#endif
3419 .remap_pages = generic_file_remap_pages,
3420};
3421
3422static struct dentry *shmem_mount(struct file_system_type *fs_type,
3423 int flags, const char *dev_name, void *data)
3424{
3425 return mount_nodev(fs_type, flags, data, shmem_fill_super);
3426}
3427
3428static struct file_system_type shmem_fs_type = {
3429 .owner = THIS_MODULE,
3430 .name = "tmpfs",
3431 .mount = shmem_mount,
3432 .kill_sb = kill_litter_super,
3433 .fs_flags = FS_USERNS_MOUNT,
3434};
3435
3436int __init shmem_init(void)
3437{
3438 int error;
3439
3440
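	/* Nothing to do if the inode cache is already set up (e.g. by rootfs) */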
3441 if (shmem_inode_cachep)
3442 return 0;
3443
3444 error = bdi_init(&shmem_backing_dev_info);
3445 if (error)
3446 goto out4;
3447
3448 error = shmem_init_inodecache();
3449 if (error)
3450 goto out3;
3451
3452 error = register_filesystem(&shmem_fs_type);
3453 if (error) {
3454 printk(KERN_ERR "Could not register tmpfs\n");
3455 goto out2;
3456 }
3457
3458 shm_mnt = vfs_kern_mount(&shmem_fs_type, MS_NOUSER,
3459 shmem_fs_type.name, NULL);
3460 if (IS_ERR(shm_mnt)) {
3461 error = PTR_ERR(shm_mnt);
3462 printk(KERN_ERR "Could not kern_mount tmpfs\n");
3463 goto out1;
3464 }
3465 return 0;
3466
3467out1:
3468 unregister_filesystem(&shmem_fs_type);
3469out2:
3470 shmem_destroy_inodecache();
3471out3:
3472 bdi_destroy(&shmem_backing_dev_info);
3473out4:
3474 shm_mnt = ERR_PTR(error);
3475 return error;
3476}
3477
3478#else
3479
3480
3481
3482
3483
3484
3485
3486
3487
3488
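/*
 * tiny-shmem: simple shmemfs and tmpfs using ramfs code.
 *
 * Intended for small systems where the benefits of the full shmem code
 * (swap-backed pages in particular) are outweighed by its complexity.
 * On systems without swap this should be effectively equivalent, but
 * much lighter weight.
 */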
3489static struct file_system_type shmem_fs_type = {
3490 .name = "tmpfs",
3491 .mount = ramfs_mount,
3492 .kill_sb = kill_litter_super,
3493 .fs_flags = FS_USERNS_MOUNT,
3494};
3495
3496int __init shmem_init(void)
3497{
3498 BUG_ON(register_filesystem(&shmem_fs_type) != 0);
3499
3500 shm_mnt = kern_mount(&shmem_fs_type);
3501 BUG_ON(IS_ERR(shm_mnt));
3502
3503 return 0;
3504}
3505
3506int shmem_unuse(swp_entry_t swap, struct page *page)
3507{
3508 return 0;
3509}
3510
3511int shmem_lock(struct file *file, int lock, struct user_struct *user)
3512{
3513 return 0;
3514}
3515
3516void shmem_unlock_mapping(struct address_space *mapping)
3517{
3518}
3519
3520void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
3521{
3522 truncate_inode_pages_range(inode->i_mapping, lstart, lend);
3523}
3524EXPORT_SYMBOL_GPL(shmem_truncate_range);
3525
3526#define shmem_vm_ops generic_file_vm_ops
3527#define shmem_file_operations ramfs_file_operations
3528#define shmem_get_inode(sb, dir, mode, dev, flags) ramfs_get_inode(sb, dir, mode, dev)
3529#define shmem_acct_size(flags, size) 0
3530#define shmem_unacct_size(flags, size) do {} while (0)
3531
3532#endif
3533
3534
3535
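/* common code, built for both CONFIG_SHMEM and the tiny ramfs-backed fallback */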
3536static struct dentry_operations anon_ops = {
3537 .d_dname = simple_dname
3538};
3539
3540static struct file *__shmem_file_setup(const char *name, loff_t size,
3541 unsigned long flags, unsigned int i_flags)
3542{
3543 struct file *res;
3544 struct inode *inode;
3545 struct path path;
3546 struct super_block *sb;
3547 struct qstr this;
3548
3549 if (IS_ERR(shm_mnt))
3550 return ERR_CAST(shm_mnt);
3551
3552 if (size < 0 || size > MAX_LFS_FILESIZE)
3553 return ERR_PTR(-EINVAL);
3554
3555 if (shmem_acct_size(flags, size))
3556 return ERR_PTR(-ENOMEM);
3557
3558 res = ERR_PTR(-ENOMEM);
3559 this.name = name;
3560 this.len = strlen(name);
3561 this.hash = 0;
3562 sb = shm_mnt->mnt_sb;
3563 path.dentry = d_alloc_pseudo(sb, &this);
3564 if (!path.dentry)
3565 goto put_memory;
3566 d_set_d_op(path.dentry, &anon_ops);
3567 path.mnt = mntget(shm_mnt);
3568
3569 res = ERR_PTR(-ENOSPC);
3570 inode = shmem_get_inode(sb, NULL, S_IFREG | S_IRWXUGO, 0, flags);
3571 if (!inode)
3572 goto put_dentry;
3573
3574 inode->i_flags |= i_flags;
3575 d_instantiate(path.dentry, inode);
3576 inode->i_size = size;
3577 clear_nlink(inode);
3578 res = ERR_PTR(ramfs_nommu_expand_for_mapping(inode, size));
3579 if (IS_ERR(res))
3580 goto put_dentry;
3581
3582 res = alloc_file(&path, FMODE_WRITE | FMODE_READ,
3583 &shmem_file_operations);
3584 if (IS_ERR(res))
3585 goto put_dentry;
3586
3587 return res;
3588
3589put_dentry:
3590 path_put(&path);
3591put_memory:
3592 shmem_unacct_size(flags, size);
3593 return res;
3594}
3595
3596
3597
3598
3599
3600
3601
3602
3603
3604
3605
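/**
 * shmem_kernel_file_setup - get an unlinked file living in tmpfs which must be
 *	kernel internal.  The inode is marked S_PRIVATE, so there will be no LSM
 *	permission checks against the underlying inode; users must take care.
 * @name: name for dentry (to be seen in /proc/<pid>/maps)
 * @size: size to be set for the file
 * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size
 */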
3606struct file *shmem_kernel_file_setup(const char *name, loff_t size, unsigned long flags)
3607{
3608 return __shmem_file_setup(name, size, flags, S_PRIVATE);
3609}
3610
3611
3612
3613
3614
3615
3616
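/**
 * shmem_file_setup - get an unlinked file living in tmpfs
 * @name: name for dentry (to be seen in /proc/<pid>/maps)
 * @size: size to be set for the file
 * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size
 */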
3617struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags)
3618{
3619 return __shmem_file_setup(name, size, flags, 0);
3620}
3621EXPORT_SYMBOL_GPL(shmem_file_setup);
3622
3623
3624
3625
3626
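/**
 * shmem_zero_setup - setup a shared anonymous mapping
 * @vma: the vma to be mmapped is prepared by do_mmap_pgoff
 */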
3627int shmem_zero_setup(struct vm_area_struct *vma)
3628{
3629 struct file *file;
3630 loff_t size = vma->vm_end - vma->vm_start;
3631
3632 file = shmem_file_setup("dev/zero", size, vma->vm_flags);
3633 if (IS_ERR(file))
3634 return PTR_ERR(file);
3635
3636 if (vma->vm_file)
3637 fput(vma->vm_file);
3638 vma->vm_file = file;
3639 vma->vm_ops = &shmem_vm_ops;
3640 return 0;
3641}
3642
3643
3644
3645
3646
3647
3648
3649
3650
3651
3652
3653
3654
3655
3656
3657
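/**
 * shmem_read_mapping_page_gfp - read into page cache, using specified page allocation flags.
 * @mapping:	the page's address_space
 * @index:	the page index
 * @gfp:	the page allocator flags to use if allocating
 *
 * This behaves as a tmpfs "read_cache_page_gfp(mapping, index, gfp)",
 * with any new page allocations done using the specified allocation flags.
 * But read_cache_page_gfp() uses the ->readpage() method, which does not
 * suit tmpfs, since a tmpfs file may have pages in swapcache that only
 * shmem_getpage_gfp() knows how to find.
 */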
3658struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
3659 pgoff_t index, gfp_t gfp)
3660{
3661#ifdef CONFIG_SHMEM
3662 struct inode *inode = mapping->host;
3663 struct page *page;
3664 int error;
3665
3666 BUG_ON(mapping->a_ops != &shmem_aops);
3667 error = shmem_getpage_gfp(inode, index, &page, SGP_CACHE,
3668 gfp, NULL, NULL, NULL);
3669 if (error)
3670 page = ERR_PTR(error);
3671 else
3672 unlock_page(page);
3673 return page;
3674#else
3675
3676
3677
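	/* The tiny !CONFIG_SHMEM case uses ramfs without swap */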
3678 return read_cache_page_gfp(mapping, index, gfp);
3679#endif
3680}
3681EXPORT_SYMBOL_GPL(shmem_read_mapping_page_gfp);
3682