1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24#include <linux/fs.h>
25#include <linux/init.h>
26#include <linux/vfs.h>
27#include <linux/mount.h>
28#include <linux/ramfs.h>
29#include <linux/pagemap.h>
30#include <linux/file.h>
31#include <linux/mm.h>
32#include <linux/export.h>
33#include <linux/swap.h>
34#include <linux/uio.h>
35
36static struct vfsmount *shm_mnt;
37
38#ifdef CONFIG_SHMEM
39
40
41
42
43
44
45#include <linux/xattr.h>
46#include <linux/exportfs.h>
47#include <linux/posix_acl.h>
48#include <linux/posix_acl_xattr.h>
49#include <linux/mman.h>
50#include <linux/string.h>
51#include <linux/slab.h>
52#include <linux/backing-dev.h>
53#include <linux/shmem_fs.h>
54#include <linux/writeback.h>
55#include <linux/blkdev.h>
56#include <linux/pagevec.h>
57#include <linux/percpu_counter.h>
58#include <linux/falloc.h>
59#include <linux/splice.h>
60#include <linux/security.h>
61#include <linux/swapops.h>
62#include <linux/mempolicy.h>
63#include <linux/namei.h>
64#include <linux/ctype.h>
65#include <linux/migrate.h>
66#include <linux/highmem.h>
67#include <linux/seq_file.h>
68#include <linux/magic.h>
69#include <linux/syscalls.h>
70#include <linux/fcntl.h>
71#include <uapi/linux/memfd.h>
72
73#include <asm/uaccess.h>
74#include <asm/pgtable.h>
75
/* Number of 512-byte units in one page cache page; scales inode->i_blocks. */
#define BLOCKS_PER_PAGE		(PAGE_CACHE_SIZE / 512)
/* Convert a byte count to whole pages (rounding up) for VM accounting. */
#define VM_ACCT(size)		(PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT)

/* Nominal size of one directory entry; a new directory's i_size is 2 of these. */
#define BOGO_DIRENT_SIZE	20

/*
 * NOTE(review): by its name, the length limit for "short" symlinks; its users
 * are outside this part of the file - confirm there.
 */
#define SHORT_SYMLINK_LEN	128
84
85
86
87
88
89
90struct shmem_falloc {
91 wait_queue_head_t *waitq;
92 pgoff_t start;
93 pgoff_t next;
94 pgoff_t nr_falloced;
95 pgoff_t nr_unswapped;
96};
97
98
/* Tells shmem_getpage_gfp() what it may do to satisfy the request. */
enum sgp_type {
	/* fail beyond i_size; return NULL page over a hole, never allocate */
	SGP_READ,
	/* fail beyond i_size; may allocate a zeroed page */
	SGP_CACHE,
	/* as SGP_CACHE, and additionally marks the page dirty */
	SGP_DIRTY,
	/* may go beyond i_size; a new page is left !Uptodate for the writer */
	SGP_WRITE,
	/* may go beyond i_size; an existing !Uptodate page is zeroed */
	SGP_FALLOC,
};
106
107#ifdef CONFIG_TMPFS
108static unsigned long shmem_default_max_blocks(void)
109{
110 return totalram_pages / 2;
111}
112
113static unsigned long shmem_default_max_inodes(void)
114{
115 return min(totalram_pages - totalhigh_pages, totalram_pages / 2);
116}
117#endif
118
119static bool shmem_should_replace_page(struct page *page, gfp_t gfp);
120static int shmem_replace_page(struct page **pagep, gfp_t gfp,
121 struct shmem_inode_info *info, pgoff_t index);
122static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
123 struct page **pagep, enum sgp_type sgp, gfp_t gfp, int *fault_type);
124
125static inline int shmem_getpage(struct inode *inode, pgoff_t index,
126 struct page **pagep, enum sgp_type sgp, int *fault_type)
127{
128 return shmem_getpage_gfp(inode, index, pagep, sgp,
129 mapping_gfp_mask(inode->i_mapping), fault_type);
130}
131
132static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb)
133{
134 return sb->s_fs_info;
135}
136
137
138
139
140
141
142
143static inline int shmem_acct_size(unsigned long flags, loff_t size)
144{
145 return (flags & VM_NORESERVE) ?
146 0 : security_vm_enough_memory_mm(current->mm, VM_ACCT(size));
147}
148
149static inline void shmem_unacct_size(unsigned long flags, loff_t size)
150{
151 if (!(flags & VM_NORESERVE))
152 vm_unacct_memory(VM_ACCT(size));
153}
154
155static inline int shmem_reacct_size(unsigned long flags,
156 loff_t oldsize, loff_t newsize)
157{
158 if (!(flags & VM_NORESERVE)) {
159 if (VM_ACCT(newsize) > VM_ACCT(oldsize))
160 return security_vm_enough_memory_mm(current->mm,
161 VM_ACCT(newsize) - VM_ACCT(oldsize));
162 else if (VM_ACCT(newsize) < VM_ACCT(oldsize))
163 vm_unacct_memory(VM_ACCT(oldsize) - VM_ACCT(newsize));
164 }
165 return 0;
166}
167
168
169
170
171
172
173
174static inline int shmem_acct_block(unsigned long flags)
175{
176 return (flags & VM_NORESERVE) ?
177 security_vm_enough_memory_mm(current->mm, VM_ACCT(PAGE_CACHE_SIZE)) : 0;
178}
179
180static inline void shmem_unacct_blocks(unsigned long flags, long pages)
181{
182 if (flags & VM_NORESERVE)
183 vm_unacct_memory(pages * VM_ACCT(PAGE_CACHE_SIZE));
184}
185
186static const struct super_operations shmem_ops;
187static const struct address_space_operations shmem_aops;
188static const struct file_operations shmem_file_operations;
189static const struct inode_operations shmem_inode_operations;
190static const struct inode_operations shmem_dir_inode_operations;
191static const struct inode_operations shmem_special_inode_operations;
192static const struct vm_operations_struct shmem_vm_ops;
193
194static LIST_HEAD(shmem_swaplist);
195static DEFINE_MUTEX(shmem_swaplist_mutex);
196
197static int shmem_reserve_inode(struct super_block *sb)
198{
199 struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
200 if (sbinfo->max_inodes) {
201 spin_lock(&sbinfo->stat_lock);
202 if (!sbinfo->free_inodes) {
203 spin_unlock(&sbinfo->stat_lock);
204 return -ENOSPC;
205 }
206 sbinfo->free_inodes--;
207 spin_unlock(&sbinfo->stat_lock);
208 }
209 return 0;
210}
211
212static void shmem_free_inode(struct super_block *sb)
213{
214 struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
215 if (sbinfo->max_inodes) {
216 spin_lock(&sbinfo->stat_lock);
217 sbinfo->free_inodes++;
218 spin_unlock(&sbinfo->stat_lock);
219 }
220}
221
222
223
224
225
226
227
228
229
230
231
232
233
234static void shmem_recalc_inode(struct inode *inode)
235{
236 struct shmem_inode_info *info = SHMEM_I(inode);
237 long freed;
238
239 freed = info->alloced - info->swapped - inode->i_mapping->nrpages;
240 if (freed > 0) {
241 struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
242 if (sbinfo->max_blocks)
243 percpu_counter_add(&sbinfo->used_blocks, -freed);
244 info->alloced -= freed;
245 inode->i_blocks -= freed * BLOCKS_PER_PAGE;
246 shmem_unacct_blocks(info->flags, freed);
247 }
248}
249
250
251
252
253static int shmem_radix_tree_replace(struct address_space *mapping,
254 pgoff_t index, void *expected, void *replacement)
255{
256 void **pslot;
257 void *item;
258
259 VM_BUG_ON(!expected);
260 VM_BUG_ON(!replacement);
261 pslot = radix_tree_lookup_slot(&mapping->page_tree, index);
262 if (!pslot)
263 return -ENOENT;
264 item = radix_tree_deref_slot_protected(pslot, &mapping->tree_lock);
265 if (item != expected)
266 return -ENOENT;
267 radix_tree_replace_slot(pslot, replacement);
268 return 0;
269}
270
271
272
273
274
275
276
277
278static bool shmem_confirm_swap(struct address_space *mapping,
279 pgoff_t index, swp_entry_t swap)
280{
281 void *item;
282
283 rcu_read_lock();
284 item = radix_tree_lookup(&mapping->page_tree, index);
285 rcu_read_unlock();
286 return item == swp_to_radix_entry(swap);
287}
288
289
290
291
292static int shmem_add_to_page_cache(struct page *page,
293 struct address_space *mapping,
294 pgoff_t index, void *expected)
295{
296 int error;
297
298 VM_BUG_ON_PAGE(!PageLocked(page), page);
299 VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
300
301 page_cache_get(page);
302 page->mapping = mapping;
303 page->index = index;
304
305 spin_lock_irq(&mapping->tree_lock);
306 if (!expected)
307 error = radix_tree_insert(&mapping->page_tree, index, page);
308 else
309 error = shmem_radix_tree_replace(mapping, index, expected,
310 page);
311 if (!error) {
312 mapping->nrpages++;
313 __inc_zone_page_state(page, NR_FILE_PAGES);
314 __inc_zone_page_state(page, NR_SHMEM);
315 spin_unlock_irq(&mapping->tree_lock);
316 } else {
317 page->mapping = NULL;
318 spin_unlock_irq(&mapping->tree_lock);
319 page_cache_release(page);
320 }
321 return error;
322}
323
324
325
326
327static void shmem_delete_from_page_cache(struct page *page, void *radswap)
328{
329 struct address_space *mapping = page->mapping;
330 int error;
331
332 spin_lock_irq(&mapping->tree_lock);
333 error = shmem_radix_tree_replace(mapping, page->index, page, radswap);
334 page->mapping = NULL;
335 mapping->nrpages--;
336 __dec_zone_page_state(page, NR_FILE_PAGES);
337 __dec_zone_page_state(page, NR_SHMEM);
338 spin_unlock_irq(&mapping->tree_lock);
339 page_cache_release(page);
340 BUG_ON(error);
341}
342
343
344
345
346static int shmem_free_swap(struct address_space *mapping,
347 pgoff_t index, void *radswap)
348{
349 void *old;
350
351 spin_lock_irq(&mapping->tree_lock);
352 old = radix_tree_delete_item(&mapping->page_tree, index, radswap);
353 spin_unlock_irq(&mapping->tree_lock);
354 if (old != radswap)
355 return -ENOENT;
356 free_swap_and_cache(radix_to_swp_entry(radswap));
357 return 0;
358}
359
360
361
362
363void shmem_unlock_mapping(struct address_space *mapping)
364{
365 struct pagevec pvec;
366 pgoff_t indices[PAGEVEC_SIZE];
367 pgoff_t index = 0;
368
369 pagevec_init(&pvec, 0);
370
371
372
373 while (!mapping_unevictable(mapping)) {
374
375
376
377
378 pvec.nr = find_get_entries(mapping, index,
379 PAGEVEC_SIZE, pvec.pages, indices);
380 if (!pvec.nr)
381 break;
382 index = indices[pvec.nr - 1] + 1;
383 pagevec_remove_exceptionals(&pvec);
384 check_move_unevictable_pages(pvec.pages, pvec.nr);
385 pagevec_release(&pvec);
386 cond_resched();
387 }
388}
389
390
391
392
393
394static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
395 bool unfalloc)
396{
397 struct address_space *mapping = inode->i_mapping;
398 struct shmem_inode_info *info = SHMEM_I(inode);
399 pgoff_t start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
400 pgoff_t end = (lend + 1) >> PAGE_CACHE_SHIFT;
401 unsigned int partial_start = lstart & (PAGE_CACHE_SIZE - 1);
402 unsigned int partial_end = (lend + 1) & (PAGE_CACHE_SIZE - 1);
403 struct pagevec pvec;
404 pgoff_t indices[PAGEVEC_SIZE];
405 long nr_swaps_freed = 0;
406 pgoff_t index;
407 int i;
408
409 if (lend == -1)
410 end = -1;
411
412 pagevec_init(&pvec, 0);
413 index = start;
414 while (index < end) {
415 pvec.nr = find_get_entries(mapping, index,
416 min(end - index, (pgoff_t)PAGEVEC_SIZE),
417 pvec.pages, indices);
418 if (!pvec.nr)
419 break;
420 for (i = 0; i < pagevec_count(&pvec); i++) {
421 struct page *page = pvec.pages[i];
422
423 index = indices[i];
424 if (index >= end)
425 break;
426
427 if (radix_tree_exceptional_entry(page)) {
428 if (unfalloc)
429 continue;
430 nr_swaps_freed += !shmem_free_swap(mapping,
431 index, page);
432 continue;
433 }
434
435 if (!trylock_page(page))
436 continue;
437 if (!unfalloc || !PageUptodate(page)) {
438 if (page->mapping == mapping) {
439 VM_BUG_ON_PAGE(PageWriteback(page), page);
440 truncate_inode_page(mapping, page);
441 }
442 }
443 unlock_page(page);
444 }
445 pagevec_remove_exceptionals(&pvec);
446 pagevec_release(&pvec);
447 cond_resched();
448 index++;
449 }
450
451 if (partial_start) {
452 struct page *page = NULL;
453 shmem_getpage(inode, start - 1, &page, SGP_READ, NULL);
454 if (page) {
455 unsigned int top = PAGE_CACHE_SIZE;
456 if (start > end) {
457 top = partial_end;
458 partial_end = 0;
459 }
460 zero_user_segment(page, partial_start, top);
461 set_page_dirty(page);
462 unlock_page(page);
463 page_cache_release(page);
464 }
465 }
466 if (partial_end) {
467 struct page *page = NULL;
468 shmem_getpage(inode, end, &page, SGP_READ, NULL);
469 if (page) {
470 zero_user_segment(page, 0, partial_end);
471 set_page_dirty(page);
472 unlock_page(page);
473 page_cache_release(page);
474 }
475 }
476 if (start >= end)
477 return;
478
479 index = start;
480 while (index < end) {
481 cond_resched();
482
483 pvec.nr = find_get_entries(mapping, index,
484 min(end - index, (pgoff_t)PAGEVEC_SIZE),
485 pvec.pages, indices);
486 if (!pvec.nr) {
487
488 if (index == start || end != -1)
489 break;
490
491 index = start;
492 continue;
493 }
494 for (i = 0; i < pagevec_count(&pvec); i++) {
495 struct page *page = pvec.pages[i];
496
497 index = indices[i];
498 if (index >= end)
499 break;
500
501 if (radix_tree_exceptional_entry(page)) {
502 if (unfalloc)
503 continue;
504 if (shmem_free_swap(mapping, index, page)) {
505
506 index--;
507 break;
508 }
509 nr_swaps_freed++;
510 continue;
511 }
512
513 lock_page(page);
514 if (!unfalloc || !PageUptodate(page)) {
515 if (page->mapping == mapping) {
516 VM_BUG_ON_PAGE(PageWriteback(page), page);
517 truncate_inode_page(mapping, page);
518 } else {
519
520 unlock_page(page);
521 index--;
522 break;
523 }
524 }
525 unlock_page(page);
526 }
527 pagevec_remove_exceptionals(&pvec);
528 pagevec_release(&pvec);
529 index++;
530 }
531
532 spin_lock(&info->lock);
533 info->swapped -= nr_swaps_freed;
534 shmem_recalc_inode(inode);
535 spin_unlock(&info->lock);
536}
537
538void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
539{
540 shmem_undo_range(inode, lstart, lend, false);
541 inode->i_ctime = inode->i_mtime = CURRENT_TIME;
542}
543EXPORT_SYMBOL_GPL(shmem_truncate_range);
544
545static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
546{
547 struct inode *inode = d_inode(dentry);
548 struct shmem_inode_info *info = SHMEM_I(inode);
549 int error;
550
551 error = inode_change_ok(inode, attr);
552 if (error)
553 return error;
554
555 if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
556 loff_t oldsize = inode->i_size;
557 loff_t newsize = attr->ia_size;
558
559
560 if ((newsize < oldsize && (info->seals & F_SEAL_SHRINK)) ||
561 (newsize > oldsize && (info->seals & F_SEAL_GROW)))
562 return -EPERM;
563
564 if (newsize != oldsize) {
565 error = shmem_reacct_size(SHMEM_I(inode)->flags,
566 oldsize, newsize);
567 if (error)
568 return error;
569 i_size_write(inode, newsize);
570 inode->i_ctime = inode->i_mtime = CURRENT_TIME;
571 }
572 if (newsize < oldsize) {
573 loff_t holebegin = round_up(newsize, PAGE_SIZE);
574 unmap_mapping_range(inode->i_mapping, holebegin, 0, 1);
575 shmem_truncate_range(inode, newsize, (loff_t)-1);
576
577 unmap_mapping_range(inode->i_mapping, holebegin, 0, 1);
578 }
579 }
580
581 setattr_copy(inode, attr);
582 if (attr->ia_valid & ATTR_MODE)
583 error = posix_acl_chmod(inode, inode->i_mode);
584 return error;
585}
586
587static void shmem_evict_inode(struct inode *inode)
588{
589 struct shmem_inode_info *info = SHMEM_I(inode);
590
591 if (inode->i_mapping->a_ops == &shmem_aops) {
592 shmem_unacct_size(info->flags, inode->i_size);
593 inode->i_size = 0;
594 shmem_truncate_range(inode, 0, (loff_t)-1);
595 if (!list_empty(&info->swaplist)) {
596 mutex_lock(&shmem_swaplist_mutex);
597 list_del_init(&info->swaplist);
598 mutex_unlock(&shmem_swaplist_mutex);
599 }
600 } else
601 kfree(info->symlink);
602
603 simple_xattrs_free(&info->xattrs);
604 WARN_ON(inode->i_blocks);
605 shmem_free_inode(inode->i_sb);
606 clear_inode(inode);
607}
608
609
610
611
612static int shmem_unuse_inode(struct shmem_inode_info *info,
613 swp_entry_t swap, struct page **pagep)
614{
615 struct address_space *mapping = info->vfs_inode.i_mapping;
616 void *radswap;
617 pgoff_t index;
618 gfp_t gfp;
619 int error = 0;
620
621 radswap = swp_to_radix_entry(swap);
622 index = radix_tree_locate_item(&mapping->page_tree, radswap);
623 if (index == -1)
624 return -EAGAIN;
625
626
627
628
629
630
631
632 if (shmem_swaplist.next != &info->swaplist)
633 list_move_tail(&shmem_swaplist, &info->swaplist);
634
635 gfp = mapping_gfp_mask(mapping);
636 if (shmem_should_replace_page(*pagep, gfp)) {
637 mutex_unlock(&shmem_swaplist_mutex);
638 error = shmem_replace_page(pagep, gfp, info, index);
639 mutex_lock(&shmem_swaplist_mutex);
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658 if (!page_swapcount(*pagep))
659 error = -ENOENT;
660 }
661
662
663
664
665
666
667 if (!error)
668 error = shmem_add_to_page_cache(*pagep, mapping, index,
669 radswap);
670 if (error != -ENOMEM) {
671
672
673
674
675 delete_from_swap_cache(*pagep);
676 set_page_dirty(*pagep);
677 if (!error) {
678 spin_lock(&info->lock);
679 info->swapped--;
680 spin_unlock(&info->lock);
681 swap_free(swap);
682 }
683 }
684 return error;
685}
686
687
688
689
690int shmem_unuse(swp_entry_t swap, struct page *page)
691{
692 struct list_head *this, *next;
693 struct shmem_inode_info *info;
694 struct mem_cgroup *memcg;
695 int error = 0;
696
697
698
699
700
701 if (unlikely(!PageSwapCache(page) || page_private(page) != swap.val))
702 goto out;
703
704
705
706
707
708
709 error = mem_cgroup_try_charge(page, current->mm, GFP_KERNEL, &memcg);
710 if (error)
711 goto out;
712
713 error = -EAGAIN;
714
715 mutex_lock(&shmem_swaplist_mutex);
716 list_for_each_safe(this, next, &shmem_swaplist) {
717 info = list_entry(this, struct shmem_inode_info, swaplist);
718 if (info->swapped)
719 error = shmem_unuse_inode(info, swap, &page);
720 else
721 list_del_init(&info->swaplist);
722 cond_resched();
723 if (error != -EAGAIN)
724 break;
725
726 }
727 mutex_unlock(&shmem_swaplist_mutex);
728
729 if (error) {
730 if (error != -ENOMEM)
731 error = 0;
732 mem_cgroup_cancel_charge(page, memcg);
733 } else
734 mem_cgroup_commit_charge(page, memcg, true);
735out:
736 unlock_page(page);
737 page_cache_release(page);
738 return error;
739}
740
741
742
743
744static int shmem_writepage(struct page *page, struct writeback_control *wbc)
745{
746 struct shmem_inode_info *info;
747 struct address_space *mapping;
748 struct inode *inode;
749 swp_entry_t swap;
750 pgoff_t index;
751
752 BUG_ON(!PageLocked(page));
753 mapping = page->mapping;
754 index = page->index;
755 inode = mapping->host;
756 info = SHMEM_I(inode);
757 if (info->flags & VM_LOCKED)
758 goto redirty;
759 if (!total_swap_pages)
760 goto redirty;
761
762
763
764
765
766
767
768
769 if (!wbc->for_reclaim) {
770 WARN_ON_ONCE(1);
771 goto redirty;
772 }
773
774
775
776
777
778
779
780
781
782
783
784
785 if (!PageUptodate(page)) {
786 if (inode->i_private) {
787 struct shmem_falloc *shmem_falloc;
788 spin_lock(&inode->i_lock);
789 shmem_falloc = inode->i_private;
790 if (shmem_falloc &&
791 !shmem_falloc->waitq &&
792 index >= shmem_falloc->start &&
793 index < shmem_falloc->next)
794 shmem_falloc->nr_unswapped++;
795 else
796 shmem_falloc = NULL;
797 spin_unlock(&inode->i_lock);
798 if (shmem_falloc)
799 goto redirty;
800 }
801 clear_highpage(page);
802 flush_dcache_page(page);
803 SetPageUptodate(page);
804 }
805
806 swap = get_swap_page();
807 if (!swap.val)
808 goto redirty;
809
810
811
812
813
814
815
816
817
818 mutex_lock(&shmem_swaplist_mutex);
819 if (list_empty(&info->swaplist))
820 list_add_tail(&info->swaplist, &shmem_swaplist);
821
822 if (add_to_swap_cache(page, swap, GFP_ATOMIC) == 0) {
823 swap_shmem_alloc(swap);
824 shmem_delete_from_page_cache(page, swp_to_radix_entry(swap));
825
826 spin_lock(&info->lock);
827 info->swapped++;
828 shmem_recalc_inode(inode);
829 spin_unlock(&info->lock);
830
831 mutex_unlock(&shmem_swaplist_mutex);
832 BUG_ON(page_mapped(page));
833 swap_writepage(page, wbc);
834 return 0;
835 }
836
837 mutex_unlock(&shmem_swaplist_mutex);
838 swapcache_free(swap);
839redirty:
840 set_page_dirty(page);
841 if (wbc->for_reclaim)
842 return AOP_WRITEPAGE_ACTIVATE;
843 unlock_page(page);
844 return 0;
845}
846
847#ifdef CONFIG_NUMA
848#ifdef CONFIG_TMPFS
849static void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol)
850{
851 char buffer[64];
852
853 if (!mpol || mpol->mode == MPOL_DEFAULT)
854 return;
855
856 mpol_to_str(buffer, sizeof(buffer), mpol);
857
858 seq_printf(seq, ",mpol=%s", buffer);
859}
860
861static struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
862{
863 struct mempolicy *mpol = NULL;
864 if (sbinfo->mpol) {
865 spin_lock(&sbinfo->stat_lock);
866 mpol = sbinfo->mpol;
867 mpol_get(mpol);
868 spin_unlock(&sbinfo->stat_lock);
869 }
870 return mpol;
871}
872#endif
873
874static struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp,
875 struct shmem_inode_info *info, pgoff_t index)
876{
877 struct vm_area_struct pvma;
878 struct page *page;
879
880
881 pvma.vm_start = 0;
882
883 pvma.vm_pgoff = index + info->vfs_inode.i_ino;
884 pvma.vm_ops = NULL;
885 pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, index);
886
887 page = swapin_readahead(swap, gfp, &pvma, 0);
888
889
890 mpol_cond_put(pvma.vm_policy);
891
892 return page;
893}
894
895static struct page *shmem_alloc_page(gfp_t gfp,
896 struct shmem_inode_info *info, pgoff_t index)
897{
898 struct vm_area_struct pvma;
899 struct page *page;
900
901
902 pvma.vm_start = 0;
903
904 pvma.vm_pgoff = index + info->vfs_inode.i_ino;
905 pvma.vm_ops = NULL;
906 pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, index);
907
908 page = alloc_page_vma(gfp, &pvma, 0);
909
910
911 mpol_cond_put(pvma.vm_policy);
912
913 return page;
914}
915#else
916#ifdef CONFIG_TMPFS
/* !CONFIG_NUMA: there is no memory policy to show. */
static inline void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol)
{
	/* intentionally empty */
}
920#endif
921
922static inline struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp,
923 struct shmem_inode_info *info, pgoff_t index)
924{
925 return swapin_readahead(swap, gfp, NULL, 0);
926}
927
928static inline struct page *shmem_alloc_page(gfp_t gfp,
929 struct shmem_inode_info *info, pgoff_t index)
930{
931 return alloc_page(gfp);
932}
933#endif
934
935#if !defined(CONFIG_NUMA) || !defined(CONFIG_TMPFS)
936static inline struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
937{
938 return NULL;
939}
940#endif
941
942
943
944
945
946
947
948
949
950
951
952
953
954static bool shmem_should_replace_page(struct page *page, gfp_t gfp)
955{
956 return page_zonenum(page) > gfp_zone(gfp);
957}
958
959static int shmem_replace_page(struct page **pagep, gfp_t gfp,
960 struct shmem_inode_info *info, pgoff_t index)
961{
962 struct page *oldpage, *newpage;
963 struct address_space *swap_mapping;
964 pgoff_t swap_index;
965 int error;
966
967 oldpage = *pagep;
968 swap_index = page_private(oldpage);
969 swap_mapping = page_mapping(oldpage);
970
971
972
973
974
975 gfp &= ~GFP_CONSTRAINT_MASK;
976 newpage = shmem_alloc_page(gfp, info, index);
977 if (!newpage)
978 return -ENOMEM;
979
980 page_cache_get(newpage);
981 copy_highpage(newpage, oldpage);
982 flush_dcache_page(newpage);
983
984 __set_page_locked(newpage);
985 SetPageUptodate(newpage);
986 SetPageSwapBacked(newpage);
987 set_page_private(newpage, swap_index);
988 SetPageSwapCache(newpage);
989
990
991
992
993
994 spin_lock_irq(&swap_mapping->tree_lock);
995 error = shmem_radix_tree_replace(swap_mapping, swap_index, oldpage,
996 newpage);
997 if (!error) {
998 __inc_zone_page_state(newpage, NR_FILE_PAGES);
999 __dec_zone_page_state(oldpage, NR_FILE_PAGES);
1000 }
1001 spin_unlock_irq(&swap_mapping->tree_lock);
1002
1003 if (unlikely(error)) {
1004
1005
1006
1007
1008
1009 oldpage = newpage;
1010 } else {
1011 mem_cgroup_migrate(oldpage, newpage, true);
1012 lru_cache_add_anon(newpage);
1013 *pagep = newpage;
1014 }
1015
1016 ClearPageSwapCache(oldpage);
1017 set_page_private(oldpage, 0);
1018
1019 unlock_page(oldpage);
1020 page_cache_release(oldpage);
1021 page_cache_release(oldpage);
1022 return error;
1023}
1024
1025
1026
1027
1028
1029
1030
1031
1032static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
1033 struct page **pagep, enum sgp_type sgp, gfp_t gfp, int *fault_type)
1034{
1035 struct address_space *mapping = inode->i_mapping;
1036 struct shmem_inode_info *info;
1037 struct shmem_sb_info *sbinfo;
1038 struct mem_cgroup *memcg;
1039 struct page *page;
1040 swp_entry_t swap;
1041 int error;
1042 int once = 0;
1043 int alloced = 0;
1044
1045 if (index > (MAX_LFS_FILESIZE >> PAGE_CACHE_SHIFT))
1046 return -EFBIG;
1047repeat:
1048 swap.val = 0;
1049 page = find_lock_entry(mapping, index);
1050 if (radix_tree_exceptional_entry(page)) {
1051 swap = radix_to_swp_entry(page);
1052 page = NULL;
1053 }
1054
1055 if (sgp != SGP_WRITE && sgp != SGP_FALLOC &&
1056 ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
1057 error = -EINVAL;
1058 goto failed;
1059 }
1060
1061 if (page && sgp == SGP_WRITE)
1062 mark_page_accessed(page);
1063
1064
1065 if (page && !PageUptodate(page)) {
1066 if (sgp != SGP_READ)
1067 goto clear;
1068 unlock_page(page);
1069 page_cache_release(page);
1070 page = NULL;
1071 }
1072 if (page || (sgp == SGP_READ && !swap.val)) {
1073 *pagep = page;
1074 return 0;
1075 }
1076
1077
1078
1079
1080
1081 info = SHMEM_I(inode);
1082 sbinfo = SHMEM_SB(inode->i_sb);
1083
1084 if (swap.val) {
1085
1086 page = lookup_swap_cache(swap);
1087 if (!page) {
1088
1089 if (fault_type)
1090 *fault_type |= VM_FAULT_MAJOR;
1091 page = shmem_swapin(swap, gfp, info, index);
1092 if (!page) {
1093 error = -ENOMEM;
1094 goto failed;
1095 }
1096 }
1097
1098
1099 lock_page(page);
1100 if (!PageSwapCache(page) || page_private(page) != swap.val ||
1101 !shmem_confirm_swap(mapping, index, swap)) {
1102 error = -EEXIST;
1103 goto unlock;
1104 }
1105 if (!PageUptodate(page)) {
1106 error = -EIO;
1107 goto failed;
1108 }
1109 wait_on_page_writeback(page);
1110
1111 if (shmem_should_replace_page(page, gfp)) {
1112 error = shmem_replace_page(&page, gfp, info, index);
1113 if (error)
1114 goto failed;
1115 }
1116
1117 error = mem_cgroup_try_charge(page, current->mm, gfp, &memcg);
1118 if (!error) {
1119 error = shmem_add_to_page_cache(page, mapping, index,
1120 swp_to_radix_entry(swap));
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133 if (error) {
1134 mem_cgroup_cancel_charge(page, memcg);
1135 delete_from_swap_cache(page);
1136 }
1137 }
1138 if (error)
1139 goto failed;
1140
1141 mem_cgroup_commit_charge(page, memcg, true);
1142
1143 spin_lock(&info->lock);
1144 info->swapped--;
1145 shmem_recalc_inode(inode);
1146 spin_unlock(&info->lock);
1147
1148 if (sgp == SGP_WRITE)
1149 mark_page_accessed(page);
1150
1151 delete_from_swap_cache(page);
1152 set_page_dirty(page);
1153 swap_free(swap);
1154
1155 } else {
1156 if (shmem_acct_block(info->flags)) {
1157 error = -ENOSPC;
1158 goto failed;
1159 }
1160 if (sbinfo->max_blocks) {
1161 if (percpu_counter_compare(&sbinfo->used_blocks,
1162 sbinfo->max_blocks) >= 0) {
1163 error = -ENOSPC;
1164 goto unacct;
1165 }
1166 percpu_counter_inc(&sbinfo->used_blocks);
1167 }
1168
1169 page = shmem_alloc_page(gfp, info, index);
1170 if (!page) {
1171 error = -ENOMEM;
1172 goto decused;
1173 }
1174
1175 __SetPageSwapBacked(page);
1176 __set_page_locked(page);
1177 if (sgp == SGP_WRITE)
1178 __SetPageReferenced(page);
1179
1180 error = mem_cgroup_try_charge(page, current->mm, gfp, &memcg);
1181 if (error)
1182 goto decused;
1183 error = radix_tree_maybe_preload(gfp & GFP_RECLAIM_MASK);
1184 if (!error) {
1185 error = shmem_add_to_page_cache(page, mapping, index,
1186 NULL);
1187 radix_tree_preload_end();
1188 }
1189 if (error) {
1190 mem_cgroup_cancel_charge(page, memcg);
1191 goto decused;
1192 }
1193 mem_cgroup_commit_charge(page, memcg, false);
1194 lru_cache_add_anon(page);
1195
1196 spin_lock(&info->lock);
1197 info->alloced++;
1198 inode->i_blocks += BLOCKS_PER_PAGE;
1199 shmem_recalc_inode(inode);
1200 spin_unlock(&info->lock);
1201 alloced = true;
1202
1203
1204
1205
1206 if (sgp == SGP_FALLOC)
1207 sgp = SGP_WRITE;
1208clear:
1209
1210
1211
1212
1213
1214 if (sgp != SGP_WRITE) {
1215 clear_highpage(page);
1216 flush_dcache_page(page);
1217 SetPageUptodate(page);
1218 }
1219 if (sgp == SGP_DIRTY)
1220 set_page_dirty(page);
1221 }
1222
1223
1224 if (sgp != SGP_WRITE && sgp != SGP_FALLOC &&
1225 ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
1226 error = -EINVAL;
1227 if (alloced)
1228 goto trunc;
1229 else
1230 goto failed;
1231 }
1232 *pagep = page;
1233 return 0;
1234
1235
1236
1237
1238trunc:
1239 info = SHMEM_I(inode);
1240 ClearPageDirty(page);
1241 delete_from_page_cache(page);
1242 spin_lock(&info->lock);
1243 info->alloced--;
1244 inode->i_blocks -= BLOCKS_PER_PAGE;
1245 spin_unlock(&info->lock);
1246decused:
1247 sbinfo = SHMEM_SB(inode->i_sb);
1248 if (sbinfo->max_blocks)
1249 percpu_counter_add(&sbinfo->used_blocks, -1);
1250unacct:
1251 shmem_unacct_blocks(info->flags, 1);
1252failed:
1253 if (swap.val && error != -EINVAL &&
1254 !shmem_confirm_swap(mapping, index, swap))
1255 error = -EEXIST;
1256unlock:
1257 if (page) {
1258 unlock_page(page);
1259 page_cache_release(page);
1260 }
1261 if (error == -ENOSPC && !once++) {
1262 info = SHMEM_I(inode);
1263 spin_lock(&info->lock);
1264 shmem_recalc_inode(inode);
1265 spin_unlock(&info->lock);
1266 goto repeat;
1267 }
1268 if (error == -EEXIST)
1269 goto repeat;
1270 return error;
1271}
1272
1273static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1274{
1275 struct inode *inode = file_inode(vma->vm_file);
1276 int error;
1277 int ret = VM_FAULT_LOCKED;
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296 if (unlikely(inode->i_private)) {
1297 struct shmem_falloc *shmem_falloc;
1298
1299 spin_lock(&inode->i_lock);
1300 shmem_falloc = inode->i_private;
1301 if (shmem_falloc &&
1302 shmem_falloc->waitq &&
1303 vmf->pgoff >= shmem_falloc->start &&
1304 vmf->pgoff < shmem_falloc->next) {
1305 wait_queue_head_t *shmem_falloc_waitq;
1306 DEFINE_WAIT(shmem_fault_wait);
1307
1308 ret = VM_FAULT_NOPAGE;
1309 if ((vmf->flags & FAULT_FLAG_ALLOW_RETRY) &&
1310 !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
1311
1312 up_read(&vma->vm_mm->mmap_sem);
1313 ret = VM_FAULT_RETRY;
1314 }
1315
1316 shmem_falloc_waitq = shmem_falloc->waitq;
1317 prepare_to_wait(shmem_falloc_waitq, &shmem_fault_wait,
1318 TASK_UNINTERRUPTIBLE);
1319 spin_unlock(&inode->i_lock);
1320 schedule();
1321
1322
1323
1324
1325
1326
1327
1328
1329 spin_lock(&inode->i_lock);
1330 finish_wait(shmem_falloc_waitq, &shmem_fault_wait);
1331 spin_unlock(&inode->i_lock);
1332 return ret;
1333 }
1334 spin_unlock(&inode->i_lock);
1335 }
1336
1337 error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_CACHE, &ret);
1338 if (error)
1339 return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS);
1340
1341 if (ret & VM_FAULT_MAJOR) {
1342 count_vm_event(PGMAJFAULT);
1343 mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
1344 }
1345 return ret;
1346}
1347
1348#ifdef CONFIG_NUMA
1349static int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *mpol)
1350{
1351 struct inode *inode = file_inode(vma->vm_file);
1352 return mpol_set_shared_policy(&SHMEM_I(inode)->policy, vma, mpol);
1353}
1354
1355static struct mempolicy *shmem_get_policy(struct vm_area_struct *vma,
1356 unsigned long addr)
1357{
1358 struct inode *inode = file_inode(vma->vm_file);
1359 pgoff_t index;
1360
1361 index = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
1362 return mpol_shared_policy_lookup(&SHMEM_I(inode)->policy, index);
1363}
1364#endif
1365
1366int shmem_lock(struct file *file, int lock, struct user_struct *user)
1367{
1368 struct inode *inode = file_inode(file);
1369 struct shmem_inode_info *info = SHMEM_I(inode);
1370 int retval = -ENOMEM;
1371
1372 spin_lock(&info->lock);
1373 if (lock && !(info->flags & VM_LOCKED)) {
1374 if (!user_shm_lock(inode->i_size, user))
1375 goto out_nomem;
1376 info->flags |= VM_LOCKED;
1377 mapping_set_unevictable(file->f_mapping);
1378 }
1379 if (!lock && (info->flags & VM_LOCKED) && user) {
1380 user_shm_unlock(inode->i_size, user);
1381 info->flags &= ~VM_LOCKED;
1382 mapping_clear_unevictable(file->f_mapping);
1383 }
1384 retval = 0;
1385
1386out_nomem:
1387 spin_unlock(&info->lock);
1388 return retval;
1389}
1390
1391static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
1392{
1393 file_accessed(file);
1394 vma->vm_ops = &shmem_vm_ops;
1395 return 0;
1396}
1397
1398static struct inode *shmem_get_inode(struct super_block *sb, const struct inode *dir,
1399 umode_t mode, dev_t dev, unsigned long flags)
1400{
1401 struct inode *inode;
1402 struct shmem_inode_info *info;
1403 struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
1404
1405 if (shmem_reserve_inode(sb))
1406 return NULL;
1407
1408 inode = new_inode(sb);
1409 if (inode) {
1410 inode->i_ino = get_next_ino();
1411 inode_init_owner(inode, dir, mode);
1412 inode->i_blocks = 0;
1413 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
1414 inode->i_generation = get_seconds();
1415 info = SHMEM_I(inode);
1416 memset(info, 0, (char *)inode - (char *)info);
1417 spin_lock_init(&info->lock);
1418 info->seals = F_SEAL_SEAL;
1419 info->flags = flags & VM_NORESERVE;
1420 INIT_LIST_HEAD(&info->swaplist);
1421 simple_xattrs_init(&info->xattrs);
1422 cache_no_acl(inode);
1423
1424 switch (mode & S_IFMT) {
1425 default:
1426 inode->i_op = &shmem_special_inode_operations;
1427 init_special_inode(inode, mode, dev);
1428 break;
1429 case S_IFREG:
1430 inode->i_mapping->a_ops = &shmem_aops;
1431 inode->i_op = &shmem_inode_operations;
1432 inode->i_fop = &shmem_file_operations;
1433 mpol_shared_policy_init(&info->policy,
1434 shmem_get_sbmpol(sbinfo));
1435 break;
1436 case S_IFDIR:
1437 inc_nlink(inode);
1438
1439 inode->i_size = 2 * BOGO_DIRENT_SIZE;
1440 inode->i_op = &shmem_dir_inode_operations;
1441 inode->i_fop = &simple_dir_operations;
1442 break;
1443 case S_IFLNK:
1444
1445
1446
1447
1448 mpol_shared_policy_init(&info->policy, NULL);
1449 break;
1450 }
1451 } else
1452 shmem_free_inode(sb);
1453 return inode;
1454}
1455
1456bool shmem_mapping(struct address_space *mapping)
1457{
1458 if (!mapping->host)
1459 return false;
1460
1461 return mapping->host->i_sb->s_op == &shmem_ops;
1462}
1463
1464#ifdef CONFIG_TMPFS
/* Symlink inode_operations tables; their initialisers appear later in this file. */
static const struct inode_operations shmem_symlink_inode_operations;
static const struct inode_operations shmem_short_symlink_operations;

/*
 * shmem_initxattrs is handed to security_inode_init_security() as a
 * callback; when CONFIG_TMPFS_XATTR is off it is simply NULL.
 */
#ifdef CONFIG_TMPFS_XATTR
static int shmem_initxattrs(struct inode *, const struct xattr *, void *);
#else
#define shmem_initxattrs NULL
#endif
1473
1474static int
1475shmem_write_begin(struct file *file, struct address_space *mapping,
1476 loff_t pos, unsigned len, unsigned flags,
1477 struct page **pagep, void **fsdata)
1478{
1479 struct inode *inode = mapping->host;
1480 struct shmem_inode_info *info = SHMEM_I(inode);
1481 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
1482
1483
1484 if (unlikely(info->seals)) {
1485 if (info->seals & F_SEAL_WRITE)
1486 return -EPERM;
1487 if ((info->seals & F_SEAL_GROW) && pos + len > inode->i_size)
1488 return -EPERM;
1489 }
1490
1491 return shmem_getpage(inode, index, pagep, SGP_WRITE, NULL);
1492}
1493
1494static int
1495shmem_write_end(struct file *file, struct address_space *mapping,
1496 loff_t pos, unsigned len, unsigned copied,
1497 struct page *page, void *fsdata)
1498{
1499 struct inode *inode = mapping->host;
1500
1501 if (pos + copied > inode->i_size)
1502 i_size_write(inode, pos + copied);
1503
1504 if (!PageUptodate(page)) {
1505 if (copied < PAGE_CACHE_SIZE) {
1506 unsigned from = pos & (PAGE_CACHE_SIZE - 1);
1507 zero_user_segments(page, 0, from,
1508 from + copied, PAGE_CACHE_SIZE);
1509 }
1510 SetPageUptodate(page);
1511 }
1512 set_page_dirty(page);
1513 unlock_page(page);
1514 page_cache_release(page);
1515
1516 return copied;
1517}
1518
/*
 * shmem_file_read_iter - read_iter implementation for shmem files.
 * @iocb: carries the file and the file position (ki_pos)
 * @to:   destination iterator
 *
 * Copies data page by page starting at iocb->ki_pos until @to is full,
 * i_size is reached, or an error occurs.  ki_pos is advanced by the amount
 * copied and the file is marked accessed.
 *
 * Returns the number of bytes copied if that is non-zero, otherwise the
 * error code (0 at end of file).
 */
static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file_inode(file);
	struct address_space *mapping = inode->i_mapping;
	pgoff_t index;
	unsigned long offset;
	enum sgp_type sgp = SGP_READ;
	int error = 0;
	ssize_t retval = 0;
	loff_t *ppos = &iocb->ki_pos;

	/*
	 * A destination that is not a user iovec is fetched with SGP_DIRTY
	 * rather than SGP_READ.
	 * NOTE(review): presumably so that holes get real pages for in-kernel
	 * readers - confirm against shmem_getpage().
	 */
	if (!iter_is_iovec(to))
		sgp = SGP_DIRTY;

	/* Split the position into a page index and an offset within the page. */
	index = *ppos >> PAGE_CACHE_SHIFT;
	offset = *ppos & ~PAGE_CACHE_MASK;

	for (;;) {
		struct page *page = NULL;
		pgoff_t end_index;
		unsigned long nr, ret;
		loff_t i_size = i_size_read(inode);

		/* Stop if the position is already at or beyond i_size. */
		end_index = i_size >> PAGE_CACHE_SHIFT;
		if (index > end_index)
			break;
		if (index == end_index) {
			nr = i_size & ~PAGE_CACHE_MASK;
			if (nr <= offset)
				break;
		}

		error = shmem_getpage(inode, index, &page, sgp, NULL);
		if (error) {
			/* -EINVAL ends the read without reporting an error. */
			if (error == -EINVAL)
				error = 0;
			break;
		}
		/* page may still be NULL here; that case is handled below. */
		if (page)
			unlock_page(page);

		/*
		 * Re-read i_size now that the page has been obtained and
		 * recompute how many bytes of this page may be copied.
		 */
		nr = PAGE_CACHE_SIZE;
		i_size = i_size_read(inode);
		end_index = i_size >> PAGE_CACHE_SHIFT;
		if (index == end_index) {
			nr = i_size & ~PAGE_CACHE_MASK;
			if (nr <= offset) {
				if (page)
					page_cache_release(page);
				break;
			}
		}
		nr -= offset;

		if (page) {
			/*
			 * Flush the dcache before reading when the mapping is
			 * writably mapped.
			 */
			if (mapping_writably_mapped(mapping))
				flush_dcache_page(page);
			/* Only a read from the start of the page marks it accessed. */
			if (!offset)
				mark_page_accessed(page);
		} else {
			/* No page was returned: copy from the zero page instead. */
			page = ZERO_PAGE(0);
			page_cache_get(page);
		}

		/*
		 * Copy to the destination and advance index/offset by the
		 * number of bytes actually copied.
		 */
		ret = copy_page_to_iter(page, offset, nr, to);
		retval += ret;
		offset += ret;
		index += offset >> PAGE_CACHE_SHIFT;
		offset &= ~PAGE_CACHE_MASK;

		page_cache_release(page);
		/* Destination is full: done. */
		if (!iov_iter_count(to))
			break;
		/* Short copy with room left in the destination is a fault. */
		if (ret < nr) {
			error = -EFAULT;
			break;
		}
		cond_resched();
	}

	*ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
	file_accessed(file);
	return retval ? retval : error;
}
1625
/*
 * shmem_file_splice_read - splice_read implementation for shmem files.
 * @in:    source file
 * @ppos:  file position; advanced by the number of bytes spliced
 * @pipe:  destination pipe
 * @len:   maximum number of bytes to splice
 * @flags: splice flags, passed through in the splice_pipe_desc
 *
 * Gathers up to spd.nr_pages_max pages covering [*ppos, *ppos + len),
 * clamped to i_size, and hands them to splice_to_pipe().
 *
 * Returns the value of splice_to_pipe() if any page was queued, 0 when
 * *ppos is at or past i_size, -ENOMEM if splice_grow_spd() fails, or the
 * last shmem_getpage() error when no page could be queued.
 */
static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
				struct pipe_inode_info *pipe, size_t len,
				unsigned int flags)
{
	struct address_space *mapping = in->f_mapping;
	struct inode *inode = mapping->host;
	unsigned int loff, nr_pages, req_pages;
	struct page *pages[PIPE_DEF_BUFFERS];
	struct partial_page partial[PIPE_DEF_BUFFERS];
	struct page *page;
	pgoff_t index, end_index;
	loff_t isize, left;
	int error, page_nr;
	struct splice_pipe_desc spd = {
		.pages = pages,
		.partial = partial,
		.nr_pages_max = PIPE_DEF_BUFFERS,
		.flags = flags,
		.ops = &page_cache_pipe_buf_ops,
		.spd_release = spd_release_page,
	};

	/* Nothing to do at or beyond end of file. */
	isize = i_size_read(inode);
	if (unlikely(*ppos >= isize))
		return 0;

	/* Never splice past i_size. */
	left = isize - *ppos;
	if (unlikely(left < len))
		len = left;

	if (splice_grow_spd(pipe, &spd))
		return -ENOMEM;

	/* Work out how many pages the request spans, capped at nr_pages_max. */
	index = *ppos >> PAGE_CACHE_SHIFT;
	loff = *ppos & ~PAGE_CACHE_MASK;
	req_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
	nr_pages = min(req_pages, spd.nr_pages_max);

	/* First take whatever contiguous run is already in the page cache. */
	spd.nr_pages = find_get_pages_contig(mapping, index,
						nr_pages, spd.pages);
	index += spd.nr_pages;
	error = 0;

	/* Then fetch the remaining pages one at a time. */
	while (spd.nr_pages < nr_pages) {
		error = shmem_getpage(inode, index, &page, SGP_CACHE, NULL);
		if (error)
			break;
		unlock_page(page);
		spd.pages[spd.nr_pages++] = page;
		index++;
	}

	/*
	 * Second pass over the gathered pages: nr_pages becomes the number
	 * gathered, and spd.nr_pages is rebuilt as the number accepted.
	 */
	index = *ppos >> PAGE_CACHE_SHIFT;
	nr_pages = spd.nr_pages;
	spd.nr_pages = 0;

	for (page_nr = 0; page_nr < nr_pages; page_nr++) {
		unsigned int this_len;

		if (!len)
			break;

		/* Bytes wanted from this page. */
		this_len = min_t(unsigned long, len, PAGE_CACHE_SIZE - loff);
		page = spd.pages[page_nr];

		/*
		 * A page that is not uptodate or no longer belongs to this
		 * mapping is replaced by a fresh shmem_getpage() result.
		 */
		if (!PageUptodate(page) || page->mapping != mapping) {
			error = shmem_getpage(inode, index, &page,
							SGP_CACHE, NULL);
			if (error)
				break;
			unlock_page(page);
			page_cache_release(spd.pages[page_nr]);
			spd.pages[page_nr] = page;
		}

		/* Re-check i_size for every page. */
		isize = i_size_read(inode);
		end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
		if (unlikely(!isize || index > end_index))
			break;

		/* On the last page of the file, trim to the valid bytes. */
		if (end_index == index) {
			unsigned int plen;

			/* Number of valid bytes in the last page. */
			plen = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
			if (plen <= loff)
				break;

			/* Setting len = this_len makes this the final page accepted. */
			this_len = min(this_len, plen - loff);
			len = this_len;
		}

		spd.partial[page_nr].offset = loff;
		spd.partial[page_nr].len = this_len;
		len -= this_len;
		loff = 0;
		spd.nr_pages++;
		index++;
	}

	/* Drop the references on gathered pages that were not accepted. */
	while (page_nr < nr_pages)
		page_cache_release(spd.pages[page_nr++]);

	if (spd.nr_pages)
		error = splice_to_pipe(pipe, &spd);

	splice_shrink_spd(&spd);

	/* A positive result is a byte count: advance the position. */
	if (error > 0) {
		*ppos += error;
		file_accessed(in);
	}
	return error;
}
1739
1740
1741
1742
/*
 * shmem_seek_hole_data - find the next hole or data page in a mapping.
 * @mapping: mapping to scan
 * @index:   page index to start from
 * @end:     page index at which the scan stops
 * @whence:  SEEK_DATA or SEEK_HOLE
 *
 * Returns the page index where the scan stopped.  For SEEK_DATA with no
 * further entries in the mapping, @end is returned.
 */
static pgoff_t shmem_seek_hole_data(struct address_space *mapping,
				    pgoff_t index, pgoff_t end, int whence)
{
	struct page *page;
	struct pagevec pvec;
	pgoff_t indices[PAGEVEC_SIZE];
	bool done = false;
	int i;

	pagevec_init(&pvec, 0);
	/* The first lookup asks for one entry; later ones for PAGEVEC_SIZE. */
	pvec.nr = 1;
	while (!done) {
		pvec.nr = find_get_entries(mapping, index,
					pvec.nr, pvec.pages, indices);
		if (!pvec.nr) {
			/* No more entries: no more data from here on. */
			if (whence == SEEK_DATA)
				index = end;
			break;
		}
		for (i = 0; i < pvec.nr; i++, index++) {
			/* A gap before the next entry is a hole at index. */
			if (index < indices[i]) {
				if (whence == SEEK_HOLE) {
					done = true;
					break;
				}
				index = indices[i];
			}
			page = pvec.pages[i];
			/*
			 * A real page that is not uptodate is treated as a
			 * hole; exceptional entries stay non-NULL and so
			 * count as data.
			 */
			if (page && !radix_tree_exceptional_entry(page)) {
				if (!PageUptodate(page))
					page = NULL;
			}
			if (index >= end ||
			    (page && whence == SEEK_DATA) ||
			    (!page && whence == SEEK_HOLE)) {
				done = true;
				break;
			}
		}
		pagevec_remove_exceptionals(&pvec);
		pagevec_release(&pvec);
		pvec.nr = PAGEVEC_SIZE;
		cond_resched();
	}
	return index;
}
1789
1790static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence)
1791{
1792 struct address_space *mapping = file->f_mapping;
1793 struct inode *inode = mapping->host;
1794 pgoff_t start, end;
1795 loff_t new_offset;
1796
1797 if (whence != SEEK_DATA && whence != SEEK_HOLE)
1798 return generic_file_llseek_size(file, offset, whence,
1799 MAX_LFS_FILESIZE, i_size_read(inode));
1800 mutex_lock(&inode->i_mutex);
1801
1802
1803 if (offset < 0)
1804 offset = -EINVAL;
1805 else if (offset >= inode->i_size)
1806 offset = -ENXIO;
1807 else {
1808 start = offset >> PAGE_CACHE_SHIFT;
1809 end = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
1810 new_offset = shmem_seek_hole_data(mapping, start, end, whence);
1811 new_offset <<= PAGE_CACHE_SHIFT;
1812 if (new_offset > offset) {
1813 if (new_offset < inode->i_size)
1814 offset = new_offset;
1815 else if (whence == SEEK_DATA)
1816 offset = -ENXIO;
1817 else
1818 offset = inode->i_size;
1819 }
1820 }
1821
1822 if (offset >= 0)
1823 offset = vfs_setpos(file, offset, MAX_LFS_FILESIZE);
1824 mutex_unlock(&inode->i_mutex);
1825 return offset;
1826}
1827
1828
1829
1830
1831
/* Radix-tree tag used to mark pinned pages; it aliases PAGECACHE_TAG_TOWRITE. */
#define SHMEM_TAG_PINNED        PAGECACHE_TAG_TOWRITE
/* Number of the final scan in shmem_wait_for_pins() (scans run 0..LAST_SCAN). */
#define LAST_SCAN               4
1834
/*
 * shmem_tag_pins - tag every page of @mapping that looks pinned.
 *
 * Walks all slots of the mapping's radix tree under rcu_read_lock() and
 * sets SHMEM_TAG_PINNED on each page whose page_count() exceeds its
 * page_mapcount() by more than one.
 */
static void shmem_tag_pins(struct address_space *mapping)
{
	struct radix_tree_iter iter;
	void **slot;
	pgoff_t start;
	struct page *page;

	lru_add_drain();
	start = 0;
	rcu_read_lock();

restart:
	radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
		page = radix_tree_deref_slot(slot);
		if (!page || radix_tree_exception(page)) {
			/* Lookup must be retried: walk again from start. */
			if (radix_tree_deref_retry(page))
				goto restart;
		} else if (page_count(page) - page_mapcount(page) > 1) {
			/* Tags are modified under tree_lock. */
			spin_lock_irq(&mapping->tree_lock);
			radix_tree_tag_set(&mapping->page_tree, iter.index,
					   SHMEM_TAG_PINNED);
			spin_unlock_irq(&mapping->tree_lock);
		}

		/* Yield if needed, then resume after the current index. */
		if (need_resched()) {
			cond_resched_rcu();
			start = iter.index + 1;
			goto restart;
		}
	}
	rcu_read_unlock();
}
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
/*
 * shmem_wait_for_pins - wait for pinned pages of @mapping to be released.
 *
 * Tags pinned pages with shmem_tag_pins(), then rescans the tagged pages
 * up to LAST_SCAN + 1 times.  A page whose page_count() minus
 * page_mapcount() equals one has its tag cleared.  Before the last scan a
 * still-pinned page keeps its tag; on the last scan every remaining tag is
 * cleared and -EBUSY is recorded if any such page was still pinned.
 *
 * Returns 0 if no pinned page remained, -EBUSY otherwise.
 */
static int shmem_wait_for_pins(struct address_space *mapping)
{
	struct radix_tree_iter iter;
	void **slot;
	pgoff_t start;
	struct page *page;
	int error, scan;

	shmem_tag_pins(mapping);

	error = 0;
	for (scan = 0; scan <= LAST_SCAN; scan++) {
		/* No tagged pages left: nothing to wait for. */
		if (!radix_tree_tagged(&mapping->page_tree, SHMEM_TAG_PINNED))
			break;

		/*
		 * The first scan drains the LRU caches on all CPUs; later
		 * scans sleep for (HZ << scan) / 200 jiffies first.  A
		 * non-zero return from the killable sleep jumps straight to
		 * the last scan.
		 */
		if (!scan)
			lru_add_drain_all();
		else if (schedule_timeout_killable((HZ << scan) / 200))
			scan = LAST_SCAN;

		start = 0;
		rcu_read_lock();
restart:
		radix_tree_for_each_tagged(slot, &mapping->page_tree, &iter,
					   start, SHMEM_TAG_PINNED) {

			page = radix_tree_deref_slot(slot);
			if (radix_tree_exception(page)) {
				if (radix_tree_deref_retry(page))
					goto restart;

				/* Exceptional entry: nothing to check, just untag. */
				page = NULL;
			}

			if (page &&
			    page_count(page) - page_mapcount(page) != 1) {
				/* Still pinned: keep the tag for a later scan. */
				if (scan < LAST_SCAN)
					goto continue_resched;

				/*
				 * Last scan: the tag is cleared below, but
				 * note that a pinned page was found.
				 */
				error = -EBUSY;
			}

			spin_lock_irq(&mapping->tree_lock);
			radix_tree_tag_clear(&mapping->page_tree,
					     iter.index, SHMEM_TAG_PINNED);
			spin_unlock_irq(&mapping->tree_lock);
continue_resched:
			/* Yield if needed, then resume after the current index. */
			if (need_resched()) {
				cond_resched_rcu();
				start = iter.index + 1;
				goto restart;
			}
		}
		rcu_read_unlock();
	}

	return error;
}
1940
/* Every seal bit accepted by shmem_add_seals(); any other bit is rejected. */
#define F_ALL_SEALS (F_SEAL_SEAL | \
		     F_SEAL_SHRINK | \
		     F_SEAL_GROW | \
		     F_SEAL_WRITE)
1945
1946int shmem_add_seals(struct file *file, unsigned int seals)
1947{
1948 struct inode *inode = file_inode(file);
1949 struct shmem_inode_info *info = SHMEM_I(inode);
1950 int error;
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982 if (file->f_op != &shmem_file_operations)
1983 return -EINVAL;
1984 if (!(file->f_mode & FMODE_WRITE))
1985 return -EPERM;
1986 if (seals & ~(unsigned int)F_ALL_SEALS)
1987 return -EINVAL;
1988
1989 mutex_lock(&inode->i_mutex);
1990
1991 if (info->seals & F_SEAL_SEAL) {
1992 error = -EPERM;
1993 goto unlock;
1994 }
1995
1996 if ((seals & F_SEAL_WRITE) && !(info->seals & F_SEAL_WRITE)) {
1997 error = mapping_deny_writable(file->f_mapping);
1998 if (error)
1999 goto unlock;
2000
2001 error = shmem_wait_for_pins(file->f_mapping);
2002 if (error) {
2003 mapping_allow_writable(file->f_mapping);
2004 goto unlock;
2005 }
2006 }
2007
2008 info->seals |= seals;
2009 error = 0;
2010
2011unlock:
2012 mutex_unlock(&inode->i_mutex);
2013 return error;
2014}
2015EXPORT_SYMBOL_GPL(shmem_add_seals);
2016
2017int shmem_get_seals(struct file *file)
2018{
2019 if (file->f_op != &shmem_file_operations)
2020 return -EINVAL;
2021
2022 return SHMEM_I(file_inode(file))->seals;
2023}
2024EXPORT_SYMBOL_GPL(shmem_get_seals);
2025
2026long shmem_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
2027{
2028 long error;
2029
2030 switch (cmd) {
2031 case F_ADD_SEALS:
2032
2033 if (arg > UINT_MAX)
2034 return -EINVAL;
2035
2036 error = shmem_add_seals(file, arg);
2037 break;
2038 case F_GET_SEALS:
2039 error = shmem_get_seals(file);
2040 break;
2041 default:
2042 error = -EINVAL;
2043 break;
2044 }
2045
2046 return error;
2047}
2048
/*
 * shmem_fallocate - fallocate implementation: preallocate or punch a hole.
 * @file:   target file
 * @mode:   FALLOC_FL_KEEP_SIZE and/or FALLOC_FL_PUNCH_HOLE; anything else
 *          yields -EOPNOTSUPP
 * @offset: start of the range in bytes
 * @len:    length of the range in bytes
 *
 * Runs under i_mutex.  While working, a pointer to an on-stack
 * struct shmem_falloc describing the range is published in
 * inode->i_private (set and cleared under i_lock).
 * NOTE(review): the consumers of i_private are outside this block.
 *
 * Returns 0 on success or a negative errno: -EPERM when a seal forbids the
 * operation, -ENOSPC when the range exceeds max_blocks, -EINTR on a pending
 * signal, -ENOMEM when nr_unswapped overtakes nr_falloced, or the error
 * from inode_newsize_ok()/shmem_getpage().
 */
static long shmem_fallocate(struct file *file, int mode, loff_t offset,
							 loff_t len)
{
	struct inode *inode = file_inode(file);
	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
	struct shmem_inode_info *info = SHMEM_I(inode);
	struct shmem_falloc shmem_falloc;
	pgoff_t start, index, end;
	int error;

	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
		return -EOPNOTSUPP;

	mutex_lock(&inode->i_mutex);

	if (mode & FALLOC_FL_PUNCH_HOLE) {
		struct address_space *mapping = file->f_mapping;
		/* Whole pages lying entirely inside [offset, offset + len). */
		loff_t unmap_start = round_up(offset, PAGE_SIZE);
		loff_t unmap_end = round_down(offset + len, PAGE_SIZE) - 1;
		DECLARE_WAIT_QUEUE_HEAD_ONSTACK(shmem_falloc_waitq);

		/* Hole punching is a write: refuse it on a write-sealed file. */
		if (info->seals & F_SEAL_WRITE) {
			error = -EPERM;
			goto out;
		}

		/* Publish the range being punched, with a wait queue attached. */
		shmem_falloc.waitq = &shmem_falloc_waitq;
		shmem_falloc.start = unmap_start >> PAGE_SHIFT;
		shmem_falloc.next = (unmap_end + 1) >> PAGE_SHIFT;
		spin_lock(&inode->i_lock);
		inode->i_private = &shmem_falloc;
		spin_unlock(&inode->i_lock);

		/* Unmap only if at least one whole page is covered. */
		if ((u64)unmap_end > (u64)unmap_start)
			unmap_mapping_range(mapping, unmap_start,
					    1 + unmap_end - unmap_start, 0);
		shmem_truncate_range(inode, offset, offset + len - 1);

		/* Withdraw the published range and wake anyone waiting on it. */
		spin_lock(&inode->i_lock);
		inode->i_private = NULL;
		wake_up_all(&shmem_falloc_waitq);
		spin_unlock(&inode->i_lock);
		error = 0;
		goto out;
	}

	/* Preallocation: the resulting size must be acceptable. */
	error = inode_newsize_ok(inode, offset + len);
	if (error)
		goto out;

	/* A grow-sealed file may not be extended. */
	if ((info->seals & F_SEAL_GROW) && offset + len > inode->i_size) {
		error = -EPERM;
		goto out;
	}

	/* Page range [start, end) covering the byte range. */
	start = offset >> PAGE_CACHE_SHIFT;
	end = (offset + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;

	/* Fail early if the request alone exceeds the filesystem limit. */
	if (sbinfo->max_blocks && end - start > sbinfo->max_blocks) {
		error = -ENOSPC;
		goto out;
	}

	/* Publish the preallocation state; no wait queue in this mode. */
	shmem_falloc.waitq = NULL;
	shmem_falloc.start = start;
	shmem_falloc.next  = start;
	shmem_falloc.nr_falloced = 0;
	shmem_falloc.nr_unswapped = 0;
	spin_lock(&inode->i_lock);
	inode->i_private = &shmem_falloc;
	spin_unlock(&inode->i_lock);

	for (index = start; index < end; index++) {
		struct page *page;

		/*
		 * Give up on a pending signal, or when nr_unswapped has
		 * overtaken nr_falloced.
		 * NOTE(review): nr_unswapped is not modified in this
		 * function; presumably it is bumped elsewhere through
		 * i_private - confirm.
		 */
		if (signal_pending(current))
			error = -EINTR;
		else if (shmem_falloc.nr_unswapped > shmem_falloc.nr_falloced)
			error = -ENOMEM;
		else
			error = shmem_getpage(inode, index, &page, SGP_FALLOC,
									NULL);
		if (error) {
			/* Undo what this call allocated in [start, index). */
			shmem_undo_range(inode,
				(loff_t)start << PAGE_CACHE_SHIFT,
				(loff_t)index << PAGE_CACHE_SHIFT, true);
			goto undone;
		}

		/*
		 * Advance the published progress marker; a page that is not
		 * uptodate counts as newly fallocated.
		 */
		shmem_falloc.next++;
		if (!PageUptodate(page))
			shmem_falloc.nr_falloced++;

		/* Dirty the page before unlocking and releasing it. */
		set_page_dirty(page);
		unlock_page(page);
		page_cache_release(page);
		cond_resched();
	}

	/* Extend i_size unless the caller asked to keep it. */
	if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size)
		i_size_write(inode, offset + len);
	inode->i_ctime = CURRENT_TIME;
undone:
	spin_lock(&inode->i_lock);
	inode->i_private = NULL;
	spin_unlock(&inode->i_lock);
out:
	mutex_unlock(&inode->i_mutex);
	return error;
}
2178
2179static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
2180{
2181 struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb);
2182
2183 buf->f_type = TMPFS_MAGIC;
2184 buf->f_bsize = PAGE_CACHE_SIZE;
2185 buf->f_namelen = NAME_MAX;
2186 if (sbinfo->max_blocks) {
2187 buf->f_blocks = sbinfo->max_blocks;
2188 buf->f_bavail =
2189 buf->f_bfree = sbinfo->max_blocks -
2190 percpu_counter_sum(&sbinfo->used_blocks);
2191 }
2192 if (sbinfo->max_inodes) {
2193 buf->f_files = sbinfo->max_inodes;
2194 buf->f_ffree = sbinfo->free_inodes;
2195 }
2196
2197 return 0;
2198}
2199
2200
2201
2202
2203static int
2204shmem_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
2205{
2206 struct inode *inode;
2207 int error = -ENOSPC;
2208
2209 inode = shmem_get_inode(dir->i_sb, dir, mode, dev, VM_NORESERVE);
2210 if (inode) {
2211 error = simple_acl_create(dir, inode);
2212 if (error)
2213 goto out_iput;
2214 error = security_inode_init_security(inode, dir,
2215 &dentry->d_name,
2216 shmem_initxattrs, NULL);
2217 if (error && error != -EOPNOTSUPP)
2218 goto out_iput;
2219
2220 error = 0;
2221 dir->i_size += BOGO_DIRENT_SIZE;
2222 dir->i_ctime = dir->i_mtime = CURRENT_TIME;
2223 d_instantiate(dentry, inode);
2224 dget(dentry);
2225 }
2226 return error;
2227out_iput:
2228 iput(inode);
2229 return error;
2230}
2231
2232static int
2233shmem_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
2234{
2235 struct inode *inode;
2236 int error = -ENOSPC;
2237
2238 inode = shmem_get_inode(dir->i_sb, dir, mode, 0, VM_NORESERVE);
2239 if (inode) {
2240 error = security_inode_init_security(inode, dir,
2241 NULL,
2242 shmem_initxattrs, NULL);
2243 if (error && error != -EOPNOTSUPP)
2244 goto out_iput;
2245 error = simple_acl_create(dir, inode);
2246 if (error)
2247 goto out_iput;
2248 d_tmpfile(dentry, inode);
2249 }
2250 return error;
2251out_iput:
2252 iput(inode);
2253 return error;
2254}
2255
2256static int shmem_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
2257{
2258 int error;
2259
2260 if ((error = shmem_mknod(dir, dentry, mode | S_IFDIR, 0)))
2261 return error;
2262 inc_nlink(dir);
2263 return 0;
2264}
2265
2266static int shmem_create(struct inode *dir, struct dentry *dentry, umode_t mode,
2267 bool excl)
2268{
2269 return shmem_mknod(dir, dentry, mode | S_IFREG, 0);
2270}
2271
2272
2273
2274
2275static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
2276{
2277 struct inode *inode = d_inode(old_dentry);
2278 int ret;
2279
2280
2281
2282
2283
2284
2285 ret = shmem_reserve_inode(inode->i_sb);
2286 if (ret)
2287 goto out;
2288
2289 dir->i_size += BOGO_DIRENT_SIZE;
2290 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
2291 inc_nlink(inode);
2292 ihold(inode);
2293 dget(dentry);
2294 d_instantiate(dentry, inode);
2295out:
2296 return ret;
2297}
2298
2299static int shmem_unlink(struct inode *dir, struct dentry *dentry)
2300{
2301 struct inode *inode = d_inode(dentry);
2302
2303 if (inode->i_nlink > 1 && !S_ISDIR(inode->i_mode))
2304 shmem_free_inode(inode->i_sb);
2305
2306 dir->i_size -= BOGO_DIRENT_SIZE;
2307 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
2308 drop_nlink(inode);
2309 dput(dentry);
2310 return 0;
2311}
2312
2313static int shmem_rmdir(struct inode *dir, struct dentry *dentry)
2314{
2315 if (!simple_empty(dentry))
2316 return -ENOTEMPTY;
2317
2318 drop_nlink(d_inode(dentry));
2319 drop_nlink(dir);
2320 return shmem_unlink(dir, dentry);
2321}
2322
2323static int shmem_exchange(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry)
2324{
2325 bool old_is_dir = d_is_dir(old_dentry);
2326 bool new_is_dir = d_is_dir(new_dentry);
2327
2328 if (old_dir != new_dir && old_is_dir != new_is_dir) {
2329 if (old_is_dir) {
2330 drop_nlink(old_dir);
2331 inc_nlink(new_dir);
2332 } else {
2333 drop_nlink(new_dir);
2334 inc_nlink(old_dir);
2335 }
2336 }
2337 old_dir->i_ctime = old_dir->i_mtime =
2338 new_dir->i_ctime = new_dir->i_mtime =
2339 d_inode(old_dentry)->i_ctime =
2340 d_inode(new_dentry)->i_ctime = CURRENT_TIME;
2341
2342 return 0;
2343}
2344
2345static int shmem_whiteout(struct inode *old_dir, struct dentry *old_dentry)
2346{
2347 struct dentry *whiteout;
2348 int error;
2349
2350 whiteout = d_alloc(old_dentry->d_parent, &old_dentry->d_name);
2351 if (!whiteout)
2352 return -ENOMEM;
2353
2354 error = shmem_mknod(old_dir, whiteout,
2355 S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV);
2356 dput(whiteout);
2357 if (error)
2358 return error;
2359
2360
2361
2362
2363
2364
2365
2366
2367 d_rehash(whiteout);
2368 return 0;
2369}
2370
2371
2372
2373
2374
2375
2376
2377static int shmem_rename2(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags)
2378{
2379 struct inode *inode = d_inode(old_dentry);
2380 int they_are_dirs = S_ISDIR(inode->i_mode);
2381
2382 if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
2383 return -EINVAL;
2384
2385 if (flags & RENAME_EXCHANGE)
2386 return shmem_exchange(old_dir, old_dentry, new_dir, new_dentry);
2387
2388 if (!simple_empty(new_dentry))
2389 return -ENOTEMPTY;
2390
2391 if (flags & RENAME_WHITEOUT) {
2392 int error;
2393
2394 error = shmem_whiteout(old_dir, old_dentry);
2395 if (error)
2396 return error;
2397 }
2398
2399 if (d_really_is_positive(new_dentry)) {
2400 (void) shmem_unlink(new_dir, new_dentry);
2401 if (they_are_dirs) {
2402 drop_nlink(d_inode(new_dentry));
2403 drop_nlink(old_dir);
2404 }
2405 } else if (they_are_dirs) {
2406 drop_nlink(old_dir);
2407 inc_nlink(new_dir);
2408 }
2409
2410 old_dir->i_size -= BOGO_DIRENT_SIZE;
2411 new_dir->i_size += BOGO_DIRENT_SIZE;
2412 old_dir->i_ctime = old_dir->i_mtime =
2413 new_dir->i_ctime = new_dir->i_mtime =
2414 inode->i_ctime = CURRENT_TIME;
2415 return 0;
2416}
2417
2418static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
2419{
2420 int error;
2421 int len;
2422 struct inode *inode;
2423 struct page *page;
2424 char *kaddr;
2425 struct shmem_inode_info *info;
2426
2427 len = strlen(symname) + 1;
2428 if (len > PAGE_CACHE_SIZE)
2429 return -ENAMETOOLONG;
2430
2431 inode = shmem_get_inode(dir->i_sb, dir, S_IFLNK|S_IRWXUGO, 0, VM_NORESERVE);
2432 if (!inode)
2433 return -ENOSPC;
2434
2435 error = security_inode_init_security(inode, dir, &dentry->d_name,
2436 shmem_initxattrs, NULL);
2437 if (error) {
2438 if (error != -EOPNOTSUPP) {
2439 iput(inode);
2440 return error;
2441 }
2442 error = 0;
2443 }
2444
2445 info = SHMEM_I(inode);
2446 inode->i_size = len-1;
2447 if (len <= SHORT_SYMLINK_LEN) {
2448 info->symlink = kmemdup(symname, len, GFP_KERNEL);
2449 if (!info->symlink) {
2450 iput(inode);
2451 return -ENOMEM;
2452 }
2453 inode->i_op = &shmem_short_symlink_operations;
2454 } else {
2455 error = shmem_getpage(inode, 0, &page, SGP_WRITE, NULL);
2456 if (error) {
2457 iput(inode);
2458 return error;
2459 }
2460 inode->i_mapping->a_ops = &shmem_aops;
2461 inode->i_op = &shmem_symlink_inode_operations;
2462 kaddr = kmap_atomic(page);
2463 memcpy(kaddr, symname, len);
2464 kunmap_atomic(kaddr);
2465 SetPageUptodate(page);
2466 set_page_dirty(page);
2467 unlock_page(page);
2468 page_cache_release(page);
2469 }
2470 dir->i_size += BOGO_DIRENT_SIZE;
2471 dir->i_ctime = dir->i_mtime = CURRENT_TIME;
2472 d_instantiate(dentry, inode);
2473 dget(dentry);
2474 return 0;
2475}
2476
2477static void *shmem_follow_short_symlink(struct dentry *dentry, struct nameidata *nd)
2478{
2479 nd_set_link(nd, SHMEM_I(d_inode(dentry))->symlink);
2480 return NULL;
2481}
2482
2483static void *shmem_follow_link(struct dentry *dentry, struct nameidata *nd)
2484{
2485 struct page *page = NULL;
2486 int error = shmem_getpage(d_inode(dentry), 0, &page, SGP_READ, NULL);
2487 nd_set_link(nd, error ? ERR_PTR(error) : kmap(page));
2488 if (page)
2489 unlock_page(page);
2490 return page;
2491}
2492
/*
 * shmem_put_link - undo shmem_follow_link(): unmap, mark accessed and
 * release the page passed back as @cookie.  Nothing is done when the
 * stored link is an error pointer.
 */
static void shmem_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie)
{
	struct page *page = cookie;

	if (IS_ERR(nd_get_link(nd)))
		return;

	kunmap(page);
	mark_page_accessed(page);
	page_cache_release(page);
}
2502
2503#ifdef CONFIG_TMPFS_XATTR
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514static int shmem_initxattrs(struct inode *inode,
2515 const struct xattr *xattr_array,
2516 void *fs_info)
2517{
2518 struct shmem_inode_info *info = SHMEM_I(inode);
2519 const struct xattr *xattr;
2520 struct simple_xattr *new_xattr;
2521 size_t len;
2522
2523 for (xattr = xattr_array; xattr->name != NULL; xattr++) {
2524 new_xattr = simple_xattr_alloc(xattr->value, xattr->value_len);
2525 if (!new_xattr)
2526 return -ENOMEM;
2527
2528 len = strlen(xattr->name) + 1;
2529 new_xattr->name = kmalloc(XATTR_SECURITY_PREFIX_LEN + len,
2530 GFP_KERNEL);
2531 if (!new_xattr->name) {
2532 kfree(new_xattr);
2533 return -ENOMEM;
2534 }
2535
2536 memcpy(new_xattr->name, XATTR_SECURITY_PREFIX,
2537 XATTR_SECURITY_PREFIX_LEN);
2538 memcpy(new_xattr->name + XATTR_SECURITY_PREFIX_LEN,
2539 xattr->name, len);
2540
2541 simple_xattr_list_add(&info->xattrs, new_xattr);
2542 }
2543
2544 return 0;
2545}
2546
/*
 * NULL-terminated xattr handler table.  It holds the POSIX ACL handlers
 * when CONFIG_TMPFS_POSIX_ACL is enabled and is otherwise empty.
 */
static const struct xattr_handler *shmem_xattr_handlers[] = {
#ifdef CONFIG_TMPFS_POSIX_ACL
	&posix_acl_access_xattr_handler,
	&posix_acl_default_xattr_handler,
#endif
	NULL
};
2554
2555static int shmem_xattr_validate(const char *name)
2556{
2557 struct { const char *prefix; size_t len; } arr[] = {
2558 { XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN },
2559 { XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN }
2560 };
2561 int i;
2562
2563 for (i = 0; i < ARRAY_SIZE(arr); i++) {
2564 size_t preflen = arr[i].len;
2565 if (strncmp(name, arr[i].prefix, preflen) == 0) {
2566 if (!name[preflen])
2567 return -EINVAL;
2568 return 0;
2569 }
2570 }
2571 return -EOPNOTSUPP;
2572}
2573
2574static ssize_t shmem_getxattr(struct dentry *dentry, const char *name,
2575 void *buffer, size_t size)
2576{
2577 struct shmem_inode_info *info = SHMEM_I(d_inode(dentry));
2578 int err;
2579
2580
2581
2582
2583
2584
2585 if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
2586 return generic_getxattr(dentry, name, buffer, size);
2587
2588 err = shmem_xattr_validate(name);
2589 if (err)
2590 return err;
2591
2592 return simple_xattr_get(&info->xattrs, name, buffer, size);
2593}
2594
2595static int shmem_setxattr(struct dentry *dentry, const char *name,
2596 const void *value, size_t size, int flags)
2597{
2598 struct shmem_inode_info *info = SHMEM_I(d_inode(dentry));
2599 int err;
2600
2601
2602
2603
2604
2605
2606 if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
2607 return generic_setxattr(dentry, name, value, size, flags);
2608
2609 err = shmem_xattr_validate(name);
2610 if (err)
2611 return err;
2612
2613 return simple_xattr_set(&info->xattrs, name, value, size, flags);
2614}
2615
2616static int shmem_removexattr(struct dentry *dentry, const char *name)
2617{
2618 struct shmem_inode_info *info = SHMEM_I(d_inode(dentry));
2619 int err;
2620
2621
2622
2623
2624
2625
2626 if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
2627 return generic_removexattr(dentry, name);
2628
2629 err = shmem_xattr_validate(name);
2630 if (err)
2631 return err;
2632
2633 return simple_xattr_remove(&info->xattrs, name);
2634}
2635
2636static ssize_t shmem_listxattr(struct dentry *dentry, char *buffer, size_t size)
2637{
2638 struct shmem_inode_info *info = SHMEM_I(d_inode(dentry));
2639 return simple_xattr_list(&info->xattrs, buffer, size);
2640}
2641#endif
2642
/*
 * Inode operations for symlinks whose target is kept in info->symlink.
 * There is no put_link because shmem_follow_short_symlink() returns no
 * cookie.
 */
static const struct inode_operations shmem_short_symlink_operations = {
	.readlink	= generic_readlink,
	.follow_link	= shmem_follow_short_symlink,
#ifdef CONFIG_TMPFS_XATTR
	.setxattr	= shmem_setxattr,
	.getxattr	= shmem_getxattr,
	.listxattr	= shmem_listxattr,
	.removexattr	= shmem_removexattr,
#endif
};
2653
/*
 * Inode operations for symlinks whose target is stored in page 0;
 * follow_link maps the page and put_link unmaps and releases it.
 */
static const struct inode_operations shmem_symlink_inode_operations = {
	.readlink	= generic_readlink,
	.follow_link	= shmem_follow_link,
	.put_link	= shmem_put_link,
#ifdef CONFIG_TMPFS_XATTR
	.setxattr	= shmem_setxattr,
	.getxattr	= shmem_getxattr,
	.listxattr	= shmem_listxattr,
	.removexattr	= shmem_removexattr,
#endif
};
2665
2666static struct dentry *shmem_get_parent(struct dentry *child)
2667{
2668 return ERR_PTR(-ESTALE);
2669}
2670
2671static int shmem_match(struct inode *ino, void *vfh)
2672{
2673 __u32 *fh = vfh;
2674 __u64 inum = fh[2];
2675 inum = (inum << 32) | fh[1];
2676 return ino->i_ino == inum && fh[0] == ino->i_generation;
2677}
2678
2679static struct dentry *shmem_fh_to_dentry(struct super_block *sb,
2680 struct fid *fid, int fh_len, int fh_type)
2681{
2682 struct inode *inode;
2683 struct dentry *dentry = NULL;
2684 u64 inum;
2685
2686 if (fh_len < 3)
2687 return NULL;
2688
2689 inum = fid->raw[2];
2690 inum = (inum << 32) | fid->raw[1];
2691
2692 inode = ilookup5(sb, (unsigned long)(inum + fid->raw[0]),
2693 shmem_match, fid->raw);
2694 if (inode) {
2695 dentry = d_find_alias(inode);
2696 iput(inode);
2697 }
2698
2699 return dentry;
2700}
2701
2702static int shmem_encode_fh(struct inode *inode, __u32 *fh, int *len,
2703 struct inode *parent)
2704{
2705 if (*len < 3) {
2706 *len = 3;
2707 return FILEID_INVALID;
2708 }
2709
2710 if (inode_unhashed(inode)) {
2711
2712
2713
2714
2715
2716 static DEFINE_SPINLOCK(lock);
2717 spin_lock(&lock);
2718 if (inode_unhashed(inode))
2719 __insert_inode_hash(inode,
2720 inode->i_ino + inode->i_generation);
2721 spin_unlock(&lock);
2722 }
2723
2724 fh[0] = inode->i_generation;
2725 fh[1] = inode->i_ino;
2726 fh[2] = ((__u64)inode->i_ino) >> 32;
2727
2728 *len = 3;
2729 return 1;
2730}
2731
/* File-handle export hooks: encode, decode, and a stub parent lookup. */
static const struct export_operations shmem_export_ops = {
	.get_parent     = shmem_get_parent,
	.encode_fh      = shmem_encode_fh,
	.fh_to_dentry	= shmem_fh_to_dentry,
};
2737
/*
 * shmem_parse_options - parse a tmpfs mount option string into @sbinfo.
 * @options: comma-separated "name=value" list; modified in place (NUL
 *           bytes are written over separators); may be NULL
 * @sbinfo:  receives the parsed values
 * @remount: when true, the mode, uid and gid options are skipped
 *
 * Recognised options: size, nr_blocks, nr_inodes, mode, uid, gid, mpol.
 * On success the parsed mempolicy (or NULL) is stored in sbinfo->mpol.
 * Values parsed before a later option fails remain written in @sbinfo.
 *
 * Returns 0 on success, 1 on any error (after logging it with printk and
 * dropping the mempolicy reference).
 */
static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo,
			       bool remount)
{
	char *this_char, *value, *rest;
	struct mempolicy *mpol = NULL;
	uid_t uid;
	gid_t gid;

	while (options != NULL) {
		this_char = options;
		for (;;) {
			/*
			 * Terminate this option at the next ',' that is not
			 * followed by a digit; a comma followed by a digit
			 * stays part of the current option.
			 * NOTE(review): presumably so that comma-separated
			 * numeric lists inside a value (e.g. for mpol)
			 * survive - confirm.
			 */
			options = strchr(options, ',');
			if (options == NULL)
				break;
			options++;
			if (!isdigit(*options)) {
				options[-1] = '\0';
				break;
			}
		}
		/* Skip empty options (e.g. consecutive commas). */
		if (!*this_char)
			continue;
		/* Every option must have the form name=value. */
		if ((value = strchr(this_char,'=')) != NULL) {
			*value++ = 0;
		} else {
			printk(KERN_ERR
			    "tmpfs: No value for mount option '%s'\n",
			    this_char);
			goto error;
		}

		if (!strcmp(this_char,"size")) {
			unsigned long long size;
			size = memparse(value,&rest);
			/* A trailing '%' makes the number a percentage of RAM. */
			if (*rest == '%') {
				size <<= PAGE_SHIFT;
				size *= totalram_pages;
				do_div(size, 100);
				rest++;
			}
			if (*rest)
				goto bad_val;
			/* Stored as a page count, rounded up. */
			sbinfo->max_blocks =
				DIV_ROUND_UP(size, PAGE_CACHE_SIZE);
		} else if (!strcmp(this_char,"nr_blocks")) {
			sbinfo->max_blocks = memparse(value, &rest);
			if (*rest)
				goto bad_val;
		} else if (!strcmp(this_char,"nr_inodes")) {
			sbinfo->max_inodes = memparse(value, &rest);
			if (*rest)
				goto bad_val;
		} else if (!strcmp(this_char,"mode")) {
			if (remount)
				continue;
			/* Octal; only the low 12 permission bits are kept. */
			sbinfo->mode = simple_strtoul(value, &rest, 8) & 07777;
			if (*rest)
				goto bad_val;
		} else if (!strcmp(this_char,"uid")) {
			if (remount)
				continue;
			uid = simple_strtoul(value, &rest, 0);
			if (*rest)
				goto bad_val;
			/* Map the id through the caller's user namespace. */
			sbinfo->uid = make_kuid(current_user_ns(), uid);
			if (!uid_valid(sbinfo->uid))
				goto bad_val;
		} else if (!strcmp(this_char,"gid")) {
			if (remount)
				continue;
			gid = simple_strtoul(value, &rest, 0);
			if (*rest)
				goto bad_val;
			/* Map the id through the caller's user namespace. */
			sbinfo->gid = make_kgid(current_user_ns(), gid);
			if (!gid_valid(sbinfo->gid))
				goto bad_val;
		} else if (!strcmp(this_char,"mpol")) {
			/* A later mpol= replaces an earlier one. */
			mpol_put(mpol);
			mpol = NULL;
			if (mpol_parse_str(value, &mpol))
				goto bad_val;
		} else {
			printk(KERN_ERR "tmpfs: Bad mount option %s\n",
			       this_char);
			goto error;
		}
	}
	sbinfo->mpol = mpol;
	return 0;

bad_val:
	printk(KERN_ERR "tmpfs: Bad value '%s' for mount option '%s'\n",
	       value, this_char);
error:
	mpol_put(mpol);
	return 1;

}
2841
2842static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
2843{
2844 struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
2845 struct shmem_sb_info config = *sbinfo;
2846 unsigned long inodes;
2847 int error = -EINVAL;
2848
2849 config.mpol = NULL;
2850 if (shmem_parse_options(data, &config, true))
2851 return error;
2852
2853 spin_lock(&sbinfo->stat_lock);
2854 inodes = sbinfo->max_inodes - sbinfo->free_inodes;
2855 if (percpu_counter_compare(&sbinfo->used_blocks, config.max_blocks) > 0)
2856 goto out;
2857 if (config.max_inodes < inodes)
2858 goto out;
2859
2860
2861
2862
2863
2864 if (config.max_blocks && !sbinfo->max_blocks)
2865 goto out;
2866 if (config.max_inodes && !sbinfo->max_inodes)
2867 goto out;
2868
2869 error = 0;
2870 sbinfo->max_blocks = config.max_blocks;
2871 sbinfo->max_inodes = config.max_inodes;
2872 sbinfo->free_inodes = config.max_inodes - inodes;
2873
2874
2875
2876
2877 if (config.mpol) {
2878 mpol_put(sbinfo->mpol);
2879 sbinfo->mpol = config.mpol;
2880 }
2881out:
2882 spin_unlock(&sbinfo->stat_lock);
2883 return error;
2884}
2885
2886static int shmem_show_options(struct seq_file *seq, struct dentry *root)
2887{
2888 struct shmem_sb_info *sbinfo = SHMEM_SB(root->d_sb);
2889
2890 if (sbinfo->max_blocks != shmem_default_max_blocks())
2891 seq_printf(seq, ",size=%luk",
2892 sbinfo->max_blocks << (PAGE_CACHE_SHIFT - 10));
2893 if (sbinfo->max_inodes != shmem_default_max_inodes())
2894 seq_printf(seq, ",nr_inodes=%lu", sbinfo->max_inodes);
2895 if (sbinfo->mode != (S_IRWXUGO | S_ISVTX))
2896 seq_printf(seq, ",mode=%03ho", sbinfo->mode);
2897 if (!uid_eq(sbinfo->uid, GLOBAL_ROOT_UID))
2898 seq_printf(seq, ",uid=%u",
2899 from_kuid_munged(&init_user_ns, sbinfo->uid));
2900 if (!gid_eq(sbinfo->gid, GLOBAL_ROOT_GID))
2901 seq_printf(seq, ",gid=%u",
2902 from_kgid_munged(&init_user_ns, sbinfo->gid));
2903 shmem_show_mpol(seq, sbinfo->mpol);
2904 return 0;
2905}
2906
/* String that memfd_create() prepends to the user-supplied name. */
#define MFD_NAME_PREFIX "memfd:"
/* Length of the prefix, not counting its terminating NUL. */
#define MFD_NAME_PREFIX_LEN (sizeof(MFD_NAME_PREFIX) - 1)
/* Longest user-supplied name for which prefix + name fits in NAME_MAX. */
#define MFD_NAME_MAX_LEN (NAME_MAX - MFD_NAME_PREFIX_LEN)

/* Every flag bit memfd_create() accepts; any other bit yields -EINVAL. */
#define MFD_ALL_FLAGS (MFD_CLOEXEC | MFD_ALLOW_SEALING)
2912
2913SYSCALL_DEFINE2(memfd_create,
2914 const char __user *, uname,
2915 unsigned int, flags)
2916{
2917 struct shmem_inode_info *info;
2918 struct file *file;
2919 int fd, error;
2920 char *name;
2921 long len;
2922
2923 if (flags & ~(unsigned int)MFD_ALL_FLAGS)
2924 return -EINVAL;
2925
2926
2927 len = strnlen_user(uname, MFD_NAME_MAX_LEN + 1);
2928 if (len <= 0)
2929 return -EFAULT;
2930 if (len > MFD_NAME_MAX_LEN + 1)
2931 return -EINVAL;
2932
2933 name = kmalloc(len + MFD_NAME_PREFIX_LEN, GFP_TEMPORARY);
2934 if (!name)
2935 return -ENOMEM;
2936
2937 strcpy(name, MFD_NAME_PREFIX);
2938 if (copy_from_user(&name[MFD_NAME_PREFIX_LEN], uname, len)) {
2939 error = -EFAULT;
2940 goto err_name;
2941 }
2942
2943
2944 if (name[len + MFD_NAME_PREFIX_LEN - 1]) {
2945 error = -EFAULT;
2946 goto err_name;
2947 }
2948
2949 fd = get_unused_fd_flags((flags & MFD_CLOEXEC) ? O_CLOEXEC : 0);
2950 if (fd < 0) {
2951 error = fd;
2952 goto err_name;
2953 }
2954
2955 file = shmem_file_setup(name, 0, VM_NORESERVE);
2956 if (IS_ERR(file)) {
2957 error = PTR_ERR(file);
2958 goto err_fd;
2959 }
2960 info = SHMEM_I(file_inode(file));
2961 file->f_mode |= FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE;
2962 file->f_flags |= O_RDWR | O_LARGEFILE;
2963 if (flags & MFD_ALLOW_SEALING)
2964 info->seals &= ~F_SEAL_SEAL;
2965
2966 fd_install(fd, file);
2967 kfree(name);
2968 return fd;
2969
2970err_fd:
2971 put_unused_fd(fd);
2972err_name:
2973 kfree(name);
2974 return error;
2975}
2976
2977#endif
2978
2979static void shmem_put_super(struct super_block *sb)
2980{
2981 struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
2982
2983 percpu_counter_destroy(&sbinfo->used_blocks);
2984 mpol_put(sbinfo->mpol);
2985 kfree(sbinfo);
2986 sb->s_fs_info = NULL;
2987}
2988
/*
 * Fill in a new shmem/tmpfs superblock.
 *
 * @sb:     superblock to initialize
 * @data:   mount option string; parsed only under CONFIG_TMPFS for mounts
 *          that do not have MS_KERNMOUNT set
 * @silent: not examined here
 *
 * Allocates the shmem_sb_info, applies defaults and mount options, sets up
 * the superblock fields and creates the root directory.  Returns 0 on
 * success, -EINVAL if option parsing fails, or -ENOMEM for any other
 * failure; every failure path releases the info via shmem_put_super().
 */
int shmem_fill_super(struct super_block *sb, void *data, int silent)
{
	struct inode *inode;
	struct shmem_sb_info *sbinfo;
	int err = -ENOMEM;

	/*
	 * Allocate at least L1_CACHE_BYTES (presumably to avoid sharing a
	 * cache line with unrelated objects).  Zeroed so that the limits
	 * and mpol start out as 0/NULL.
	 */
	sbinfo = kzalloc(max((int)sizeof(struct shmem_sb_info),
				L1_CACHE_BYTES), GFP_KERNEL);
	if (!sbinfo)
		return -ENOMEM;

	/* Defaults: mode 01777, owned by the mounting task's fsuid/fsgid. */
	sbinfo->mode = S_IRWXUGO | S_ISVTX;
	sbinfo->uid = current_fsuid();
	sbinfo->gid = current_fsgid();
	sb->s_fs_info = sbinfo;

#ifdef CONFIG_TMPFS
	/*
	 * User-visible mounts get the default block and inode limits, which
	 * mount options may override.  The kernel-internal mount
	 * (MS_KERNMOUNT) keeps the zeroed limits and is flagged MS_NOUSER.
	 */
	if (!(sb->s_flags & MS_KERNMOUNT)) {
		sbinfo->max_blocks = shmem_default_max_blocks();
		sbinfo->max_inodes = shmem_default_max_inodes();
		if (shmem_parse_options(data, sbinfo, false)) {
			err = -EINVAL;
			goto failed;
		}
	} else {
		sb->s_flags |= MS_NOUSER;
	}
	sb->s_export_op = &shmem_export_ops;
	sb->s_flags |= MS_NOSEC;
#else
	sb->s_flags |= MS_NOUSER;
#endif

	spin_lock_init(&sbinfo->stat_lock);
	if (percpu_counter_init(&sbinfo->used_blocks, 0, GFP_KERNEL))
		goto failed;
	/* No inodes are in use yet. */
	sbinfo->free_inodes = sbinfo->max_inodes;

	sb->s_maxbytes = MAX_LFS_FILESIZE;
	sb->s_blocksize = PAGE_CACHE_SIZE;
	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
	sb->s_magic = TMPFS_MAGIC;
	sb->s_op = &shmem_ops;
	sb->s_time_gran = 1;
#ifdef CONFIG_TMPFS_XATTR
	sb->s_xattr = shmem_xattr_handlers;
#endif
#ifdef CONFIG_TMPFS_POSIX_ACL
	sb->s_flags |= MS_POSIXACL;
#endif

	/* Create the root directory with the configured mode and owner. */
	inode = shmem_get_inode(sb, NULL, S_IFDIR | sbinfo->mode, 0, VM_NORESERVE);
	if (!inode)
		goto failed;
	inode->i_uid = sbinfo->uid;
	inode->i_gid = sbinfo->gid;
	sb->s_root = d_make_root(inode);
	if (!sb->s_root)
		goto failed;
	return 0;

failed:
	shmem_put_super(sb);
	return err;
}
3060
3061static struct kmem_cache *shmem_inode_cachep;
3062
3063static struct inode *shmem_alloc_inode(struct super_block *sb)
3064{
3065 struct shmem_inode_info *info;
3066 info = kmem_cache_alloc(shmem_inode_cachep, GFP_KERNEL);
3067 if (!info)
3068 return NULL;
3069 return &info->vfs_inode;
3070}
3071
3072static void shmem_destroy_callback(struct rcu_head *head)
3073{
3074 struct inode *inode = container_of(head, struct inode, i_rcu);
3075 kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode));
3076}
3077
3078static void shmem_destroy_inode(struct inode *inode)
3079{
3080 if (S_ISREG(inode->i_mode))
3081 mpol_free_shared_policy(&SHMEM_I(inode)->policy);
3082 call_rcu(&inode->i_rcu, shmem_destroy_callback);
3083}
3084
3085static void shmem_init_inode(void *foo)
3086{
3087 struct shmem_inode_info *info = foo;
3088 inode_init_once(&info->vfs_inode);
3089}
3090
/*
 * Create the slab cache for shmem inodes, with shmem_init_inode() as the
 * object constructor.  Always returns 0: the result of kmem_cache_create()
 * is not checked here (SLAB_PANIC is passed, which presumably makes a
 * failed creation panic instead of returning NULL).
 */
static int shmem_init_inodecache(void)
{
	shmem_inode_cachep = kmem_cache_create("shmem_inode_cache",
				sizeof(struct shmem_inode_info),
				0, SLAB_PANIC, shmem_init_inode);
	return 0;
}
3098
/*
 * Destroy the shmem inode slab cache (error path of shmem_init()).
 * Note that shmem_inode_cachep itself is not reset to NULL.
 */
static void shmem_destroy_inodecache(void)
{
	kmem_cache_destroy(shmem_inode_cachep);
}
3103
/*
 * Address-space operations for shmem mappings.  The write_begin/write_end
 * hooks exist only with CONFIG_TMPFS, and page migration only with
 * CONFIG_MIGRATION.  shmem_read_mapping_page_gfp() uses the address of
 * this table to assert that a mapping belongs to shmem.
 */
static const struct address_space_operations shmem_aops = {
	.writepage	= shmem_writepage,
	.set_page_dirty	= __set_page_dirty_no_writeback,
#ifdef CONFIG_TMPFS
	.write_begin	= shmem_write_begin,
	.write_end	= shmem_write_end,
#endif
#ifdef CONFIG_MIGRATION
	.migratepage	= migrate_page,
#endif
	.error_remove_page = generic_error_remove_page,
};
3116
/*
 * File operations for shmem regular files.  Without CONFIG_TMPFS only
 * mmap is provided; with it, seek, read/write, splice, fallocate and a
 * no-op fsync are added.
 */
static const struct file_operations shmem_file_operations = {
	.mmap		= shmem_mmap,
#ifdef CONFIG_TMPFS
	.llseek		= shmem_file_llseek,
	.read_iter	= shmem_file_read_iter,
	.write_iter	= generic_file_write_iter,
	.fsync		= noop_fsync,
	.splice_read	= shmem_file_splice_read,
	.splice_write	= iter_file_splice_write,
	.fallocate	= shmem_fallocate,
#endif
};
3129
/*
 * Inode operations for shmem regular files: setattr always; the xattr
 * hooks and set_acl only with CONFIG_TMPFS_XATTR.
 */
static const struct inode_operations shmem_inode_operations = {
	.setattr	= shmem_setattr,
#ifdef CONFIG_TMPFS_XATTR
	.setxattr	= shmem_setxattr,
	.getxattr	= shmem_getxattr,
	.listxattr	= shmem_listxattr,
	.removexattr	= shmem_removexattr,
	.set_acl	= simple_set_acl,
#endif
};
3140
/*
 * Inode operations for shmem directories.  Namespace operations (create,
 * lookup, link, unlink, ...) require CONFIG_TMPFS, xattr hooks require
 * CONFIG_TMPFS_XATTR, and setattr/set_acl are installed only with
 * CONFIG_TMPFS_POSIX_ACL.
 */
static const struct inode_operations shmem_dir_inode_operations = {
#ifdef CONFIG_TMPFS
	.create		= shmem_create,
	.lookup		= simple_lookup,
	.link		= shmem_link,
	.unlink		= shmem_unlink,
	.symlink	= shmem_symlink,
	.mkdir		= shmem_mkdir,
	.rmdir		= shmem_rmdir,
	.mknod		= shmem_mknod,
	.rename2	= shmem_rename2,
	.tmpfile	= shmem_tmpfile,
#endif
#ifdef CONFIG_TMPFS_XATTR
	.setxattr	= shmem_setxattr,
	.getxattr	= shmem_getxattr,
	.listxattr	= shmem_listxattr,
	.removexattr	= shmem_removexattr,
#endif
#ifdef CONFIG_TMPFS_POSIX_ACL
	.setattr	= shmem_setattr,
	.set_acl	= simple_set_acl,
#endif
};
3165
/*
 * Inode operations table named for special inodes (NOTE(review): the code
 * assigning it is outside this chunk -- presumably device nodes, FIFOs and
 * sockets).  Contains only the xattr hooks (CONFIG_TMPFS_XATTR) and
 * setattr/set_acl (CONFIG_TMPFS_POSIX_ACL); empty if neither is set.
 */
static const struct inode_operations shmem_special_inode_operations = {
#ifdef CONFIG_TMPFS_XATTR
	.setxattr	= shmem_setxattr,
	.getxattr	= shmem_getxattr,
	.listxattr	= shmem_listxattr,
	.removexattr	= shmem_removexattr,
#endif
#ifdef CONFIG_TMPFS_POSIX_ACL
	.setattr	= shmem_setattr,
	.set_acl	= simple_set_acl,
#endif
};
3178
/*
 * Superblock operations, installed as sb->s_op by shmem_fill_super().
 * statfs, remount and show_options are available only with CONFIG_TMPFS.
 */
static const struct super_operations shmem_ops = {
	.alloc_inode	= shmem_alloc_inode,
	.destroy_inode	= shmem_destroy_inode,
#ifdef CONFIG_TMPFS
	.statfs		= shmem_statfs,
	.remount_fs	= shmem_remount_fs,
	.show_options	= shmem_show_options,
#endif
	.evict_inode	= shmem_evict_inode,
	.drop_inode	= generic_delete_inode,
	.put_super	= shmem_put_super,
};
3191
/*
 * VM operations for shmem-backed mappings; shmem_zero_setup() installs
 * this table on the VMA.  The mempolicy hooks exist only with CONFIG_NUMA.
 */
static const struct vm_operations_struct shmem_vm_ops = {
	.fault		= shmem_fault,
	.map_pages	= filemap_map_pages,
#ifdef CONFIG_NUMA
	.set_policy     = shmem_set_policy,
	.get_policy     = shmem_get_policy,
#endif
};
3200
/*
 * file_system_type.mount: mount a tmpfs instance through mount_nodev(),
 * with shmem_fill_super() as the superblock initializer.  @dev_name is
 * not used.
 */
static struct dentry *shmem_mount(struct file_system_type *fs_type,
	int flags, const char *dev_name, void *data)
{
	return mount_nodev(fs_type, flags, data, shmem_fill_super);
}
3206
/*
 * Filesystem type registered under the name "tmpfs" by shmem_init().
 * FS_USERNS_MOUNT is set in fs_flags.
 */
static struct file_system_type shmem_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "tmpfs",
	.mount		= shmem_mount,
	.kill_sb	= kill_litter_super,
	.fs_flags	= FS_USERNS_MOUNT,
};
3214
3215int __init shmem_init(void)
3216{
3217 int error;
3218
3219
3220 if (shmem_inode_cachep)
3221 return 0;
3222
3223 error = shmem_init_inodecache();
3224 if (error)
3225 goto out3;
3226
3227 error = register_filesystem(&shmem_fs_type);
3228 if (error) {
3229 printk(KERN_ERR "Could not register tmpfs\n");
3230 goto out2;
3231 }
3232
3233 shm_mnt = kern_mount(&shmem_fs_type);
3234 if (IS_ERR(shm_mnt)) {
3235 error = PTR_ERR(shm_mnt);
3236 printk(KERN_ERR "Could not kern_mount tmpfs\n");
3237 goto out1;
3238 }
3239 return 0;
3240
3241out1:
3242 unregister_filesystem(&shmem_fs_type);
3243out2:
3244 shmem_destroy_inodecache();
3245out3:
3246 shm_mnt = ERR_PTR(error);
3247 return error;
3248}
3249
3250#else
3251
3252
3253
3254
3255
3256
3257
3258
3259
3260
/*
 * Variant used when CONFIG_SHMEM is not set: a filesystem type still
 * named "tmpfs", but mounted through ramfs_mount().
 */
static struct file_system_type shmem_fs_type = {
	.name		= "tmpfs",
	.mount		= ramfs_mount,
	.kill_sb	= kill_litter_super,
	.fs_flags	= FS_USERNS_MOUNT,
};
3267
3268int __init shmem_init(void)
3269{
3270 BUG_ON(register_filesystem(&shmem_fs_type) != 0);
3271
3272 shm_mnt = kern_mount(&shmem_fs_type);
3273 BUG_ON(IS_ERR(shm_mnt));
3274
3275 return 0;
3276}
3277
/*
 * Stub for builds without CONFIG_SHMEM: does nothing with @swap or @page
 * and always returns 0.
 */
int shmem_unuse(swp_entry_t swap, struct page *page)
{
	return 0;
}
3282
/*
 * Stub for builds without CONFIG_SHMEM: ignores its arguments and always
 * returns 0.
 */
int shmem_lock(struct file *file, int lock, struct user_struct *user)
{
	return 0;
}
3287
/* Stub for builds without CONFIG_SHMEM: intentionally empty. */
void shmem_unlock_mapping(struct address_space *mapping)
{
}
3291
/*
 * Variant for builds without CONFIG_SHMEM: forward directly to
 * truncate_inode_pages_range() on the inode's mapping, passing @lstart
 * and @lend through unchanged.
 */
void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
{
	truncate_inode_pages_range(inode->i_mapping, lstart, lend);
}
EXPORT_SYMBOL_GPL(shmem_truncate_range);
3297
/*
 * Without CONFIG_SHMEM, map the shmem names used by the common code that
 * follows onto generic/ramfs equivalents.  The extra "flags" argument of
 * shmem_get_inode() is dropped, and size accounting becomes a no-op that
 * always succeeds.
 */
#define shmem_vm_ops				generic_file_vm_ops
#define shmem_file_operations			ramfs_file_operations
#define shmem_get_inode(sb, dir, mode, dev, flags)	ramfs_get_inode(sb, dir, mode, dev)
#define shmem_acct_size(flags, size)		0
#define shmem_unacct_size(flags, size)		do {} while (0)
3303
3304#endif
3305
3306
3307
3308static struct dentry_operations anon_ops = {
3309 .d_dname = simple_dname
3310};
3311
3312static struct file *__shmem_file_setup(const char *name, loff_t size,
3313 unsigned long flags, unsigned int i_flags)
3314{
3315 struct file *res;
3316 struct inode *inode;
3317 struct path path;
3318 struct super_block *sb;
3319 struct qstr this;
3320
3321 if (IS_ERR(shm_mnt))
3322 return ERR_CAST(shm_mnt);
3323
3324 if (size < 0 || size > MAX_LFS_FILESIZE)
3325 return ERR_PTR(-EINVAL);
3326
3327 if (shmem_acct_size(flags, size))
3328 return ERR_PTR(-ENOMEM);
3329
3330 res = ERR_PTR(-ENOMEM);
3331 this.name = name;
3332 this.len = strlen(name);
3333 this.hash = 0;
3334 sb = shm_mnt->mnt_sb;
3335 path.mnt = mntget(shm_mnt);
3336 path.dentry = d_alloc_pseudo(sb, &this);
3337 if (!path.dentry)
3338 goto put_memory;
3339 d_set_d_op(path.dentry, &anon_ops);
3340
3341 res = ERR_PTR(-ENOSPC);
3342 inode = shmem_get_inode(sb, NULL, S_IFREG | S_IRWXUGO, 0, flags);
3343 if (!inode)
3344 goto put_memory;
3345
3346 inode->i_flags |= i_flags;
3347 d_instantiate(path.dentry, inode);
3348 inode->i_size = size;
3349 clear_nlink(inode);
3350 res = ERR_PTR(ramfs_nommu_expand_for_mapping(inode, size));
3351 if (IS_ERR(res))
3352 goto put_path;
3353
3354 res = alloc_file(&path, FMODE_WRITE | FMODE_READ,
3355 &shmem_file_operations);
3356 if (IS_ERR(res))
3357 goto put_path;
3358
3359 return res;
3360
3361put_memory:
3362 shmem_unacct_size(flags, size);
3363put_path:
3364 path_put(&path);
3365 return res;
3366}
3367
3368
3369
3370
3371
3372
3373
3374
3375
3376
3377
/*
 * Create an unlinked shmem file whose inode is flagged S_PRIVATE
 * (NOTE(review): presumably for kernel-internal users, keeping the inode
 * out of reach of security modules -- confirm).
 *
 * @name, @size and @flags are passed straight to __shmem_file_setup();
 * returns its result (a struct file or an ERR_PTR).
 */
struct file *shmem_kernel_file_setup(const char *name, loff_t size, unsigned long flags)
{
	return __shmem_file_setup(name, size, flags, S_PRIVATE);
}
3382
3383
3384
3385
3386
3387
3388
/*
 * Create an unlinked shmem file with no extra inode flags.
 *
 * @name, @size and @flags are passed straight to __shmem_file_setup();
 * returns its result (a struct file or an ERR_PTR).
 */
struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags)
{
	return __shmem_file_setup(name, size, flags, 0);
}
EXPORT_SYMBOL_GPL(shmem_file_setup);
3394
3395
3396
3397
3398
3399int shmem_zero_setup(struct vm_area_struct *vma)
3400{
3401 struct file *file;
3402 loff_t size = vma->vm_end - vma->vm_start;
3403
3404
3405
3406
3407
3408
3409
3410 file = __shmem_file_setup("dev/zero", size, vma->vm_flags, S_PRIVATE);
3411 if (IS_ERR(file))
3412 return PTR_ERR(file);
3413
3414 if (vma->vm_file)
3415 fput(vma->vm_file);
3416 vma->vm_file = file;
3417 vma->vm_ops = &shmem_vm_ops;
3418 return 0;
3419}
3420
3421
3422
3423
3424
3425
3426
3427
3428
3429
3430
3431
3432
3433
3434
3435
3436struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
3437 pgoff_t index, gfp_t gfp)
3438{
3439#ifdef CONFIG_SHMEM
3440 struct inode *inode = mapping->host;
3441 struct page *page;
3442 int error;
3443
3444 BUG_ON(mapping->a_ops != &shmem_aops);
3445 error = shmem_getpage_gfp(inode, index, &page, SGP_CACHE, gfp, NULL);
3446 if (error)
3447 page = ERR_PTR(error);
3448 else
3449 unlock_page(page);
3450 return page;
3451#else
3452
3453
3454
3455 return read_cache_page_gfp(mapping, index, gfp);
3456#endif
3457}
3458EXPORT_SYMBOL_GPL(shmem_read_mapping_page_gfp);
3459