/*
 * Resizable virtual memory filesystem for Linux.
 *
 * Copyright (C) 2000 Linus Torvalds.
 *		 2000 Transmeta Corp.
 *		 2000-2001 Christoph Rohland
 *		 2000-2001 SAP AG
 *		 2002 Red Hat Inc.
 * Copyright (C) 2002-2011 Hugh Dickins.
 * Copyright (C) 2011 Google Inc.
 * Copyright (C) 2002-2005 VERITAS Software Corporation.
 * Copyright (C) 2004 Andi Kleen, SuSE Labs
 *
 * Extended attribute support for tmpfs:
 * Copyright (c) 2004, Luke Kenneth Casson Leighton <lkcl@lkcl.net>
 * Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
 *
 * tiny-shmem:
 * Copyright (c) 2004, 2008 Matt Mackall <mpm@selenic.com>
 *
 * This file is released under the GPL.
 */

#include <linux/fs.h>
#include <linux/init.h>
#include <linux/vfs.h>
#include <linux/mount.h>
#include <linux/ramfs.h>
#include <linux/pagemap.h>
#include <linux/file.h>
#include <linux/mm.h>
#include <linux/export.h>
#include <linux/swap.h>
#include <linux/aio.h>

static struct vfsmount *shm_mnt;

#ifdef CONFIG_SHMEM
/*
 * This virtual memory filesystem performs all file I/O within RAM itself,
 * using the page cache: pages are written out only to swap, and only under
 * memory pressure.  The tiny !CONFIG_SHMEM alternative, based on ramfs,
 * omits everything below.
 */

#include <linux/xattr.h>
#include <linux/exportfs.h>
#include <linux/posix_acl.h>
#include <linux/posix_acl_xattr.h>
#include <linux/mman.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>
#include <linux/shmem_fs.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/pagevec.h>
#include <linux/percpu_counter.h>
#include <linux/falloc.h>
#include <linux/splice.h>
#include <linux/security.h>
#include <linux/swapops.h>
#include <linux/mempolicy.h>
#include <linux/namei.h>
#include <linux/ctype.h>
#include <linux/migrate.h>
#include <linux/highmem.h>
#include <linux/seq_file.h>
#include <linux/magic.h>
#include <linux/syscalls.h>
#include <linux/fcntl.h>
#include <uapi/linux/memfd.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>

#define BLOCKS_PER_PAGE  (PAGE_CACHE_SIZE/512)
#define VM_ACCT(size)    (PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT)

/* Pretend that each entry is of this size in directory's i_size */
#define BOGO_DIRENT_SIZE 20

/* Symlink up to this size is kmalloc'ed instead of using a swappable page */
#define SHORT_SYMLINK_LEN 128

/*
 * shmem_fallocate communicates with shmem_fault or shmem_writepage via
 * inode->i_private (with i_mutex making sure that it has only one user at
 * a time): we would prefer not to enlarge the shmem inode just for that.
 */
struct shmem_falloc {
	wait_queue_head_t *waitq; /* faults into hole wait for punch to end */
	pgoff_t start;		/* start of range currently being fallocated */
	pgoff_t next;		/* the next page offset to be fallocated */
	pgoff_t nr_falloced;	/* how many new pages have been fallocated */
	pgoff_t nr_unswapped;	/* how often writepage refused to swap out */
};

/* Flag allocation requirements to shmem_getpage */
enum sgp_type {
	SGP_READ,	/* don't exceed i_size, don't allocate page */
	SGP_CACHE,	/* don't exceed i_size, may allocate page */
	SGP_DIRTY,	/* like SGP_CACHE, but set new page dirty */
	SGP_WRITE,	/* may exceed i_size, may allocate !Uptodate page */
	SGP_FALLOC,	/* like SGP_WRITE, but make existing page Uptodate */
};
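
/*
 * For orientation (not an exhaustive list): SGP_READ and SGP_DIRTY are
 * used by shmem_file_read_iter() below, SGP_CACHE by shmem_fault() and
 * shmem_file_splice_read(), SGP_WRITE by shmem_write_begin(), and
 * SGP_FALLOC by shmem_fallocate().
 */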

#ifdef CONFIG_TMPFS
static unsigned long shmem_default_max_blocks(void)
{
	return totalram_pages / 2;
}

static unsigned long shmem_default_max_inodes(void)
{
	return min(totalram_pages - totalhigh_pages, totalram_pages / 2);
}
#endif

static bool shmem_should_replace_page(struct page *page, gfp_t gfp);
static int shmem_replace_page(struct page **pagep, gfp_t gfp,
				struct shmem_inode_info *info, pgoff_t index);
static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
	struct page **pagep, enum sgp_type sgp, gfp_t gfp, int *fault_type);

static inline int shmem_getpage(struct inode *inode, pgoff_t index,
	struct page **pagep, enum sgp_type sgp, int *fault_type)
{
	return shmem_getpage_gfp(inode, index, pagep, sgp,
			mapping_gfp_mask(inode->i_mapping), fault_type);
}

static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb)
{
	return sb->s_fs_info;
}
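
/*
 * SHMEM_I() is the per-inode counterpart of SHMEM_SB(): struct
 * shmem_inode_info embeds the VFS inode, and SHMEM_I() recovers it with
 * container_of().  Sketch of the definition in <linux/shmem_fs.h>:
 *
 *	static inline struct shmem_inode_info *SHMEM_I(struct inode *inode)
 *	{
 *		return container_of(inode, struct shmem_inode_info, vfs_inode);
 *	}
 */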

/*
 * shmem_file_setup pre-accounts the whole fixed size of a VM object,
 * for shared memory and for shared anonymous (/dev/zero) mappings,
 * charging it all against the overcommit limit up front.  VM_NORESERVE
 * flags the inodes for which this full up-front accounting is skipped.
 */
static inline int shmem_acct_size(unsigned long flags, loff_t size)
{
	return (flags & VM_NORESERVE) ?
		0 : security_vm_enough_memory_mm(current->mm, VM_ACCT(size));
}

static inline void shmem_unacct_size(unsigned long flags, loff_t size)
{
	if (!(flags & VM_NORESERVE))
		vm_unacct_memory(VM_ACCT(size));
}

static inline int shmem_reacct_size(unsigned long flags,
		loff_t oldsize, loff_t newsize)
{
	if (!(flags & VM_NORESERVE)) {
		if (VM_ACCT(newsize) > VM_ACCT(oldsize))
			return security_vm_enough_memory_mm(current->mm,
					VM_ACCT(newsize) - VM_ACCT(oldsize));
		else if (VM_ACCT(newsize) < VM_ACCT(oldsize))
			vm_unacct_memory(VM_ACCT(oldsize) - VM_ACCT(newsize));
	}
	return 0;
}
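
/*
 * Illustrative only (hypothetical sizes): growing a 4-page object to 8
 * pages charges the 4-page difference against the overcommit limit, and
 * shrinking it back returns that charge:
 *
 *	error = shmem_reacct_size(info->flags,
 *				  4 << PAGE_SHIFT, 8 << PAGE_SHIFT);
 */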

/*
 * ... whereas tmpfs objects are accounted incrementally as
 * pages are allocated, in order to allow huge sparse files.
 * shmem_getpage reports shmem_acct_block failure as -ENOSPC not -ENOMEM,
 * so that a failure on a sparse tmpfs mapping will give SIGBUS not OOM.
 */
static inline int shmem_acct_block(unsigned long flags)
{
	return (flags & VM_NORESERVE) ?
		security_vm_enough_memory_mm(current->mm, VM_ACCT(PAGE_CACHE_SIZE)) : 0;
}

static inline void shmem_unacct_blocks(unsigned long flags, long pages)
{
	if (flags & VM_NORESERVE)
		vm_unacct_memory(pages * VM_ACCT(PAGE_CACHE_SIZE));
}

static const struct super_operations shmem_ops;
static const struct address_space_operations shmem_aops;
static const struct file_operations shmem_file_operations;
static const struct inode_operations shmem_inode_operations;
static const struct inode_operations shmem_dir_inode_operations;
static const struct inode_operations shmem_special_inode_operations;
static const struct vm_operations_struct shmem_vm_ops;

static struct backing_dev_info shmem_backing_dev_info  __read_mostly = {
	.ra_pages	= 0,	/* No readahead */
	.capabilities	= BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_SWAP_BACKED,
};

static LIST_HEAD(shmem_swaplist);
static DEFINE_MUTEX(shmem_swaplist_mutex);

static int shmem_reserve_inode(struct super_block *sb)
{
	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
	if (sbinfo->max_inodes) {
		spin_lock(&sbinfo->stat_lock);
		if (!sbinfo->free_inodes) {
			spin_unlock(&sbinfo->stat_lock);
			return -ENOSPC;
		}
		sbinfo->free_inodes--;
		spin_unlock(&sbinfo->stat_lock);
	}
	return 0;
}

static void shmem_free_inode(struct super_block *sb)
{
	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
	if (sbinfo->max_inodes) {
		spin_lock(&sbinfo->stat_lock);
		sbinfo->free_inodes++;
		spin_unlock(&sbinfo->stat_lock);
	}
}

/**
 * shmem_recalc_inode - recalculate the block usage of an inode
 * @inode: inode to recalc
 *
 * We have to calculate the free blocks since the mm can drop
 * undirtied hole pages behind our back.
 *
 * But normally   info->alloced == inode->i_mapping->nrpages + info->swapped
 * So mm freed is info->alloced - (inode->i_mapping->nrpages + info->swapped)
 *
 * It has to be called with the spinlock held.
 */
static void shmem_recalc_inode(struct inode *inode)
{
	struct shmem_inode_info *info = SHMEM_I(inode);
	long freed;

	freed = info->alloced - info->swapped - inode->i_mapping->nrpages;
	if (freed > 0) {
		struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
		if (sbinfo->max_blocks)
			percpu_counter_add(&sbinfo->used_blocks, -freed);
		info->alloced -= freed;
		inode->i_blocks -= freed * BLOCKS_PER_PAGE;
		shmem_unacct_blocks(info->flags, freed);
	}
}

/*
 * Replace item expected in radix tree by a new item, while holding tree lock.
 */
static int shmem_radix_tree_replace(struct address_space *mapping,
			pgoff_t index, void *expected, void *replacement)
{
	void **pslot;
	void *item;

	VM_BUG_ON(!expected);
	VM_BUG_ON(!replacement);
	pslot = radix_tree_lookup_slot(&mapping->page_tree, index);
	if (!pslot)
		return -ENOENT;
	item = radix_tree_deref_slot_protected(pslot, &mapping->tree_lock);
	if (item != expected)
		return -ENOENT;
	radix_tree_replace_slot(pslot, replacement);
	return 0;
}

/*
 * Sometimes, before deciding whether to proceed or to fail, we must check
 * that the entry in the radix tree still matches the swap entry we expect:
 * it may have been replaced by truncation or by a concurrent swapoff.
 * Checking the page alone is not enough: by the time a SwapCache page is
 * locked, it might have been reused for the same swap entry.
 */
static bool shmem_confirm_swap(struct address_space *mapping,
			       pgoff_t index, swp_entry_t swap)
{
	void *item;

	rcu_read_lock();
	item = radix_tree_lookup(&mapping->page_tree, index);
	rcu_read_unlock();
	return item == swp_to_radix_entry(swap);
}
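
/*
 * A sketch of how swap entries live in the radix tree (the authoritative
 * helpers are in <linux/swapops.h>): a swapped-out page's slot holds an
 * exceptional entry encoding the swp_entry_t, not a page pointer:
 *
 *	void *radswap = swp_to_radix_entry(swap);
 *	if (radix_tree_exceptional_entry(radswap))
 *		swap = radix_to_swp_entry(radswap);
 */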

/*
 * Like add_to_page_cache_locked, but error if expected item has gone.
 */
static int shmem_add_to_page_cache(struct page *page,
				   struct address_space *mapping,
				   pgoff_t index, void *expected)
{
	int error;

	VM_BUG_ON_PAGE(!PageLocked(page), page);
	VM_BUG_ON_PAGE(!PageSwapBacked(page), page);

	page_cache_get(page);
	page->mapping = mapping;
	page->index = index;

	spin_lock_irq(&mapping->tree_lock);
	if (!expected)
		error = radix_tree_insert(&mapping->page_tree, index, page);
	else
		error = shmem_radix_tree_replace(mapping, index, expected,
								 page);
	if (!error) {
		mapping->nrpages++;
		__inc_zone_page_state(page, NR_FILE_PAGES);
		__inc_zone_page_state(page, NR_SHMEM);
		spin_unlock_irq(&mapping->tree_lock);
	} else {
		page->mapping = NULL;
		spin_unlock_irq(&mapping->tree_lock);
		page_cache_release(page);
	}
	return error;
}

/*
 * Like delete_from_page_cache, but substitutes swap for page.
 */
static void shmem_delete_from_page_cache(struct page *page, void *radswap)
{
	struct address_space *mapping = page->mapping;
	int error;

	spin_lock_irq(&mapping->tree_lock);
	error = shmem_radix_tree_replace(mapping, page->index, page, radswap);
	page->mapping = NULL;
	mapping->nrpages--;
	__dec_zone_page_state(page, NR_FILE_PAGES);
	__dec_zone_page_state(page, NR_SHMEM);
	spin_unlock_irq(&mapping->tree_lock);
	page_cache_release(page);
	BUG_ON(error);
}

/*
 * Remove swap entry from radix tree, free the swap and its page cache.
 */
static int shmem_free_swap(struct address_space *mapping,
			   pgoff_t index, void *radswap)
{
	void *old;

	spin_lock_irq(&mapping->tree_lock);
	old = radix_tree_delete_item(&mapping->page_tree, index, radswap);
	spin_unlock_irq(&mapping->tree_lock);
	if (old != radswap)
		return -ENOENT;
	free_swap_and_cache(radix_to_swp_entry(radswap));
	return 0;
}

/*
 * SysV IPC SHM_UNLOCK restore Unevictable pages to their evictable lists.
 */
void shmem_unlock_mapping(struct address_space *mapping)
{
	struct pagevec pvec;
	pgoff_t indices[PAGEVEC_SIZE];
	pgoff_t index = 0;

	pagevec_init(&pvec, 0);
	/*
	 * Minor point, but we might as well stop if someone else SHM_LOCKs it.
	 */
	while (!mapping_unevictable(mapping)) {
		/*
		 * Avoid pagevec_lookup(): find_get_pages() returns 0 as if it
		 * has finished, if it hits a row of PAGEVEC_SIZE swap entries.
		 */
		pvec.nr = find_get_entries(mapping, index,
					   PAGEVEC_SIZE, pvec.pages, indices);
		if (!pvec.nr)
			break;
		index = indices[pvec.nr - 1] + 1;
		pagevec_remove_exceptionals(&pvec);
		check_move_unevictable_pages(pvec.pages, pvec.nr);
		pagevec_release(&pvec);
		cond_resched();
	}
}

/*
 * Remove range of pages and swap entries from radix tree, and free them.
 * If !unfalloc, truncate or punch hole; if unfalloc, undo failed fallocate.
 */
static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
								 bool unfalloc)
{
	struct address_space *mapping = inode->i_mapping;
	struct shmem_inode_info *info = SHMEM_I(inode);
	pgoff_t start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
	pgoff_t end = (lend + 1) >> PAGE_CACHE_SHIFT;
	unsigned int partial_start = lstart & (PAGE_CACHE_SIZE - 1);
	unsigned int partial_end = (lend + 1) & (PAGE_CACHE_SIZE - 1);
	struct pagevec pvec;
	pgoff_t indices[PAGEVEC_SIZE];
	long nr_swaps_freed = 0;
	pgoff_t index;
	int i;

	if (lend == -1)
		end = -1;	/* unsigned, so actually very big */

	pagevec_init(&pvec, 0);
	index = start;
	while (index < end) {
		pvec.nr = find_get_entries(mapping, index,
			min(end - index, (pgoff_t)PAGEVEC_SIZE),
			pvec.pages, indices);
		if (!pvec.nr)
			break;
		for (i = 0; i < pagevec_count(&pvec); i++) {
			struct page *page = pvec.pages[i];

			index = indices[i];
			if (index >= end)
				break;

			if (radix_tree_exceptional_entry(page)) {
				if (unfalloc)
					continue;
				nr_swaps_freed += !shmem_free_swap(mapping,
								index, page);
				continue;
			}

			if (!trylock_page(page))
				continue;
			if (!unfalloc || !PageUptodate(page)) {
				if (page->mapping == mapping) {
					VM_BUG_ON_PAGE(PageWriteback(page), page);
					truncate_inode_page(mapping, page);
				}
			}
			unlock_page(page);
		}
		pagevec_remove_exceptionals(&pvec);
		pagevec_release(&pvec);
		cond_resched();
		index++;
	}

	if (partial_start) {
		struct page *page = NULL;
		shmem_getpage(inode, start - 1, &page, SGP_READ, NULL);
		if (page) {
			unsigned int top = PAGE_CACHE_SIZE;
			if (start > end) {
				top = partial_end;
				partial_end = 0;
			}
			zero_user_segment(page, partial_start, top);
			set_page_dirty(page);
			unlock_page(page);
			page_cache_release(page);
		}
	}
	if (partial_end) {
		struct page *page = NULL;
		shmem_getpage(inode, end, &page, SGP_READ, NULL);
		if (page) {
			zero_user_segment(page, 0, partial_end);
			set_page_dirty(page);
			unlock_page(page);
			page_cache_release(page);
		}
	}
	if (start >= end)
		return;

	index = start;
	while (index < end) {
		cond_resched();

		pvec.nr = find_get_entries(mapping, index,
				min(end - index, (pgoff_t)PAGEVEC_SIZE),
				pvec.pages, indices);
		if (!pvec.nr) {
			/* If all gone or hole-punch or unfalloc, we're done */
			if (index == start || end != -1)
				break;
			/* But if truncating, restart to make sure all gone */
			index = start;
			continue;
		}
		for (i = 0; i < pagevec_count(&pvec); i++) {
			struct page *page = pvec.pages[i];

			index = indices[i];
			if (index >= end)
				break;

			if (radix_tree_exceptional_entry(page)) {
				if (unfalloc)
					continue;
				if (shmem_free_swap(mapping, index, page)) {
					/* Swap was replaced by page: retry */
					index--;
					break;
				}
				nr_swaps_freed++;
				continue;
			}

			lock_page(page);
			if (!unfalloc || !PageUptodate(page)) {
				if (page->mapping == mapping) {
					VM_BUG_ON_PAGE(PageWriteback(page), page);
					truncate_inode_page(mapping, page);
				} else {
					/* Page was replaced by swap: retry */
					unlock_page(page);
					index--;
					break;
				}
			}
			unlock_page(page);
		}
		pagevec_remove_exceptionals(&pvec);
		pagevec_release(&pvec);
		index++;
	}

	spin_lock(&info->lock);
	info->swapped -= nr_swaps_freed;
	shmem_recalc_inode(inode);
	spin_unlock(&info->lock);
}

void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
{
	shmem_undo_range(inode, lstart, lend, false);
	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
}
EXPORT_SYMBOL_GPL(shmem_truncate_range);
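
/*
 * Example (a sketch): to punch out just the second page of an inode, as
 * shmem_fallocate() below does for FALLOC_FL_PUNCH_HOLE:
 *
 *	shmem_truncate_range(inode, PAGE_CACHE_SIZE,
 *			     2 * PAGE_CACHE_SIZE - 1);
 *
 * lstart and lend are byte offsets, lend inclusive; lend == (loff_t)-1
 * means "to the end of the file", as shmem_setattr() below uses it.
 */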

static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
{
	struct inode *inode = dentry->d_inode;
	struct shmem_inode_info *info = SHMEM_I(inode);
	int error;

	error = inode_change_ok(inode, attr);
	if (error)
		return error;

	if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
		loff_t oldsize = inode->i_size;
		loff_t newsize = attr->ia_size;

		/* protected by i_mutex */
		if ((newsize < oldsize && (info->seals & F_SEAL_SHRINK)) ||
		    (newsize > oldsize && (info->seals & F_SEAL_GROW)))
			return -EPERM;

		if (newsize != oldsize) {
			error = shmem_reacct_size(SHMEM_I(inode)->flags,
					oldsize, newsize);
			if (error)
				return error;
			i_size_write(inode, newsize);
			inode->i_ctime = inode->i_mtime = CURRENT_TIME;
		}
		if (newsize < oldsize) {
			loff_t holebegin = round_up(newsize, PAGE_SIZE);
			unmap_mapping_range(inode->i_mapping, holebegin, 0, 1);
			shmem_truncate_range(inode, newsize, (loff_t)-1);
			/* unmap again to remove racily COWed private pages */
			unmap_mapping_range(inode->i_mapping, holebegin, 0, 1);
		}
	}

	setattr_copy(inode, attr);
	if (attr->ia_valid & ATTR_MODE)
		error = posix_acl_chmod(inode, inode->i_mode);
	return error;
}

static void shmem_evict_inode(struct inode *inode)
{
	struct shmem_inode_info *info = SHMEM_I(inode);

	if (inode->i_mapping->a_ops == &shmem_aops) {
		shmem_unacct_size(info->flags, inode->i_size);
		inode->i_size = 0;
		shmem_truncate_range(inode, 0, (loff_t)-1);
		if (!list_empty(&info->swaplist)) {
			mutex_lock(&shmem_swaplist_mutex);
			list_del_init(&info->swaplist);
			mutex_unlock(&shmem_swaplist_mutex);
		}
	} else
		kfree(info->symlink);

	simple_xattrs_free(&info->xattrs);
	WARN_ON(inode->i_blocks);
	shmem_free_inode(inode->i_sb);
	clear_inode(inode);
}

/*
 * If swap found in inode, free it and move page from swapcache to filecache.
 */
static int shmem_unuse_inode(struct shmem_inode_info *info,
			     swp_entry_t swap, struct page **pagep)
{
	struct address_space *mapping = info->vfs_inode.i_mapping;
	void *radswap;
	pgoff_t index;
	gfp_t gfp;
	int error = 0;

	radswap = swp_to_radix_entry(swap);
	index = radix_tree_locate_item(&mapping->page_tree, radswap);
	if (index == -1)
		return -EAGAIN;	/* tell shmem_unuse we found nothing */

	/*
	 * Move _head_ to start search for next from here.
	 * But be careful: shmem_evict_inode checks list_empty without taking
	 * mutex, and there's an instance now when it's not deleted.
	 */
	if (shmem_swaplist.next != &info->swaplist)
		list_move_tail(&shmem_swaplist, &info->swaplist);

	gfp = mapping_gfp_mask(mapping);
	if (shmem_should_replace_page(*pagep, gfp)) {
		mutex_unlock(&shmem_swaplist_mutex);
		error = shmem_replace_page(pagep, gfp, info, index);
		mutex_lock(&shmem_swaplist_mutex);
		/*
		 * We needed to drop mutex to make that restrictive page
		 * allocation, but the inode might have been freed while we
		 * dropped it: although a racing shmem_evict_inode() cannot
		 * complete without emptying the radix_tree, our page lock
		 * on this swapcache page is not enough to prevent that -
		 * free_swap_and_cache() of our swap entry will only
		 * trylock_page(), removing swap from radix_tree whatever.
		 *
		 * We must not proceed to shmem_add_to_page_cache() if the
		 * inode has been freed, but of course we cannot rely on
		 * inode or mapping or info to check that.  However, we can
		 * safely check if our swap entry is still in use (and here
		 * it can't have got reused for another page): if it's still
		 * in use, then the inode cannot have been freed yet, and we
		 * can safely proceed (if it's no longer in use, that tells
		 * nothing about the inode, but we don't need to unuse swap).
		 */
		if (!page_swapcount(*pagep))
			error = -ENOENT;
	}

	/*
	 * We rely on shmem_swaplist_mutex, not only to protect the swaplist,
	 * but also to hold up shmem_evict_inode(): so inode cannot be freed
	 * beneath us (pagelock doesn't help until the page is in pagecache).
	 */
	if (!error)
		error = shmem_add_to_page_cache(*pagep, mapping, index,
						radswap);
	if (error != -ENOMEM) {
		/*
		 * Truncation and eviction use free_swap_and_cache(), which
		 * only does trylock page: if we raced, best clean up here.
		 */
		delete_from_swap_cache(*pagep);
		set_page_dirty(*pagep);
		if (!error) {
			spin_lock(&info->lock);
			info->swapped--;
			spin_unlock(&info->lock);
			swap_free(swap);
		}
	}
	return error;
}

/*
 * Search through swapped inodes to find and replace swap by page.
 */
int shmem_unuse(swp_entry_t swap, struct page *page)
{
	struct list_head *this, *next;
	struct shmem_inode_info *info;
	struct mem_cgroup *memcg;
	int error = 0;

	/*
	 * There's a faint possibility that swap page was replaced before
	 * caller locked it: caller will come back later with the right page.
	 */
	if (unlikely(!PageSwapCache(page) || page_private(page) != swap.val))
		goto out;

	/*
	 * Charge page using GFP_KERNEL while we can wait, before taking
	 * the shmem_swaplist_mutex which might hold up shmem_writepage().
	 * Charged back to the user (not to caller) when swap account is used.
	 */
	error = mem_cgroup_try_charge(page, current->mm, GFP_KERNEL, &memcg);
	if (error)
		goto out;
	/* No radix_tree_preload: swap entry keeps a place for page in tree */
	error = -EAGAIN;

	mutex_lock(&shmem_swaplist_mutex);
	list_for_each_safe(this, next, &shmem_swaplist) {
		info = list_entry(this, struct shmem_inode_info, swaplist);
		if (info->swapped)
			error = shmem_unuse_inode(info, swap, &page);
		else
			list_del_init(&info->swaplist);
		cond_resched();
		if (error != -EAGAIN)
			break;
		/* found nothing in this: move on to search the next */
	}
	mutex_unlock(&shmem_swaplist_mutex);

	if (error) {
		if (error != -ENOMEM)
			error = 0;
		mem_cgroup_cancel_charge(page, memcg);
	} else
		mem_cgroup_commit_charge(page, memcg, true);
out:
	unlock_page(page);
	page_cache_release(page);
	return error;
}

/*
 * Move the page from the page cache to the swap cache.
 */
static int shmem_writepage(struct page *page, struct writeback_control *wbc)
{
	struct shmem_inode_info *info;
	struct address_space *mapping;
	struct inode *inode;
	swp_entry_t swap;
	pgoff_t index;

	BUG_ON(!PageLocked(page));
	mapping = page->mapping;
	index = page->index;
	inode = mapping->host;
	info = SHMEM_I(inode);
	if (info->flags & VM_LOCKED)
		goto redirty;
	if (!total_swap_pages)
		goto redirty;

	/*
	 * Our capabilities prevent regular writeback or sync from ever calling
	 * shmem_writepage; but a stacking filesystem might use ->writepage of
	 * its underlying filesystem, in which case tmpfs should write out to
	 * swap only in response to memory pressure, and not for the writeback
	 * threads or sync.
	 */
	if (!wbc->for_reclaim) {
		WARN_ON_ONCE(1);	/* Still happens? Tell us about it! */
		goto redirty;
	}

	/*
	 * This is somewhat ridiculous, but without plumbing a SWAP_MAP_FALLOC
	 * value into swapfile.c, the only way we can correctly account for a
	 * fallocated page arriving here is now to initialize it and write it.
	 *
	 * That's okay for a page already fallocated earlier, but if we have
	 * not yet completed the fallocation, then (a) we want to keep track
	 * of this page in case we have to undo it, and (b) it may not be a
	 * good idea to continue anyway, once we're pushing into swap.  So
	 * reactivate the page, and let shmem_fallocate go find it later.
	 */
	if (!PageUptodate(page)) {
		if (inode->i_private) {
			struct shmem_falloc *shmem_falloc;
			spin_lock(&inode->i_lock);
			shmem_falloc = inode->i_private;
			if (shmem_falloc &&
			    !shmem_falloc->waitq &&
			    index >= shmem_falloc->start &&
			    index < shmem_falloc->next)
				shmem_falloc->nr_unswapped++;
			else
				shmem_falloc = NULL;
			spin_unlock(&inode->i_lock);
			if (shmem_falloc)
				goto redirty;
		}
		clear_highpage(page);
		flush_dcache_page(page);
		SetPageUptodate(page);
	}

	swap = get_swap_page();
	if (!swap.val)
		goto redirty;

	/*
	 * Add inode to shmem_unuse()'s list of swapped-out inodes,
	 * if it's not already there.  Do it now, before the page is
	 * moved to swap cache, when its pagelock no longer protects
	 * the inode from eviction.  But don't unlock the mutex until
	 * we've incremented swapped, because shmem_unuse_inode() will
	 * prune a !swapped inode from the swaplist under this mutex.
	 */
	mutex_lock(&shmem_swaplist_mutex);
	if (list_empty(&info->swaplist))
		list_add_tail(&info->swaplist, &shmem_swaplist);

	if (add_to_swap_cache(page, swap, GFP_ATOMIC) == 0) {
		swap_shmem_alloc(swap);
		shmem_delete_from_page_cache(page, swp_to_radix_entry(swap));

		spin_lock(&info->lock);
		info->swapped++;
		shmem_recalc_inode(inode);
		spin_unlock(&info->lock);

		mutex_unlock(&shmem_swaplist_mutex);
		BUG_ON(page_mapped(page));
		swap_writepage(page, wbc);
		return 0;
	}

	mutex_unlock(&shmem_swaplist_mutex);
	swapcache_free(swap);
redirty:
	set_page_dirty(page);
	if (wbc->for_reclaim)
		return AOP_WRITEPAGE_ACTIVATE;	/* Return with page locked */
	unlock_page(page);
	return 0;
}

#ifdef CONFIG_NUMA
#ifdef CONFIG_TMPFS
static void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol)
{
	char buffer[64];

	if (!mpol || mpol->mode == MPOL_DEFAULT)
		return;		/* show nothing */

	mpol_to_str(buffer, sizeof(buffer), mpol);

	seq_printf(seq, ",mpol=%s", buffer);
}

static struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
{
	struct mempolicy *mpol = NULL;
	if (sbinfo->mpol) {
		spin_lock(&sbinfo->stat_lock);	/* prevent replace/use races */
		mpol = sbinfo->mpol;
		mpol_get(mpol);
		spin_unlock(&sbinfo->stat_lock);
	}
	return mpol;
}
#endif /* CONFIG_TMPFS */

static struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp,
			struct shmem_inode_info *info, pgoff_t index)
{
	struct vm_area_struct pvma;
	struct page *page;

	/* Create a pseudo vma that just contains the policy */
	pvma.vm_start = 0;
	/* Bias interleave by inode number to distribute better across nodes */
	pvma.vm_pgoff = index + info->vfs_inode.i_ino;
	pvma.vm_ops = NULL;
	pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, index);

	page = swapin_readahead(swap, gfp, &pvma, 0);

	/* Drop reference taken by mpol_shared_policy_lookup() */
	mpol_cond_put(pvma.vm_policy);

	return page;
}

static struct page *shmem_alloc_page(gfp_t gfp,
			struct shmem_inode_info *info, pgoff_t index)
{
	struct vm_area_struct pvma;
	struct page *page;

	/* Create a pseudo vma that just contains the policy */
	pvma.vm_start = 0;
	/* Bias interleave by inode number to distribute better across nodes */
	pvma.vm_pgoff = index + info->vfs_inode.i_ino;
	pvma.vm_ops = NULL;
	pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, index);

	page = alloc_page_vma(gfp, &pvma, 0);

	/* Drop reference taken by mpol_shared_policy_lookup() */
	mpol_cond_put(pvma.vm_policy);

	return page;
}
#else /* !CONFIG_NUMA */
#ifdef CONFIG_TMPFS
static inline void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol)
{
}
#endif /* CONFIG_TMPFS */

static inline struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp,
			struct shmem_inode_info *info, pgoff_t index)
{
	return swapin_readahead(swap, gfp, NULL, 0);
}

static inline struct page *shmem_alloc_page(gfp_t gfp,
			struct shmem_inode_info *info, pgoff_t index)
{
	return alloc_page(gfp);
}
#endif /* CONFIG_NUMA */

#if !defined(CONFIG_NUMA) || !defined(CONFIG_TMPFS)
static inline struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
{
	return NULL;
}
#endif

/*
 * When a page is moved from swapcache to shmem filecache (either by the
 * usual swapin of shmem_getpage_gfp(), or by the less common swapoff of
 * shmem_unuse_inode()), it may have been read in earlier from swap, in
 * ways which differ from a mapped page: so its zone may not be one that
 * this mapping's gfp allows.  shmem_should_replace_page() detects that
 * case, and shmem_replace_page() then copies the page into a freshly
 * allocated one which does satisfy the gfp constraints, substituting
 * the new page for the old one in the swap cache.
 */
static bool shmem_should_replace_page(struct page *page, gfp_t gfp)
{
	return page_zonenum(page) > gfp_zone(gfp);
}

static int shmem_replace_page(struct page **pagep, gfp_t gfp,
				struct shmem_inode_info *info, pgoff_t index)
{
	struct page *oldpage, *newpage;
	struct address_space *swap_mapping;
	pgoff_t swap_index;
	int error;

	oldpage = *pagep;
	swap_index = page_private(oldpage);
	swap_mapping = page_mapping(oldpage);

	/*
	 * We have arrived here because our zones are constrained, so don't
	 * limit chance of success by further cpuset and node constraints.
	 */
	gfp &= ~GFP_CONSTRAINT_MASK;
	newpage = shmem_alloc_page(gfp, info, index);
	if (!newpage)
		return -ENOMEM;

	page_cache_get(newpage);
	copy_highpage(newpage, oldpage);
	flush_dcache_page(newpage);

	__set_page_locked(newpage);
	SetPageUptodate(newpage);
	SetPageSwapBacked(newpage);
	set_page_private(newpage, swap_index);
	SetPageSwapCache(newpage);

	/*
	 * Our caller will very soon move newpage out of swapcache, but it's
	 * a nice clean interface for us to replace oldpage by newpage there.
	 */
	spin_lock_irq(&swap_mapping->tree_lock);
	error = shmem_radix_tree_replace(swap_mapping, swap_index, oldpage,
								 newpage);
	if (!error) {
		__inc_zone_page_state(newpage, NR_FILE_PAGES);
		__dec_zone_page_state(oldpage, NR_FILE_PAGES);
	}
	spin_unlock_irq(&swap_mapping->tree_lock);

	if (unlikely(error)) {
		/*
		 * Is this possible?  I think not, now that our callers check
		 * both PageSwapCache and page_private after getting page lock;
		 * but be defensive.  Reverse old to newpage for clear and free.
		 */
		oldpage = newpage;
	} else {
		mem_cgroup_migrate(oldpage, newpage, false);
		lru_cache_add_anon(newpage);
		*pagep = newpage;
	}

	ClearPageSwapCache(oldpage);
	set_page_private(oldpage, 0);

	unlock_page(oldpage);
	page_cache_release(oldpage);	/* drop the swapcache reference */
	page_cache_release(oldpage);	/* drop the caller's reference */
	return error;
}

/*
 * shmem_getpage_gfp - find page in cache, or get from swap, or allocate
 *
 * If we allocate a new one we do not mark it dirty. That's up to the
 * vm. If we swap it in we mark it dirty since we also free the swap
 * entry since a page cannot live in both the swap and page cache.
 */
static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
	struct page **pagep, enum sgp_type sgp, gfp_t gfp, int *fault_type)
{
	struct address_space *mapping = inode->i_mapping;
	struct shmem_inode_info *info;
	struct shmem_sb_info *sbinfo;
	struct mem_cgroup *memcg;
	struct page *page;
	swp_entry_t swap;
	int error;
	int once = 0;
	int alloced = 0;

	if (index > (MAX_LFS_FILESIZE >> PAGE_CACHE_SHIFT))
		return -EFBIG;
repeat:
	swap.val = 0;
	page = find_lock_entry(mapping, index);
	if (radix_tree_exceptional_entry(page)) {
		swap = radix_to_swp_entry(page);
		page = NULL;
	}

	if (sgp != SGP_WRITE && sgp != SGP_FALLOC &&
	    ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
		error = -EINVAL;
		goto failed;
	}

	if (page && sgp == SGP_WRITE)
		mark_page_accessed(page);

	/* fallocated page? */
	if (page && !PageUptodate(page)) {
		if (sgp != SGP_READ)
			goto clear;
		unlock_page(page);
		page_cache_release(page);
		page = NULL;
	}
	if (page || (sgp == SGP_READ && !swap.val)) {
		*pagep = page;
		return 0;
	}

	/*
	 * Fast cache lookup did not find it:
	 * bring it back from swap or allocate.
	 */
	info = SHMEM_I(inode);
	sbinfo = SHMEM_SB(inode->i_sb);

	if (swap.val) {
		/* Look it up and read it in.. */
		page = lookup_swap_cache(swap);
		if (!page) {
			/* here we actually do the io */
			if (fault_type)
				*fault_type |= VM_FAULT_MAJOR;
			page = shmem_swapin(swap, gfp, info, index);
			if (!page) {
				error = -ENOMEM;
				goto failed;
			}
		}

		/* We have to do this with page locked to prevent races */
		lock_page(page);
		if (!PageSwapCache(page) || page_private(page) != swap.val ||
		    !shmem_confirm_swap(mapping, index, swap)) {
			error = -EEXIST;	/* try again */
			goto unlock;
		}
		if (!PageUptodate(page)) {
			error = -EIO;
			goto failed;
		}
		wait_on_page_writeback(page);

		if (shmem_should_replace_page(page, gfp)) {
			error = shmem_replace_page(&page, gfp, info, index);
			if (error)
				goto failed;
		}

		error = mem_cgroup_try_charge(page, current->mm, gfp, &memcg);
		if (!error) {
			error = shmem_add_to_page_cache(page, mapping, index,
						swp_to_radix_entry(swap));
			/*
			 * We already confirmed swap under page lock, and make
			 * no memory allocation here, so usually no possibility
			 * of error; but free_swap_and_cache() only trylocks a
			 * page, so it is just possible that the entry has been
			 * truncated or holepunched since swap was confirmed.
			 * shmem_undo_range() will have done some of the
			 * unaccounting, now delete_from_swap_cache() will do
			 * the rest.
			 * Reset swap.val? No, leave it so "failed" goes back to
			 * "repeat": reading a hole and writing should succeed.
			 */
			if (error) {
				mem_cgroup_cancel_charge(page, memcg);
				delete_from_swap_cache(page);
			}
		}
		if (error)
			goto failed;

		mem_cgroup_commit_charge(page, memcg, true);

		spin_lock(&info->lock);
		info->swapped--;
		shmem_recalc_inode(inode);
		spin_unlock(&info->lock);

		if (sgp == SGP_WRITE)
			mark_page_accessed(page);

		delete_from_swap_cache(page);
		set_page_dirty(page);
		swap_free(swap);

	} else {
		if (shmem_acct_block(info->flags)) {
			error = -ENOSPC;
			goto failed;
		}
		if (sbinfo->max_blocks) {
			if (percpu_counter_compare(&sbinfo->used_blocks,
						sbinfo->max_blocks) >= 0) {
				error = -ENOSPC;
				goto unacct;
			}
			percpu_counter_inc(&sbinfo->used_blocks);
		}

		page = shmem_alloc_page(gfp, info, index);
		if (!page) {
			error = -ENOMEM;
			goto decused;
		}

		__SetPageSwapBacked(page);
		__set_page_locked(page);
		if (sgp == SGP_WRITE)
			__SetPageReferenced(page);

		error = mem_cgroup_try_charge(page, current->mm, gfp, &memcg);
		if (error)
			goto decused;
		error = radix_tree_maybe_preload(gfp & GFP_RECLAIM_MASK);
		if (!error) {
			error = shmem_add_to_page_cache(page, mapping, index,
							NULL);
			radix_tree_preload_end();
		}
		if (error) {
			mem_cgroup_cancel_charge(page, memcg);
			goto decused;
		}
		mem_cgroup_commit_charge(page, memcg, false);
		lru_cache_add_anon(page);

		spin_lock(&info->lock);
		info->alloced++;
		inode->i_blocks += BLOCKS_PER_PAGE;
		shmem_recalc_inode(inode);
		spin_unlock(&info->lock);
		alloced = true;

		/*
		 * Let SGP_FALLOC use the SGP_WRITE optimization on a new page.
		 */
		if (sgp == SGP_FALLOC)
			sgp = SGP_WRITE;
clear:
		/*
		 * Let SGP_WRITE caller clear ends if write does not fill page;
		 * but SGP_FALLOC on a page fallocated earlier must initialize
		 * it now, lest undo on failure cancel our earlier guarantee.
		 */
		if (sgp != SGP_WRITE) {
			clear_highpage(page);
			flush_dcache_page(page);
			SetPageUptodate(page);
		}
		if (sgp == SGP_DIRTY)
			set_page_dirty(page);
	}

	/* Perhaps the file has been truncated since we checked */
	if (sgp != SGP_WRITE && sgp != SGP_FALLOC &&
	    ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
		error = -EINVAL;
		if (alloced)
			goto trunc;
		else
			goto failed;
	}
	*pagep = page;
	return 0;

	/*
	 * Error recovery.
	 */
trunc:
	info = SHMEM_I(inode);
	ClearPageDirty(page);
	delete_from_page_cache(page);
	spin_lock(&info->lock);
	info->alloced--;
	inode->i_blocks -= BLOCKS_PER_PAGE;
	spin_unlock(&info->lock);
decused:
	sbinfo = SHMEM_SB(inode->i_sb);
	if (sbinfo->max_blocks)
		percpu_counter_add(&sbinfo->used_blocks, -1);
unacct:
	shmem_unacct_blocks(info->flags, 1);
failed:
	if (swap.val && error != -EINVAL &&
	    !shmem_confirm_swap(mapping, index, swap))
		error = -EEXIST;
unlock:
	if (page) {
		unlock_page(page);
		page_cache_release(page);
	}
	if (error == -ENOSPC && !once++) {
		info = SHMEM_I(inode);
		spin_lock(&info->lock);
		shmem_recalc_inode(inode);
		spin_unlock(&info->lock);
		goto repeat;
	}
	if (error == -EEXIST)	/* from above or from radix_tree_insert */
		goto repeat;
	return error;
}
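
/*
 * Typical in-kernel usage of the above (a sketch; callers in this file
 * follow the same pattern): the page comes back locked and referenced.
 *
 *	struct page *page;
 *	int error = shmem_getpage(inode, index, &page, SGP_CACHE, NULL);
 *	if (!error) {
 *		unlock_page(page);
 *		... use page ...
 *		page_cache_release(page);
 *	}
 */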

static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct inode *inode = file_inode(vma->vm_file);
	int error;
	int ret = VM_FAULT_LOCKED;

	/*
	 * Trinity finds that probing a hole which tmpfs is punching can
	 * prevent the hole-punch from ever completing: which in turn
	 * locks writers out with its hold on i_mutex.  So refrain from
	 * faulting pages into the hole while it's being punched.  Although
	 * shmem_undo_range() does remove the additions, it may be unable to
	 * keep up, as each new page needs its own unmap_mapping_range() call,
	 * and the i_mmap tree grows ever slower to scan if new vmas are added.
	 *
	 * It does not matter if we sometimes reach this check just before the
	 * hole-punch begins, so that one fault then races with the punch:
	 * we just need to make racing faults a rare case.
	 *
	 * The implementation below would be much simpler if we just used a
	 * standard mutex or completion: but we cannot take i_mutex in fault,
	 * and bloating every shmem inode for this unlikely case would be sad.
	 */
	if (unlikely(inode->i_private)) {
		struct shmem_falloc *shmem_falloc;

		spin_lock(&inode->i_lock);
		shmem_falloc = inode->i_private;
		if (shmem_falloc &&
		    shmem_falloc->waitq &&
		    vmf->pgoff >= shmem_falloc->start &&
		    vmf->pgoff < shmem_falloc->next) {
			wait_queue_head_t *shmem_falloc_waitq;
			DEFINE_WAIT(shmem_fault_wait);

			ret = VM_FAULT_NOPAGE;
			if ((vmf->flags & FAULT_FLAG_ALLOW_RETRY) &&
			   !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
				/* It's polite to up mmap_sem if we can */
				up_read(&vma->vm_mm->mmap_sem);
				ret = VM_FAULT_RETRY;
			}

			shmem_falloc_waitq = shmem_falloc->waitq;
			prepare_to_wait(shmem_falloc_waitq, &shmem_fault_wait,
					TASK_UNINTERRUPTIBLE);
			spin_unlock(&inode->i_lock);
			schedule();

			/*
			 * shmem_falloc_waitq points into the shmem_fallocate()
			 * stack of the hole-punching task: shmem_falloc_waitq
			 * is usually invalid by the time we reach here, but
			 * finish_wait() does not dereference it in that case;
			 * though i_lock needed lest racing with wake_up_all().
			 */
			spin_lock(&inode->i_lock);
			finish_wait(shmem_falloc_waitq, &shmem_fault_wait);
			spin_unlock(&inode->i_lock);
			return ret;
		}
		spin_unlock(&inode->i_lock);
	}

	error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_CACHE, &ret);
	if (error)
		return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS);

	if (ret & VM_FAULT_MAJOR) {
		count_vm_event(PGMAJFAULT);
		mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
	}
	return ret;
}

#ifdef CONFIG_NUMA
static int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *mpol)
{
	struct inode *inode = file_inode(vma->vm_file);
	return mpol_set_shared_policy(&SHMEM_I(inode)->policy, vma, mpol);
}

static struct mempolicy *shmem_get_policy(struct vm_area_struct *vma,
					  unsigned long addr)
{
	struct inode *inode = file_inode(vma->vm_file);
	pgoff_t index;

	index = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
	return mpol_shared_policy_lookup(&SHMEM_I(inode)->policy, index);
}
#endif

int shmem_lock(struct file *file, int lock, struct user_struct *user)
{
	struct inode *inode = file_inode(file);
	struct shmem_inode_info *info = SHMEM_I(inode);
	int retval = -ENOMEM;

	spin_lock(&info->lock);
	if (lock && !(info->flags & VM_LOCKED)) {
		if (!user_shm_lock(inode->i_size, user))
			goto out_nomem;
		info->flags |= VM_LOCKED;
		mapping_set_unevictable(file->f_mapping);
	}
	if (!lock && (info->flags & VM_LOCKED) && user) {
		user_shm_unlock(inode->i_size, user);
		info->flags &= ~VM_LOCKED;
		mapping_clear_unevictable(file->f_mapping);
	}
	retval = 0;

out_nomem:
	spin_unlock(&info->lock);
	return retval;
}

static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
{
	file_accessed(file);
	vma->vm_ops = &shmem_vm_ops;
	return 0;
}
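
/*
 * From userspace, mapping a tmpfs file goes through shmem_mmap() above
 * and shmem_fault() for each touched page.  A minimal sketch (the path
 * is hypothetical, error handling omitted):
 *
 *	int fd = open("/dev/shm/example", O_RDWR | O_CREAT, 0600);
 *	ftruncate(fd, 4096);
 *	char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
 *		       MAP_SHARED, fd, 0);
 *	p[0] = 1;	(allocates the first page via shmem_fault())
 */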

static struct inode *shmem_get_inode(struct super_block *sb, const struct inode *dir,
				     umode_t mode, dev_t dev, unsigned long flags)
{
	struct inode *inode;
	struct shmem_inode_info *info;
	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);

	if (shmem_reserve_inode(sb))
		return NULL;

	inode = new_inode(sb);
	if (inode) {
		inode->i_ino = get_next_ino();
		inode_init_owner(inode, dir, mode);
		inode->i_blocks = 0;
		inode->i_mapping->backing_dev_info = &shmem_backing_dev_info;
		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
		inode->i_generation = get_seconds();
		info = SHMEM_I(inode);
		memset(info, 0, (char *)inode - (char *)info);
		spin_lock_init(&info->lock);
		info->seals = F_SEAL_SEAL;
		info->flags = flags & VM_NORESERVE;
		INIT_LIST_HEAD(&info->swaplist);
		simple_xattrs_init(&info->xattrs);
		cache_no_acl(inode);

		switch (mode & S_IFMT) {
		default:
			inode->i_op = &shmem_special_inode_operations;
			init_special_inode(inode, mode, dev);
			break;
		case S_IFREG:
			inode->i_mapping->a_ops = &shmem_aops;
			inode->i_op = &shmem_inode_operations;
			inode->i_fop = &shmem_file_operations;
			mpol_shared_policy_init(&info->policy,
						 shmem_get_sbmpol(sbinfo));
			break;
		case S_IFDIR:
			inc_nlink(inode);
			/* Some things misbehave if size == 0 on a directory */
			inode->i_size = 2 * BOGO_DIRENT_SIZE;
			inode->i_op = &shmem_dir_inode_operations;
			inode->i_fop = &simple_dir_operations;
			break;
		case S_IFLNK:
			/*
			 * Must not load anything in the rbtree,
			 * mpol_free_shared_policy will not be called.
			 */
			mpol_shared_policy_init(&info->policy, NULL);
			break;
		}
	} else
		shmem_free_inode(sb);
	return inode;
}

bool shmem_mapping(struct address_space *mapping)
{
	return mapping->backing_dev_info == &shmem_backing_dev_info;
}

#ifdef CONFIG_TMPFS
static const struct inode_operations shmem_symlink_inode_operations;
static const struct inode_operations shmem_short_symlink_operations;

#ifdef CONFIG_TMPFS_XATTR
static int shmem_initxattrs(struct inode *, const struct xattr *, void *);
#else
#define shmem_initxattrs NULL
#endif

static int
shmem_write_begin(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata)
{
	struct inode *inode = mapping->host;
	struct shmem_inode_info *info = SHMEM_I(inode);
	pgoff_t index = pos >> PAGE_CACHE_SHIFT;

	/* i_mutex is held by caller */
	if (unlikely(info->seals)) {
		if (info->seals & F_SEAL_WRITE)
			return -EPERM;
		if ((info->seals & F_SEAL_GROW) && pos + len > inode->i_size)
			return -EPERM;
	}

	return shmem_getpage(inode, index, pagep, SGP_WRITE, NULL);
}

static int
shmem_write_end(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	struct inode *inode = mapping->host;

	if (pos + copied > inode->i_size)
		i_size_write(inode, pos + copied);

	if (!PageUptodate(page)) {
		if (copied < PAGE_CACHE_SIZE) {
			unsigned from = pos & (PAGE_CACHE_SIZE - 1);
			zero_user_segments(page, 0, from,
					from + copied, PAGE_CACHE_SIZE);
		}
		SetPageUptodate(page);
	}
	set_page_dirty(page);
	unlock_page(page);
	page_cache_release(page);

	return copied;
}

static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file_inode(file);
	struct address_space *mapping = inode->i_mapping;
	pgoff_t index;
	unsigned long offset;
	enum sgp_type sgp = SGP_READ;
	int error = 0;
	ssize_t retval = 0;
	loff_t *ppos = &iocb->ki_pos;

	/*
	 * Might this read be for a stacking filesystem?  Then when reading
	 * holes of a sparse file, we actually need to allocate those pages,
	 * and even mark them dirty, so it cannot exceed the max_blocks limit.
	 */
	if (segment_eq(get_fs(), KERNEL_DS))
		sgp = SGP_DIRTY;

	index = *ppos >> PAGE_CACHE_SHIFT;
	offset = *ppos & ~PAGE_CACHE_MASK;

	for (;;) {
		struct page *page = NULL;
		pgoff_t end_index;
		unsigned long nr, ret;
		loff_t i_size = i_size_read(inode);

		end_index = i_size >> PAGE_CACHE_SHIFT;
		if (index > end_index)
			break;
		if (index == end_index) {
			nr = i_size & ~PAGE_CACHE_MASK;
			if (nr <= offset)
				break;
		}

		error = shmem_getpage(inode, index, &page, sgp, NULL);
		if (error) {
			if (error == -EINVAL)
				error = 0;
			break;
		}
		if (page)
			unlock_page(page);

		/*
		 * We must evaluate after, since reads (unlike writes)
		 * are called without i_mutex protection against truncate
		 */
		nr = PAGE_CACHE_SIZE;
		i_size = i_size_read(inode);
		end_index = i_size >> PAGE_CACHE_SHIFT;
		if (index == end_index) {
			nr = i_size & ~PAGE_CACHE_MASK;
			if (nr <= offset) {
				if (page)
					page_cache_release(page);
				break;
			}
		}
		nr -= offset;

		if (page) {
			/*
			 * If users can be writing to this page using arbitrary
			 * virtual addresses, take care about potential aliasing
			 * before reading the page on remote memory.
			 */
			if (mapping_writably_mapped(mapping))
				flush_dcache_page(page);
			/*
			 * Mark the page accessed if we read the beginning.
			 */
			if (!offset)
				mark_page_accessed(page);
		} else {
			page = ZERO_PAGE(0);
			page_cache_get(page);
		}

		/*
		 * Ok, we have the page, and it's up-to-date, so
		 * now we can copy it to user space...
		 */
		ret = copy_page_to_iter(page, offset, nr, to);
		retval += ret;
		offset += ret;
		index += offset >> PAGE_CACHE_SHIFT;
		offset &= ~PAGE_CACHE_MASK;

		page_cache_release(page);
		if (!iov_iter_count(to))
			break;
		if (ret < nr) {
			error = -EFAULT;
			break;
		}
		cond_resched();
	}

	*ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
	file_accessed(file);
	return retval ? retval : error;
}

static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
				struct pipe_inode_info *pipe, size_t len,
				unsigned int flags)
{
	struct address_space *mapping = in->f_mapping;
	struct inode *inode = mapping->host;
	unsigned int loff, nr_pages, req_pages;
	struct page *pages[PIPE_DEF_BUFFERS];
	struct partial_page partial[PIPE_DEF_BUFFERS];
	struct page *page;
	pgoff_t index, end_index;
	loff_t isize, left;
	int error, page_nr;
	struct splice_pipe_desc spd = {
		.pages = pages,
		.partial = partial,
		.nr_pages_max = PIPE_DEF_BUFFERS,
		.flags = flags,
		.ops = &page_cache_pipe_buf_ops,
		.spd_release = spd_release_page,
	};

	isize = i_size_read(inode);
	if (unlikely(*ppos >= isize))
		return 0;

	left = isize - *ppos;
	if (unlikely(left < len))
		len = left;

	if (splice_grow_spd(pipe, &spd))
		return -ENOMEM;

	index = *ppos >> PAGE_CACHE_SHIFT;
	loff = *ppos & ~PAGE_CACHE_MASK;
	req_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
	nr_pages = min(req_pages, spd.nr_pages_max);

	spd.nr_pages = find_get_pages_contig(mapping, index,
						nr_pages, spd.pages);
	index += spd.nr_pages;
	error = 0;

	while (spd.nr_pages < nr_pages) {
		error = shmem_getpage(inode, index, &page, SGP_CACHE, NULL);
		if (error)
			break;
		unlock_page(page);
		spd.pages[spd.nr_pages++] = page;
		index++;
	}

	index = *ppos >> PAGE_CACHE_SHIFT;
	nr_pages = spd.nr_pages;
	spd.nr_pages = 0;

	for (page_nr = 0; page_nr < nr_pages; page_nr++) {
		unsigned int this_len;

		if (!len)
			break;

		this_len = min_t(unsigned long, len, PAGE_CACHE_SIZE - loff);
		page = spd.pages[page_nr];

		if (!PageUptodate(page) || page->mapping != mapping) {
			error = shmem_getpage(inode, index, &page,
							SGP_CACHE, NULL);
			if (error)
				break;
			unlock_page(page);
			page_cache_release(spd.pages[page_nr]);
			spd.pages[page_nr] = page;
		}

		isize = i_size_read(inode);
		end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
		if (unlikely(!isize || index > end_index))
			break;

		if (end_index == index) {
			unsigned int plen;

			plen = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
			if (plen <= loff)
				break;

			this_len = min(this_len, plen - loff);
			len = this_len;
		}

		spd.partial[page_nr].offset = loff;
		spd.partial[page_nr].len = this_len;
		len -= this_len;
		loff = 0;
		spd.nr_pages++;
		index++;
	}

	while (page_nr < nr_pages)
		page_cache_release(spd.pages[page_nr++]);

	if (spd.nr_pages)
		error = splice_to_pipe(pipe, &spd);

	splice_shrink_spd(&spd);

	if (error > 0) {
		*ppos += error;
		file_accessed(in);
	}
	return error;
}

/*
 * llseek SEEK_DATA or SEEK_HOLE through the radix tree.
 */
static pgoff_t shmem_seek_hole_data(struct address_space *mapping,
				    pgoff_t index, pgoff_t end, int whence)
{
	struct page *page;
	struct pagevec pvec;
	pgoff_t indices[PAGEVEC_SIZE];
	bool done = false;
	int i;

	pagevec_init(&pvec, 0);
	pvec.nr = 1;		/* start small: we may be there already */
	while (!done) {
		pvec.nr = find_get_entries(mapping, index,
					pvec.nr, pvec.pages, indices);
		if (!pvec.nr) {
			if (whence == SEEK_DATA)
				index = end;
			break;
		}
		for (i = 0; i < pvec.nr; i++, index++) {
			if (index < indices[i]) {
				if (whence == SEEK_HOLE) {
					done = true;
					break;
				}
				index = indices[i];
			}
			page = pvec.pages[i];
			if (page && !radix_tree_exceptional_entry(page)) {
				if (!PageUptodate(page))
					page = NULL;
			}
			if (index >= end ||
			    (page && whence == SEEK_DATA) ||
			    (!page && whence == SEEK_HOLE)) {
				done = true;
				break;
			}
		}
		pagevec_remove_exceptionals(&pvec);
		pagevec_release(&pvec);
		pvec.nr = PAGEVEC_SIZE;
		cond_resched();
	}
	return index;
}

static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence)
{
	struct address_space *mapping = file->f_mapping;
	struct inode *inode = mapping->host;
	pgoff_t start, end;
	loff_t new_offset;

	if (whence != SEEK_DATA && whence != SEEK_HOLE)
		return generic_file_llseek_size(file, offset, whence,
					MAX_LFS_FILESIZE, i_size_read(inode));
	mutex_lock(&inode->i_mutex);
	/* We're holding i_mutex so we can access i_size directly */

	if (offset < 0)
		offset = -EINVAL;
	else if (offset >= inode->i_size)
		offset = -ENXIO;
	else {
		start = offset >> PAGE_CACHE_SHIFT;
		end = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
		new_offset = shmem_seek_hole_data(mapping, start, end, whence);
		new_offset <<= PAGE_CACHE_SHIFT;
		if (new_offset > offset) {
			if (new_offset < inode->i_size)
				offset = new_offset;
			else if (whence == SEEK_DATA)
				offset = -ENXIO;
			else
				offset = inode->i_size;
		}
	}

	if (offset >= 0)
		offset = vfs_setpos(file, offset, MAX_LFS_FILESIZE);
	mutex_unlock(&inode->i_mutex);
	return offset;
}
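
/*
 * Userspace sketch of SEEK_DATA/SEEK_HOLE on a sparse tmpfs file
 * (offsets hypothetical):
 *
 *	off_t data = lseek(fd, 0, SEEK_DATA);
 *	off_t hole = lseek(fd, data, SEEK_HOLE);
 *
 * lseek() fails with ENXIO when no further data (SEEK_DATA) exists
 * before end of file, matching the -ENXIO returns above.
 */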

/*
 * We need a tag: a new tag would expand every radix_tree_node by 8 bytes,
 * so reuse a tag which we firmly believe is never set or cleared on shmem.
 */
#define SHMEM_TAG_PINNED        PAGECACHE_TAG_TOWRITE
#define LAST_SCAN               4       /* about 150ms max */

static void shmem_tag_pins(struct address_space *mapping)
{
	struct radix_tree_iter iter;
	void **slot;
	pgoff_t start;
	struct page *page;

	lru_add_drain();
	start = 0;
	rcu_read_lock();

restart:
	radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
		page = radix_tree_deref_slot(slot);
		if (!page || radix_tree_exception(page)) {
			if (radix_tree_deref_retry(page))
				goto restart;
		} else if (page_count(page) - page_mapcount(page) > 1) {
			spin_lock_irq(&mapping->tree_lock);
			radix_tree_tag_set(&mapping->page_tree, iter.index,
					   SHMEM_TAG_PINNED);
			spin_unlock_irq(&mapping->tree_lock);
		}

		if (need_resched()) {
			cond_resched_rcu();
			start = iter.index + 1;
			goto restart;
		}
	}
	rcu_read_unlock();
}

/*
 * Setting SEAL_WRITE requires us to verify there's no pending writer. However,
 * via get_user_pages(), drivers might have some pending I/O without any active
 * user-space mappings (eg., direct-IO, AIO). Therefore, we look at all pages
 * and see whether it has an elevated ref-count. If so, we tag them and wait
 * for them to be dropped.
 * The caller must guarantee that no new user will acquire writable references
 * to those pages to avoid races.
 */
static int shmem_wait_for_pins(struct address_space *mapping)
{
	struct radix_tree_iter iter;
	void **slot;
	pgoff_t start;
	struct page *page;
	int error, scan;

	shmem_tag_pins(mapping);

	error = 0;
	for (scan = 0; scan <= LAST_SCAN; scan++) {
		if (!radix_tree_tagged(&mapping->page_tree, SHMEM_TAG_PINNED))
			break;

		if (!scan)
			lru_add_drain_all();
		else if (schedule_timeout_killable((HZ << scan) / 200))
			scan = LAST_SCAN;

		start = 0;
		rcu_read_lock();
restart:
		radix_tree_for_each_tagged(slot, &mapping->page_tree, &iter,
					   start, SHMEM_TAG_PINNED) {

			page = radix_tree_deref_slot(slot);
			if (radix_tree_exception(page)) {
				if (radix_tree_deref_retry(page))
					goto restart;

				page = NULL;
			}

			if (page &&
			    page_count(page) - page_mapcount(page) != 1) {
				if (scan < LAST_SCAN)
					goto continue_resched;

				/*
				 * On the last scan, we clean up all those tags
				 * we inserted; but make a note that we still
				 * found pages pinned.
				 */
				error = -EBUSY;
			}

			spin_lock_irq(&mapping->tree_lock);
			radix_tree_tag_clear(&mapping->page_tree,
					     iter.index, SHMEM_TAG_PINNED);
			spin_unlock_irq(&mapping->tree_lock);
continue_resched:
			if (need_resched()) {
				cond_resched_rcu();
				start = iter.index + 1;
				goto restart;
			}
		}
		rcu_read_unlock();
	}

	return error;
}

#define F_ALL_SEALS (F_SEAL_SEAL | \
		     F_SEAL_SHRINK | \
		     F_SEAL_GROW | \
		     F_SEAL_WRITE)

int shmem_add_seals(struct file *file, unsigned int seals)
{
	struct inode *inode = file_inode(file);
	struct shmem_inode_info *info = SHMEM_I(inode);
	int error;

	/*
	 * SEALING
	 * Sealing allows multiple parties to share a shmem-file but restrict
	 * access to a specific subset of file operations. Seals can only be
	 * added, but never removed. This way, mutually trusted parties can
	 * share common memory regions with a well-defined policy. A malicious
	 * peer can thus never perform unwanted operations on a shared object.
	 *
	 * Seals are only supported on special shmem-files and always affect
	 * the whole underlying inode. Once a seal is set, it may prevent some
	 * kinds of access to the file. Currently, the following seals are
	 * defined:
	 *   SEAL_SEAL: Prevent further seals from being set on this file
	 *   SEAL_SHRINK: Prevent the file from shrinking
	 *   SEAL_GROW: Prevent the file from growing
	 *   SEAL_WRITE: Prevent write access to the file
	 *
	 * As we don't require any trust relationship between two parties, we
	 * must prevent seals from being removed. Therefore, sealing a file
	 * only adds a given set of seals to the file, it never touches
	 * existing seals. Furthermore, the "setting seals"-operation can be
	 * done only with FMODE_WRITE, so a peer holding the file read-only
	 * can never seal it behind our back.
	 */
	if (file->f_op != &shmem_file_operations)
		return -EINVAL;
	if (!(file->f_mode & FMODE_WRITE))
		return -EPERM;
	if (seals & ~(unsigned int)F_ALL_SEALS)
		return -EINVAL;

	mutex_lock(&inode->i_mutex);

	if (info->seals & F_SEAL_SEAL) {
		error = -EPERM;
		goto unlock;
	}

	if ((seals & F_SEAL_WRITE) && !(info->seals & F_SEAL_WRITE)) {
		error = mapping_deny_writable(file->f_mapping);
		if (error)
			goto unlock;

		error = shmem_wait_for_pins(file->f_mapping);
		if (error) {
			mapping_allow_writable(file->f_mapping);
			goto unlock;
		}
	}

	info->seals |= seals;
	error = 0;

unlock:
	mutex_unlock(&inode->i_mutex);
	return error;
}
EXPORT_SYMBOL_GPL(shmem_add_seals);

int shmem_get_seals(struct file *file)
{
	if (file->f_op != &shmem_file_operations)
		return -EINVAL;

	return SHMEM_I(file_inode(file))->seals;
}
EXPORT_SYMBOL_GPL(shmem_get_seals);
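
/*
 * Sealing is normally reached via memfd_create(2); a minimal userspace
 * sketch (error handling omitted):
 *
 *	int fd = memfd_create("example", MFD_ALLOW_SEALING);
 *	ftruncate(fd, size);
 *	... share fd, e.g. over a unix socket ...
 *	fcntl(fd, F_ADD_SEALS, F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_WRITE);
 *
 * after which the recipient can fstat() the size and mmap() the contents
 * knowing that neither can change under it.
 */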

long shmem_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
{
	long error;

	switch (cmd) {
	case F_ADD_SEALS:
		/* disallow upper 32bit */
		if (arg > UINT_MAX)
			return -EINVAL;

		error = shmem_add_seals(file, arg);
		break;
	case F_GET_SEALS:
		error = shmem_get_seals(file);
		break;
	default:
		error = -EINVAL;
		break;
	}

	return error;
}

static long shmem_fallocate(struct file *file, int mode, loff_t offset,
							 loff_t len)
{
	struct inode *inode = file_inode(file);
	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
	struct shmem_inode_info *info = SHMEM_I(inode);
	struct shmem_falloc shmem_falloc;
	pgoff_t start, index, end;
	int error;

	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
		return -EOPNOTSUPP;

	mutex_lock(&inode->i_mutex);

	if (mode & FALLOC_FL_PUNCH_HOLE) {
		struct address_space *mapping = file->f_mapping;
		loff_t unmap_start = round_up(offset, PAGE_SIZE);
		loff_t unmap_end = round_down(offset + len, PAGE_SIZE) - 1;
		DECLARE_WAIT_QUEUE_HEAD_ONSTACK(shmem_falloc_waitq);

		/* protected by i_mutex */
		if (info->seals & F_SEAL_WRITE) {
			error = -EPERM;
			goto out;
		}

		shmem_falloc.waitq = &shmem_falloc_waitq;
		shmem_falloc.start = unmap_start >> PAGE_SHIFT;
		shmem_falloc.next = (unmap_end + 1) >> PAGE_SHIFT;
		spin_lock(&inode->i_lock);
		inode->i_private = &shmem_falloc;
		spin_unlock(&inode->i_lock);

		if ((u64)unmap_end > (u64)unmap_start)
			unmap_mapping_range(mapping, unmap_start,
					    1 + unmap_end - unmap_start, 0);
		shmem_truncate_range(inode, offset, offset + len - 1);
		/* No need to unmap again: hole-punching leaves COWed pages */

		spin_lock(&inode->i_lock);
		inode->i_private = NULL;
		wake_up_all(&shmem_falloc_waitq);
		spin_unlock(&inode->i_lock);
		error = 0;
		goto out;
	}

	/* We need to check rlimit even when FALLOC_FL_KEEP_SIZE */
	error = inode_newsize_ok(inode, offset + len);
	if (error)
		goto out;

	if ((info->seals & F_SEAL_GROW) && offset + len > inode->i_size) {
		error = -EPERM;
		goto out;
	}

	start = offset >> PAGE_CACHE_SHIFT;
	end = (offset + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
	/* Try to avoid a swapstorm if len is impossible to satisfy */
	if (sbinfo->max_blocks && end - start > sbinfo->max_blocks) {
		error = -ENOSPC;
		goto out;
	}

	shmem_falloc.waitq = NULL;
	shmem_falloc.start = start;
	shmem_falloc.next  = start;
	shmem_falloc.nr_falloced = 0;
	shmem_falloc.nr_unswapped = 0;
	spin_lock(&inode->i_lock);
	inode->i_private = &shmem_falloc;
	spin_unlock(&inode->i_lock);

	for (index = start; index < end; index++) {
		struct page *page;

		/*
		 * Good, the fallocate(2) manpage permits EINTR: we may have
		 * been interrupted because we are using up too much memory.
		 */
		if (signal_pending(current))
			error = -EINTR;
		else if (shmem_falloc.nr_unswapped > shmem_falloc.nr_falloced)
			error = -ENOMEM;
		else
			error = shmem_getpage(inode, index, &page, SGP_FALLOC,
									NULL);
		if (error) {
			/* Remove the !PageUptodate pages we added */
			if (index > start) {
				shmem_undo_range(inode,
				    (loff_t)start << PAGE_CACHE_SHIFT,
				    ((loff_t)index << PAGE_CACHE_SHIFT) - 1,
				    true);
			}
			goto undone;
		}

		/*
		 * Inform shmem_writepage() how far we have reached.
		 * No need for lock or barrier: we have the page lock.
		 */
		shmem_falloc.next++;
		if (!PageUptodate(page))
			shmem_falloc.nr_falloced++;

		/*
		 * If !PageUptodate, leave it that way so that freeable pages
		 * can be recognized if we need to rollback on error later.
		 * But set_page_dirty so that memory pressure will swap rather
		 * than free the pages we are allocating (and SGP_CACHE pages
		 * might still be clean: we now need to mark those dirty too).
		 */
		set_page_dirty(page);
		unlock_page(page);
		page_cache_release(page);
		cond_resched();
	}

	if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size)
		i_size_write(inode, offset + len);
	inode->i_ctime = CURRENT_TIME;
undone:
	spin_lock(&inode->i_lock);
	inode->i_private = NULL;
	spin_unlock(&inode->i_lock);
out:
	mutex_unlock(&inode->i_mutex);
	return error;
}
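
/*
 * Userspace sketch: preallocate a tmpfs file, then punch a hole in it
 * (sizes hypothetical):
 *
 *	fallocate(fd, 0, 0, 1 << 20);
 *	fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, 4096);
 *
 * The VFS requires FALLOC_FL_PUNCH_HOLE to be combined with
 * FALLOC_FL_KEEP_SIZE before this function ever sees it.
 */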

static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb);

	buf->f_type = TMPFS_MAGIC;
	buf->f_bsize = PAGE_CACHE_SIZE;
	buf->f_namelen = NAME_MAX;
	if (sbinfo->max_blocks) {
		buf->f_blocks = sbinfo->max_blocks;
		buf->f_bavail =
		buf->f_bfree  = sbinfo->max_blocks -
				percpu_counter_sum(&sbinfo->used_blocks);
	}
	if (sbinfo->max_inodes) {
		buf->f_files = sbinfo->max_inodes;
		buf->f_ffree = sbinfo->free_inodes;
	}
	/* else leave those fields 0 like simple_statfs */
	return 0;
}

/*
 * File creation. Allocate an inode, and we're done..
 */
static int
shmem_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
{
	struct inode *inode;
	int error = -ENOSPC;

	inode = shmem_get_inode(dir->i_sb, dir, mode, dev, VM_NORESERVE);
	if (inode) {
		error = simple_acl_create(dir, inode);
		if (error)
			goto out_iput;
		error = security_inode_init_security(inode, dir,
						     &dentry->d_name,
						     shmem_initxattrs, NULL);
		if (error && error != -EOPNOTSUPP)
			goto out_iput;

		error = 0;
		dir->i_size += BOGO_DIRENT_SIZE;
		dir->i_ctime = dir->i_mtime = CURRENT_TIME;
		d_instantiate(dentry, inode);
		dget(dentry); /* Extra count - pin the dentry in core */
	}
	return error;
out_iput:
	iput(inode);
	return error;
}

static int
shmem_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
{
	struct inode *inode;
	int error = -ENOSPC;

	inode = shmem_get_inode(dir->i_sb, dir, mode, 0, VM_NORESERVE);
	if (inode) {
		error = security_inode_init_security(inode, dir,
						     NULL,
						     shmem_initxattrs, NULL);
		if (error && error != -EOPNOTSUPP)
			goto out_iput;
		error = simple_acl_create(dir, inode);
		if (error)
			goto out_iput;
		d_tmpfile(dentry, inode);
	}
	return error;
out_iput:
	iput(inode);
	return error;
}

static int shmem_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
	int error;

	if ((error = shmem_mknod(dir, dentry, mode | S_IFDIR, 0)))
		return error;
	inc_nlink(dir);
	return 0;
}

static int shmem_create(struct inode *dir, struct dentry *dentry, umode_t mode,
		bool excl)
{
	return shmem_mknod(dir, dentry, mode | S_IFREG, 0);
}

/*
 * Link a file..
 */
static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
{
	struct inode *inode = old_dentry->d_inode;
	int ret;

	/*
	 * No ordinary (disk based) filesystem counts links as inodes;
	 * but each new link needs a new dentry, pinning lowmem, and
	 * tmpfs dentries cannot be pruned until they are unlinked.
	 */
	ret = shmem_reserve_inode(inode->i_sb);
	if (ret)
		goto out;

	dir->i_size += BOGO_DIRENT_SIZE;
	inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
	inc_nlink(inode);
	ihold(inode);	/* New dentry reference */
	dget(dentry);	/* Extra pinning count for the created dentry */
	d_instantiate(dentry, inode);
out:
	return ret;
}

static int shmem_unlink(struct inode *dir, struct dentry *dentry)
{
	struct inode *inode = dentry->d_inode;

	if (inode->i_nlink > 1 && !S_ISDIR(inode->i_mode))
		shmem_free_inode(inode->i_sb);

	dir->i_size -= BOGO_DIRENT_SIZE;
	inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
	drop_nlink(inode);
	dput(dentry);	/* Undo the count from "create" - this does all the work */
	return 0;
}

static int shmem_rmdir(struct inode *dir, struct dentry *dentry)
{
	if (!simple_empty(dentry))
		return -ENOTEMPTY;

	drop_nlink(dentry->d_inode);
	drop_nlink(dir);
	return shmem_unlink(dir, dentry);
}

static int shmem_exchange(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry)
{
	bool old_is_dir = S_ISDIR(old_dentry->d_inode->i_mode);
	bool new_is_dir = S_ISDIR(new_dentry->d_inode->i_mode);

	if (old_dir != new_dir && old_is_dir != new_is_dir) {
		if (old_is_dir) {
			drop_nlink(old_dir);
			inc_nlink(new_dir);
		} else {
			drop_nlink(new_dir);
			inc_nlink(old_dir);
		}
	}
	old_dir->i_ctime = old_dir->i_mtime =
	new_dir->i_ctime = new_dir->i_mtime =
	old_dentry->d_inode->i_ctime =
	new_dentry->d_inode->i_ctime = CURRENT_TIME;

	return 0;
}

static int shmem_whiteout(struct inode *old_dir, struct dentry *old_dentry)
{
	struct dentry *whiteout;
	int error;

	whiteout = d_alloc(old_dentry->d_parent, &old_dentry->d_name);
	if (!whiteout)
		return -ENOMEM;

	error = shmem_mknod(old_dir, whiteout,
			    S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV);
	dput(whiteout);
	if (error)
		return error;

	/*
	 * Cheat & hash the whiteout while the old dentry is unhashed:
	 * this keeps a single hashed dentry for the name, and makes the
	 * whiteout visible in the directory once the rename completes.
	 */
	d_rehash(whiteout);
	return 0;
}

/*
 * The VFS layer already does all the dentry stuff for rename,
 * we just have to decrement the usage count for the target if
 * it exists so that the VFS layer correctly free's it when it
 * gets overwritten.
 */
static int shmem_rename2(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags)
{
	struct inode *inode = old_dentry->d_inode;
	int they_are_dirs = S_ISDIR(inode->i_mode);

	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
		return -EINVAL;

	if (flags & RENAME_EXCHANGE)
		return shmem_exchange(old_dir, old_dentry, new_dir, new_dentry);

	if (!simple_empty(new_dentry))
		return -ENOTEMPTY;

	if (flags & RENAME_WHITEOUT) {
		int error;

		error = shmem_whiteout(old_dir, old_dentry);
		if (error)
			return error;
	}

	if (new_dentry->d_inode) {
		(void) shmem_unlink(new_dir, new_dentry);
		if (they_are_dirs) {
			drop_nlink(new_dentry->d_inode);
			drop_nlink(old_dir);
		}
	} else if (they_are_dirs) {
		drop_nlink(old_dir);
		inc_nlink(new_dir);
	}

	old_dir->i_size -= BOGO_DIRENT_SIZE;
	new_dir->i_size += BOGO_DIRENT_SIZE;
	old_dir->i_ctime = old_dir->i_mtime =
	new_dir->i_ctime = new_dir->i_mtime =
	inode->i_ctime = CURRENT_TIME;
	return 0;
}

static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
{
	int error;
	int len;
	struct inode *inode;
	struct page *page;
	char *kaddr;
	struct shmem_inode_info *info;

	len = strlen(symname) + 1;
	if (len > PAGE_CACHE_SIZE)
		return -ENAMETOOLONG;

	inode = shmem_get_inode(dir->i_sb, dir, S_IFLNK|S_IRWXUGO, 0, VM_NORESERVE);
	if (!inode)
		return -ENOSPC;

	error = security_inode_init_security(inode, dir, &dentry->d_name,
					     shmem_initxattrs, NULL);
	if (error) {
		if (error != -EOPNOTSUPP) {
			iput(inode);
			return error;
		}
		error = 0;
	}

	info = SHMEM_I(inode);
	inode->i_size = len-1;
	if (len <= SHORT_SYMLINK_LEN) {
		info->symlink = kmemdup(symname, len, GFP_KERNEL);
		if (!info->symlink) {
			iput(inode);
			return -ENOMEM;
		}
		inode->i_op = &shmem_short_symlink_operations;
	} else {
		error = shmem_getpage(inode, 0, &page, SGP_WRITE, NULL);
		if (error) {
			iput(inode);
			return error;
		}
		inode->i_mapping->a_ops = &shmem_aops;
		inode->i_op = &shmem_symlink_inode_operations;
		kaddr = kmap_atomic(page);
		memcpy(kaddr, symname, len);
		kunmap_atomic(kaddr);
		SetPageUptodate(page);
		set_page_dirty(page);
		unlock_page(page);
		page_cache_release(page);
	}
	dir->i_size += BOGO_DIRENT_SIZE;
	dir->i_ctime = dir->i_mtime = CURRENT_TIME;
	d_instantiate(dentry, inode);
	dget(dentry);
	return 0;
}

static void *shmem_follow_short_symlink(struct dentry *dentry, struct nameidata *nd)
{
	nd_set_link(nd, SHMEM_I(dentry->d_inode)->symlink);
	return NULL;
}

static void *shmem_follow_link(struct dentry *dentry, struct nameidata *nd)
{
	struct page *page = NULL;
	int error = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ, NULL);
	nd_set_link(nd, error ? ERR_PTR(error) : kmap(page));
	if (page)
		unlock_page(page);
	return page;
}

static void shmem_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie)
{
	if (!IS_ERR(nd_get_link(nd))) {
		struct page *page = cookie;
		kunmap(page);
		mark_page_accessed(page);
		page_cache_release(page);
	}
}
2505
2506#ifdef CONFIG_TMPFS_XATTR
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
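/*
 * Superblocks without xattr inode operations may get some security.*
 * xattr support from the LSM "for free".  As soon as we have any other
 * xattrs like ACLs, we also need to implement the security.* handlers
 * at filesystem level, though.
 */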
static int shmem_initxattrs(struct inode *inode,
			    const struct xattr *xattr_array,
			    void *fs_info)
{
	struct shmem_inode_info *info = SHMEM_I(inode);
	const struct xattr *xattr;
	struct simple_xattr *new_xattr;
	size_t len;

	for (xattr = xattr_array; xattr->name != NULL; xattr++) {
		new_xattr = simple_xattr_alloc(xattr->value, xattr->value_len);
		if (!new_xattr)
			return -ENOMEM;

		len = strlen(xattr->name) + 1;
		new_xattr->name = kmalloc(XATTR_SECURITY_PREFIX_LEN + len,
					  GFP_KERNEL);
		if (!new_xattr->name) {
			kfree(new_xattr);
			return -ENOMEM;
		}

		memcpy(new_xattr->name, XATTR_SECURITY_PREFIX,
		       XATTR_SECURITY_PREFIX_LEN);
		memcpy(new_xattr->name + XATTR_SECURITY_PREFIX_LEN,
		       xattr->name, len);

		simple_xattr_list_add(&info->xattrs, new_xattr);
	}

	return 0;
}

static const struct xattr_handler *shmem_xattr_handlers[] = {
#ifdef CONFIG_TMPFS_POSIX_ACL
	&posix_acl_access_xattr_handler,
	&posix_acl_default_xattr_handler,
#endif
	NULL
};

static int shmem_xattr_validate(const char *name)
{
	struct { const char *prefix; size_t len; } arr[] = {
		{ XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN },
		{ XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN }
	};
	int i;

	for (i = 0; i < ARRAY_SIZE(arr); i++) {
		size_t preflen = arr[i].len;
		if (strncmp(name, arr[i].prefix, preflen) == 0) {
			if (!name[preflen])
				return -EINVAL;
			return 0;
		}
	}
	return -EOPNOTSUPP;
}

static ssize_t shmem_getxattr(struct dentry *dentry, const char *name,
			      void *buffer, size_t size)
{
	struct shmem_inode_info *info = SHMEM_I(dentry->d_inode);
	int err;

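	/*
	 * If this is a request for a synthetic attribute in the system.*
	 * namespace use the generic infrastructure to resolve a handler
	 * for it via sb->s_xattr.
	 */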
	if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
		return generic_getxattr(dentry, name, buffer, size);

	err = shmem_xattr_validate(name);
	if (err)
		return err;

	return simple_xattr_get(&info->xattrs, name, buffer, size);
}

static int shmem_setxattr(struct dentry *dentry, const char *name,
			  const void *value, size_t size, int flags)
{
	struct shmem_inode_info *info = SHMEM_I(dentry->d_inode);
	int err;

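	/*
	 * If this is a request for a synthetic attribute in the system.*
	 * namespace use the generic infrastructure to resolve a handler
	 * for it via sb->s_xattr.
	 */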
	if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
		return generic_setxattr(dentry, name, value, size, flags);

	err = shmem_xattr_validate(name);
	if (err)
		return err;

	return simple_xattr_set(&info->xattrs, name, value, size, flags);
}

static int shmem_removexattr(struct dentry *dentry, const char *name)
{
	struct shmem_inode_info *info = SHMEM_I(dentry->d_inode);
	int err;

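	/*
	 * If this is a request for a synthetic attribute in the system.*
	 * namespace use the generic infrastructure to resolve a handler
	 * for it via sb->s_xattr.
	 */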
	if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
		return generic_removexattr(dentry, name);

	err = shmem_xattr_validate(name);
	if (err)
		return err;

	return simple_xattr_remove(&info->xattrs, name);
}

static ssize_t shmem_listxattr(struct dentry *dentry, char *buffer, size_t size)
{
	struct shmem_inode_info *info = SHMEM_I(dentry->d_inode);
	return simple_xattr_list(&info->xattrs, buffer, size);
}
#endif /* CONFIG_TMPFS_XATTR */

static const struct inode_operations shmem_short_symlink_operations = {
	.readlink	= generic_readlink,
	.follow_link	= shmem_follow_short_symlink,
#ifdef CONFIG_TMPFS_XATTR
	.setxattr	= shmem_setxattr,
	.getxattr	= shmem_getxattr,
	.listxattr	= shmem_listxattr,
	.removexattr	= shmem_removexattr,
#endif
};

static const struct inode_operations shmem_symlink_inode_operations = {
	.readlink	= generic_readlink,
	.follow_link	= shmem_follow_link,
	.put_link	= shmem_put_link,
#ifdef CONFIG_TMPFS_XATTR
	.setxattr	= shmem_setxattr,
	.getxattr	= shmem_getxattr,
	.listxattr	= shmem_listxattr,
	.removexattr	= shmem_removexattr,
#endif
};

static struct dentry *shmem_get_parent(struct dentry *child)
{
	return ERR_PTR(-ESTALE);
}

static int shmem_match(struct inode *ino, void *vfh)
{
	__u32 *fh = vfh;
	__u64 inum = fh[2];
	inum = (inum << 32) | fh[1];
	return ino->i_ino == inum && fh[0] == ino->i_generation;
}

static struct dentry *shmem_fh_to_dentry(struct super_block *sb,
		struct fid *fid, int fh_len, int fh_type)
{
	struct inode *inode;
	struct dentry *dentry = NULL;
	u64 inum;

	if (fh_len < 3)
		return NULL;

	inum = fid->raw[2];
	inum = (inum << 32) | fid->raw[1];

	inode = ilookup5(sb, (unsigned long)(inum + fid->raw[0]),
			shmem_match, fid->raw);
	if (inode) {
		dentry = d_find_alias(inode);
		iput(inode);
	}

	return dentry;
}

static int shmem_encode_fh(struct inode *inode, __u32 *fh, int *len,
				struct inode *parent)
{
	if (*len < 3) {
		*len = 3;
		return FILEID_INVALID;
	}

	if (inode_unhashed(inode)) {
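		/* Unfortunately insert_inode_hash is not idempotent,
		 * so as we hash inodes here rather than at creation
		 * time, we need a lock to ensure we only try
		 * to do it once
		 */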
		static DEFINE_SPINLOCK(lock);
		spin_lock(&lock);
		if (inode_unhashed(inode))
			__insert_inode_hash(inode,
					    inode->i_ino + inode->i_generation);
		spin_unlock(&lock);
	}

	fh[0] = inode->i_generation;
	fh[1] = inode->i_ino;
	fh[2] = ((__u64)inode->i_ino) >> 32;

	*len = 3;
	return 1;
}

static const struct export_operations shmem_export_ops = {
	.get_parent	= shmem_get_parent,
	.encode_fh	= shmem_encode_fh,
	.fh_to_dentry	= shmem_fh_to_dentry,
};

static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo,
			       bool remount)
{
	char *this_char, *value, *rest;
	struct mempolicy *mpol = NULL;
	uid_t uid;
	gid_t gid;

	while (options != NULL) {
		this_char = options;
		for (;;) {
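			/*
			 * NUL-terminate this option: unfortunately,
			 * mount options form a comma-separated list,
			 * but mpol's nodelist may also contain commas.
			 */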
			options = strchr(options, ',');
			if (options == NULL)
				break;
			options++;
			if (!isdigit(*options)) {
				options[-1] = '\0';
				break;
			}
		}
		if (!*this_char)
			continue;
		if ((value = strchr(this_char,'=')) != NULL) {
			*value++ = 0;
		} else {
			printk(KERN_ERR
			    "tmpfs: No value for mount option '%s'\n",
			    this_char);
			goto error;
		}

		if (!strcmp(this_char,"size")) {
			unsigned long long size;
			size = memparse(value,&rest);
			if (*rest == '%') {
				size <<= PAGE_SHIFT;
				size *= totalram_pages;
				do_div(size, 100);
				rest++;
			}
			if (*rest)
				goto bad_val;
			sbinfo->max_blocks =
				DIV_ROUND_UP(size, PAGE_CACHE_SIZE);
		} else if (!strcmp(this_char,"nr_blocks")) {
			sbinfo->max_blocks = memparse(value, &rest);
			if (*rest)
				goto bad_val;
		} else if (!strcmp(this_char,"nr_inodes")) {
			sbinfo->max_inodes = memparse(value, &rest);
			if (*rest)
				goto bad_val;
		} else if (!strcmp(this_char,"mode")) {
			if (remount)
				continue;
			sbinfo->mode = simple_strtoul(value, &rest, 8) & 07777;
			if (*rest)
				goto bad_val;
		} else if (!strcmp(this_char,"uid")) {
			if (remount)
				continue;
			uid = simple_strtoul(value, &rest, 0);
			if (*rest)
				goto bad_val;
			sbinfo->uid = make_kuid(current_user_ns(), uid);
			if (!uid_valid(sbinfo->uid))
				goto bad_val;
		} else if (!strcmp(this_char,"gid")) {
			if (remount)
				continue;
			gid = simple_strtoul(value, &rest, 0);
			if (*rest)
				goto bad_val;
			sbinfo->gid = make_kgid(current_user_ns(), gid);
			if (!gid_valid(sbinfo->gid))
				goto bad_val;
		} else if (!strcmp(this_char,"mpol")) {
			mpol_put(mpol);
			mpol = NULL;
			if (mpol_parse_str(value, &mpol))
				goto bad_val;
		} else {
			printk(KERN_ERR "tmpfs: Bad mount option %s\n",
			       this_char);
			goto error;
		}
	}
	sbinfo->mpol = mpol;
	return 0;

bad_val:
	printk(KERN_ERR "tmpfs: Bad value '%s' for mount option '%s'\n",
	       value, this_char);
error:
	mpol_put(mpol);
	return 1;
}

static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
{
	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
	struct shmem_sb_info config = *sbinfo;
	unsigned long inodes;
	int error = -EINVAL;

	config.mpol = NULL;
	if (shmem_parse_options(data, &config, true))
		return error;

	spin_lock(&sbinfo->stat_lock);
	inodes = sbinfo->max_inodes - sbinfo->free_inodes;
	if (percpu_counter_compare(&sbinfo->used_blocks, config.max_blocks) > 0)
		goto out;
	if (config.max_inodes < inodes)
		goto out;
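	/*
	 * Those tests disallow limited->unlimited while any are in use;
	 * but we must separately disallow unlimited->limited, because
	 * in that case we have no record of how much is already in use.
	 */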
	if (config.max_blocks && !sbinfo->max_blocks)
		goto out;
	if (config.max_inodes && !sbinfo->max_inodes)
		goto out;

	error = 0;
	sbinfo->max_blocks = config.max_blocks;
	sbinfo->max_inodes = config.max_inodes;
	sbinfo->free_inodes = config.max_inodes - inodes;

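	/*
	 * Preserve previous mempolicy unless mpol remount option was specified.
	 */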
	if (config.mpol) {
		mpol_put(sbinfo->mpol);
		sbinfo->mpol = config.mpol;
	}
out:
	spin_unlock(&sbinfo->stat_lock);
	return error;
}

static int shmem_show_options(struct seq_file *seq, struct dentry *root)
{
	struct shmem_sb_info *sbinfo = SHMEM_SB(root->d_sb);

	if (sbinfo->max_blocks != shmem_default_max_blocks())
		seq_printf(seq, ",size=%luk",
			sbinfo->max_blocks << (PAGE_CACHE_SHIFT - 10));
	if (sbinfo->max_inodes != shmem_default_max_inodes())
		seq_printf(seq, ",nr_inodes=%lu", sbinfo->max_inodes);
	if (sbinfo->mode != (S_IRWXUGO | S_ISVTX))
		seq_printf(seq, ",mode=%03ho", sbinfo->mode);
	if (!uid_eq(sbinfo->uid, GLOBAL_ROOT_UID))
		seq_printf(seq, ",uid=%u",
				from_kuid_munged(&init_user_ns, sbinfo->uid));
	if (!gid_eq(sbinfo->gid, GLOBAL_ROOT_GID))
		seq_printf(seq, ",gid=%u",
				from_kgid_munged(&init_user_ns, sbinfo->gid));
	shmem_show_mpol(seq, sbinfo->mpol);
	return 0;
}

#define MFD_NAME_PREFIX "memfd:"
#define MFD_NAME_PREFIX_LEN (sizeof(MFD_NAME_PREFIX) - 1)
#define MFD_NAME_MAX_LEN (NAME_MAX - MFD_NAME_PREFIX_LEN)

#define MFD_ALL_FLAGS (MFD_CLOEXEC | MFD_ALLOW_SEALING)

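/*
 * memfd_create() creates an anonymous, unlinked tmpfs file and returns
 * a file descriptor for it.  Illustrative userspace usage only (not
 * part of this file; error handling omitted):
 *
 *	int fd = memfd_create("mybuffer", MFD_CLOEXEC | MFD_ALLOW_SEALING);
 *	ftruncate(fd, 4096);
 *	fcntl(fd, F_ADD_SEALS, F_SEAL_SHRINK | F_SEAL_GROW);
 */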
SYSCALL_DEFINE2(memfd_create,
		const char __user *, uname,
		unsigned int, flags)
{
	struct shmem_inode_info *info;
	struct file *file;
	int fd, error;
	char *name;
	long len;

	if (flags & ~(unsigned int)MFD_ALL_FLAGS)
		return -EINVAL;

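	/* length includes terminating zero */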
	len = strnlen_user(uname, MFD_NAME_MAX_LEN + 1);
	if (len <= 0)
		return -EFAULT;
	if (len > MFD_NAME_MAX_LEN + 1)
		return -EINVAL;

	name = kmalloc(len + MFD_NAME_PREFIX_LEN, GFP_TEMPORARY);
	if (!name)
		return -ENOMEM;

	strcpy(name, MFD_NAME_PREFIX);
	if (copy_from_user(&name[MFD_NAME_PREFIX_LEN], uname, len)) {
		error = -EFAULT;
		goto err_name;
	}

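	/* terminating-zero may have changed since strnlen_user() returned */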
	if (name[len + MFD_NAME_PREFIX_LEN - 1]) {
		error = -EFAULT;
		goto err_name;
	}

	fd = get_unused_fd_flags((flags & MFD_CLOEXEC) ? O_CLOEXEC : 0);
	if (fd < 0) {
		error = fd;
		goto err_name;
	}

	file = shmem_file_setup(name, 0, VM_NORESERVE);
	if (IS_ERR(file)) {
		error = PTR_ERR(file);
		goto err_fd;
	}
	info = SHMEM_I(file_inode(file));
	file->f_mode |= FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE;
	file->f_flags |= O_RDWR | O_LARGEFILE;
	if (flags & MFD_ALLOW_SEALING)
		info->seals &= ~F_SEAL_SEAL;

	fd_install(fd, file);
	kfree(name);
	return fd;

err_fd:
	put_unused_fd(fd);
err_name:
	kfree(name);
	return error;
}

#endif /* CONFIG_TMPFS */

static void shmem_put_super(struct super_block *sb)
{
	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);

	percpu_counter_destroy(&sbinfo->used_blocks);
	mpol_put(sbinfo->mpol);
	kfree(sbinfo);
	sb->s_fs_info = NULL;
}

int shmem_fill_super(struct super_block *sb, void *data, int silent)
{
	struct inode *inode;
	struct shmem_sb_info *sbinfo;
	int err = -ENOMEM;

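	/* Round up to L1_CACHE_BYTES to resist false sharing */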
	sbinfo = kzalloc(max((int)sizeof(struct shmem_sb_info),
				L1_CACHE_BYTES), GFP_KERNEL);
	if (!sbinfo)
		return -ENOMEM;

	sbinfo->mode = S_IRWXUGO | S_ISVTX;
	sbinfo->uid = current_fsuid();
	sbinfo->gid = current_fsgid();
	sb->s_fs_info = sbinfo;

#ifdef CONFIG_TMPFS
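	/*
	 * Per default we only allow half of the physical ram per
	 * tmpfs instance, limiting inodes to one per page of lowmem;
	 * but the internal instance is left unlimited.
	 */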
	if (!(sb->s_flags & MS_KERNMOUNT)) {
		sbinfo->max_blocks = shmem_default_max_blocks();
		sbinfo->max_inodes = shmem_default_max_inodes();
		if (shmem_parse_options(data, sbinfo, false)) {
			err = -EINVAL;
			goto failed;
		}
	} else {
		sb->s_flags |= MS_NOUSER;
	}
	sb->s_export_op = &shmem_export_ops;
	sb->s_flags |= MS_NOSEC;
#else
	sb->s_flags |= MS_NOUSER;
#endif

	spin_lock_init(&sbinfo->stat_lock);
	if (percpu_counter_init(&sbinfo->used_blocks, 0, GFP_KERNEL))
		goto failed;
	sbinfo->free_inodes = sbinfo->max_inodes;

	sb->s_maxbytes = MAX_LFS_FILESIZE;
	sb->s_blocksize = PAGE_CACHE_SIZE;
	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
	sb->s_magic = TMPFS_MAGIC;
	sb->s_op = &shmem_ops;
	sb->s_time_gran = 1;
#ifdef CONFIG_TMPFS_XATTR
	sb->s_xattr = shmem_xattr_handlers;
#endif
#ifdef CONFIG_TMPFS_POSIX_ACL
	sb->s_flags |= MS_POSIXACL;
#endif

	inode = shmem_get_inode(sb, NULL, S_IFDIR | sbinfo->mode, 0, VM_NORESERVE);
	if (!inode)
		goto failed;
	inode->i_uid = sbinfo->uid;
	inode->i_gid = sbinfo->gid;
	sb->s_root = d_make_root(inode);
	if (!sb->s_root)
		goto failed;
	return 0;

failed:
	shmem_put_super(sb);
	return err;
}

static struct kmem_cache *shmem_inode_cachep;

static struct inode *shmem_alloc_inode(struct super_block *sb)
{
	struct shmem_inode_info *info;
	info = kmem_cache_alloc(shmem_inode_cachep, GFP_KERNEL);
	if (!info)
		return NULL;
	return &info->vfs_inode;
}

static void shmem_destroy_callback(struct rcu_head *head)
{
	struct inode *inode = container_of(head, struct inode, i_rcu);
	kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode));
}

static void shmem_destroy_inode(struct inode *inode)
{
	if (S_ISREG(inode->i_mode))
		mpol_free_shared_policy(&SHMEM_I(inode)->policy);
	call_rcu(&inode->i_rcu, shmem_destroy_callback);
}

static void shmem_init_inode(void *foo)
{
	struct shmem_inode_info *info = foo;
	inode_init_once(&info->vfs_inode);
}

static int shmem_init_inodecache(void)
{
	shmem_inode_cachep = kmem_cache_create("shmem_inode_cache",
				sizeof(struct shmem_inode_info),
				0, SLAB_PANIC, shmem_init_inode);
	return 0;
}

static void shmem_destroy_inodecache(void)
{
	kmem_cache_destroy(shmem_inode_cachep);
}

static const struct address_space_operations shmem_aops = {
	.writepage	= shmem_writepage,
	.set_page_dirty	= __set_page_dirty_no_writeback,
#ifdef CONFIG_TMPFS
	.write_begin	= shmem_write_begin,
	.write_end	= shmem_write_end,
#endif
#ifdef CONFIG_MIGRATION
	.migratepage	= migrate_page,
#endif
	.error_remove_page = generic_error_remove_page,
};

static const struct file_operations shmem_file_operations = {
	.mmap		= shmem_mmap,
#ifdef CONFIG_TMPFS
	.llseek		= shmem_file_llseek,
	.read		= new_sync_read,
	.write		= new_sync_write,
	.read_iter	= shmem_file_read_iter,
	.write_iter	= generic_file_write_iter,
	.fsync		= noop_fsync,
	.splice_read	= shmem_file_splice_read,
	.splice_write	= iter_file_splice_write,
	.fallocate	= shmem_fallocate,
#endif
};

static const struct inode_operations shmem_inode_operations = {
	.setattr	= shmem_setattr,
#ifdef CONFIG_TMPFS_XATTR
	.setxattr	= shmem_setxattr,
	.getxattr	= shmem_getxattr,
	.listxattr	= shmem_listxattr,
	.removexattr	= shmem_removexattr,
	.set_acl	= simple_set_acl,
#endif
};

static const struct inode_operations shmem_dir_inode_operations = {
#ifdef CONFIG_TMPFS
	.create		= shmem_create,
	.lookup		= simple_lookup,
	.link		= shmem_link,
	.unlink		= shmem_unlink,
	.symlink	= shmem_symlink,
	.mkdir		= shmem_mkdir,
	.rmdir		= shmem_rmdir,
	.mknod		= shmem_mknod,
	.rename2	= shmem_rename2,
	.tmpfile	= shmem_tmpfile,
#endif
#ifdef CONFIG_TMPFS_XATTR
	.setxattr	= shmem_setxattr,
	.getxattr	= shmem_getxattr,
	.listxattr	= shmem_listxattr,
	.removexattr	= shmem_removexattr,
#endif
#ifdef CONFIG_TMPFS_POSIX_ACL
	.setattr	= shmem_setattr,
	.set_acl	= simple_set_acl,
#endif
};

static const struct inode_operations shmem_special_inode_operations = {
#ifdef CONFIG_TMPFS_XATTR
	.setxattr	= shmem_setxattr,
	.getxattr	= shmem_getxattr,
	.listxattr	= shmem_listxattr,
	.removexattr	= shmem_removexattr,
#endif
#ifdef CONFIG_TMPFS_POSIX_ACL
	.setattr	= shmem_setattr,
	.set_acl	= simple_set_acl,
#endif
};

static const struct super_operations shmem_ops = {
	.alloc_inode	= shmem_alloc_inode,
	.destroy_inode	= shmem_destroy_inode,
#ifdef CONFIG_TMPFS
	.statfs		= shmem_statfs,
	.remount_fs	= shmem_remount_fs,
	.show_options	= shmem_show_options,
#endif
	.evict_inode	= shmem_evict_inode,
	.drop_inode	= generic_delete_inode,
	.put_super	= shmem_put_super,
};

static const struct vm_operations_struct shmem_vm_ops = {
	.fault		= shmem_fault,
	.map_pages	= filemap_map_pages,
#ifdef CONFIG_NUMA
	.set_policy	= shmem_set_policy,
	.get_policy	= shmem_get_policy,
#endif
	.remap_pages	= generic_file_remap_pages,
};

static struct dentry *shmem_mount(struct file_system_type *fs_type,
	int flags, const char *dev_name, void *data)
{
	return mount_nodev(fs_type, flags, data, shmem_fill_super);
}

static struct file_system_type shmem_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "tmpfs",
	.mount		= shmem_mount,
	.kill_sb	= kill_litter_super,
	.fs_flags	= FS_USERNS_MOUNT,
};

int __init shmem_init(void)
{
	int error;

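	/* If rootfs called this, don't re-init */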
	if (shmem_inode_cachep)
		return 0;

	error = bdi_init(&shmem_backing_dev_info);
	if (error)
		goto out4;

	error = shmem_init_inodecache();
	if (error)
		goto out3;

	error = register_filesystem(&shmem_fs_type);
	if (error) {
		printk(KERN_ERR "Could not register tmpfs\n");
		goto out2;
	}

	shm_mnt = kern_mount(&shmem_fs_type);
	if (IS_ERR(shm_mnt)) {
		error = PTR_ERR(shm_mnt);
		printk(KERN_ERR "Could not kern_mount tmpfs\n");
		goto out1;
	}
	return 0;

out1:
	unregister_filesystem(&shmem_fs_type);
out2:
	shmem_destroy_inodecache();
out3:
	bdi_destroy(&shmem_backing_dev_info);
out4:
	shm_mnt = ERR_PTR(error);
	return error;
}

#else /* !CONFIG_SHMEM */

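/*
 * tiny-shmem: simple shmemfs and tmpfs using ramfs code
 *
 * This is intended for small systems where the benefits of the full
 * shmem code (swap-backed pages and resource limits) are outweighed
 * by their complexity. On systems without swap this code should be
 * effectively equivalent, but much lighter weight.
 */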
static struct file_system_type shmem_fs_type = {
	.name		= "tmpfs",
	.mount		= ramfs_mount,
	.kill_sb	= kill_litter_super,
	.fs_flags	= FS_USERNS_MOUNT,
};

int __init shmem_init(void)
{
	BUG_ON(register_filesystem(&shmem_fs_type) != 0);

	shm_mnt = kern_mount(&shmem_fs_type);
	BUG_ON(IS_ERR(shm_mnt));

	return 0;
}

int shmem_unuse(swp_entry_t swap, struct page *page)
{
	return 0;
}

int shmem_lock(struct file *file, int lock, struct user_struct *user)
{
	return 0;
}

void shmem_unlock_mapping(struct address_space *mapping)
{
}

void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
{
	truncate_inode_pages_range(inode->i_mapping, lstart, lend);
}
EXPORT_SYMBOL_GPL(shmem_truncate_range);

#define shmem_vm_ops				generic_file_vm_ops
#define shmem_file_operations			ramfs_file_operations
#define shmem_get_inode(sb, dir, mode, dev, flags)	ramfs_get_inode(sb, dir, mode, dev)
#define shmem_acct_size(flags, size)		0
#define shmem_unacct_size(flags, size)		do {} while (0)

#endif /* CONFIG_SHMEM */

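/* common code */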
static struct dentry_operations anon_ops = {
	.d_dname = simple_dname
};

static struct file *__shmem_file_setup(const char *name, loff_t size,
				       unsigned long flags, unsigned int i_flags)
{
	struct file *res;
	struct inode *inode;
	struct path path;
	struct super_block *sb;
	struct qstr this;

	if (IS_ERR(shm_mnt))
		return ERR_CAST(shm_mnt);

	if (size < 0 || size > MAX_LFS_FILESIZE)
		return ERR_PTR(-EINVAL);

	if (shmem_acct_size(flags, size))
		return ERR_PTR(-ENOMEM);

	res = ERR_PTR(-ENOMEM);
	this.name = name;
	this.len = strlen(name);
	this.hash = 0;
	sb = shm_mnt->mnt_sb;
	path.mnt = mntget(shm_mnt);
	path.dentry = d_alloc_pseudo(sb, &this);
	if (!path.dentry)
		goto put_memory;
	d_set_d_op(path.dentry, &anon_ops);

	res = ERR_PTR(-ENOSPC);
	inode = shmem_get_inode(sb, NULL, S_IFREG | S_IRWXUGO, 0, flags);
	if (!inode)
		goto put_memory;

	inode->i_flags |= i_flags;
	d_instantiate(path.dentry, inode);
	inode->i_size = size;
	clear_nlink(inode);
	res = ERR_PTR(ramfs_nommu_expand_for_mapping(inode, size));
	if (IS_ERR(res))
		goto put_path;

	res = alloc_file(&path, FMODE_WRITE | FMODE_READ,
			 &shmem_file_operations);
	if (IS_ERR(res))
		goto put_path;

	return res;

put_memory:
	shmem_unacct_size(flags, size);
put_path:
	path_put(&path);
	return res;
}

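/**
 * shmem_kernel_file_setup - get an unlinked file living in tmpfs which must be
 *	kernel internal.  There will be NO LSM permission checks against the
 *	underlying inode, so users of this interface must, by themselves,
 *	check that the underlying inode is not being abused.
 * @name: name for dentry (to be seen in /proc/<pid>/maps)
 * @size: size to be set for the file
 * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size
 */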
struct file *shmem_kernel_file_setup(const char *name, loff_t size,
				     unsigned long flags)
{
	return __shmem_file_setup(name, size, flags, S_PRIVATE);
}

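/**
 * shmem_file_setup - get an unlinked file living in tmpfs
 * @name: name for dentry (to be seen in /proc/<pid>/maps)
 * @size: size to be set for the file
 * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size
 */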
struct file *shmem_file_setup(const char *name, loff_t size,
			      unsigned long flags)
{
	return __shmem_file_setup(name, size, flags, 0);
}
EXPORT_SYMBOL_GPL(shmem_file_setup);

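/**
 * shmem_zero_setup - setup a shared anonymous mapping
 * @vma: the vma to be mmapped is prepared by do_mmap_pgoff
 */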
int shmem_zero_setup(struct vm_area_struct *vma)
{
	struct file *file;
	loff_t size = vma->vm_end - vma->vm_start;

	file = shmem_file_setup("dev/zero", size, vma->vm_flags);
	if (IS_ERR(file))
		return PTR_ERR(file);

	if (vma->vm_file)
		fput(vma->vm_file);
	vma->vm_file = file;
	vma->vm_ops = &shmem_vm_ops;
	return 0;
}

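/**
 * shmem_read_mapping_page_gfp - read into page cache, using specified page allocation flags.
 * @mapping:	the page's address_space
 * @index:	the page index
 * @gfp:	the page allocator flags to use if allocating
 *
 * This behaves as a tmpfs "read_cache_page_gfp(mapping, index, gfp)",
 * with any new page allocations done using the specified allocation flags.
 * read_cache_page_gfp() itself would use the ->readpage() method, which
 * does not suit tmpfs: tmpfs may have pages in swapcache and must find
 * those for itself, so this goes through shmem_getpage_gfp() instead.
 */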
struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
					 pgoff_t index, gfp_t gfp)
{
#ifdef CONFIG_SHMEM
	struct inode *inode = mapping->host;
	struct page *page;
	int error;

	BUG_ON(mapping->a_ops != &shmem_aops);
	error = shmem_getpage_gfp(inode, index, &page, SGP_CACHE, gfp, NULL);
	if (error)
		page = ERR_PTR(error);
	else
		unlock_page(page);
	return page;
#else
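	/*
	 * The tiny !SHMEM case uses ramfs without swap
	 */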
	return read_cache_page_gfp(mapping, index, gfp);
#endif
}
EXPORT_SYMBOL_GPL(shmem_read_mapping_page_gfp);