/*
 * Resizable virtual memory filesystem for Linux (tmpfs / shmem).
 *
 * Provides the in-memory filesystem used for tmpfs mounts, SysV shared
 * memory, memfd and shared anonymous mappings; pages are swappable and
 * usage is bounded by the mount's block and inode limits.
 *
 * This file is released under the GPL.
 */
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/vfs.h>
#include <linux/mount.h>
#include <linux/ramfs.h>
#include <linux/pagemap.h>
#include <linux/file.h>
#include <linux/mm.h>
#include <linux/sched/signal.h>
#include <linux/export.h>
#include <linux/swap.h>
#include <linux/uio.h>
#include <linux/khugepaged.h>
#include <linux/hugetlb.h>

#include <asm/tlbflush.h>

static struct vfsmount *shm_mnt;

#ifdef CONFIG_SHMEM
/*
 * This virtual memory filesystem is heavily based on the ramfs. It
 * extends ramfs by the ability to use swap and honor resource limits
 * which makes it a completely usable filesystem.
 */
#include <linux/xattr.h>
#include <linux/exportfs.h>
#include <linux/posix_acl.h>
#include <linux/posix_acl_xattr.h>
#include <linux/mman.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>
#include <linux/shmem_fs.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/pagevec.h>
#include <linux/percpu_counter.h>
#include <linux/falloc.h>
#include <linux/splice.h>
#include <linux/security.h>
#include <linux/swapops.h>
#include <linux/mempolicy.h>
#include <linux/namei.h>
#include <linux/ctype.h>
#include <linux/migrate.h>
#include <linux/highmem.h>
#include <linux/seq_file.h>
#include <linux/magic.h>
#include <linux/syscalls.h>
#include <linux/fcntl.h>
#include <uapi/linux/memfd.h>
#include <linux/userfaultfd_k.h>
#include <linux/rmap.h>
#include <linux/uuid.h>

#include <linux/uaccess.h>
#include <asm/pgtable.h>

#include "internal.h"
#define BLOCKS_PER_PAGE  (PAGE_SIZE/512)
#define VM_ACCT(size)    (PAGE_ALIGN(size) >> PAGE_SHIFT)

/* Pretend that each entry is of this size in directory's i_size */
#define BOGO_DIRENT_SIZE 20

/* Symlink up to this size is kmalloc'ed instead of using a swappable page */
#define SHORT_SYMLINK_LEN 128

/*
 * shmem_fallocate communicates with shmem_fault or shmem_writepage via
 * inode->i_private (with i_mutex making sure that it has only one user at
 * a time): we would prefer not to enlarge the shmem inode just for that.
 */
struct shmem_falloc {
	wait_queue_head_t *waitq; /* faults into hole wait for punch to end */
	pgoff_t start;		/* start of range currently being fallocated */
	pgoff_t next;		/* the next page offset to be fallocated */
	pgoff_t nr_falloced;	/* how many new pages have been fallocated */
	pgoff_t nr_unswapped;	/* how often writepage refused to swap out */
};

#ifdef CONFIG_TMPFS
static unsigned long shmem_default_max_blocks(void)
{
	return totalram_pages / 2;
}

static unsigned long shmem_default_max_inodes(void)
{
	return min(totalram_pages - totalhigh_pages, totalram_pages / 2);
}
#endif
static bool shmem_should_replace_page(struct page *page, gfp_t gfp);
static int shmem_replace_page(struct page **pagep, gfp_t gfp,
				struct shmem_inode_info *info, pgoff_t index);
static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
		struct page **pagep, enum sgp_type sgp,
		gfp_t gfp, struct vm_area_struct *vma,
		struct vm_fault *vmf, int *fault_type);

int shmem_getpage(struct inode *inode, pgoff_t index,
		struct page **pagep, enum sgp_type sgp)
{
	return shmem_getpage_gfp(inode, index, pagep, sgp,
		mapping_gfp_mask(inode->i_mapping), NULL, NULL, NULL);
}

static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb)
{
	return sb->s_fs_info;
}
/*
 * shmem_file_setup pre-accounts the whole fixed size of a VM object,
 * for shared memory and for shared anonymous (/dev/zero) mappings
 * (unless MAP_NORESERVE), consuming the whole size up front so that a
 * later expansion of the mapping cannot fail for lack of memory.
 */
static inline int shmem_acct_size(unsigned long flags, loff_t size)
{
	return (flags & VM_NORESERVE) ?
		0 : security_vm_enough_memory_mm(current->mm, VM_ACCT(size));
}

static inline void shmem_unacct_size(unsigned long flags, loff_t size)
{
	if (!(flags & VM_NORESERVE))
		vm_unacct_memory(VM_ACCT(size));
}

static inline int shmem_reacct_size(unsigned long flags,
		loff_t oldsize, loff_t newsize)
{
	if (!(flags & VM_NORESERVE)) {
		if (VM_ACCT(newsize) > VM_ACCT(oldsize))
			return security_vm_enough_memory_mm(current->mm,
					VM_ACCT(newsize) - VM_ACCT(oldsize));
		else if (VM_ACCT(newsize) < VM_ACCT(oldsize))
			vm_unacct_memory(VM_ACCT(oldsize) - VM_ACCT(newsize));
	}
	return 0;
}

/*
 * ... whereas tmpfs objects are accounted incrementally as
 * pages are allocated, in order to allow huge sparse files.
 * shmem_getpage reports shmem_acct_block failure as -ENOSPC not -ENOMEM,
 * so that a failure on a sparse tmpfs mapping will give SIGBUS not OOM.
 */
static inline int shmem_acct_block(unsigned long flags, long pages)
{
	if (!(flags & VM_NORESERVE))
		return 0;

	return security_vm_enough_memory_mm(current->mm,
			pages * VM_ACCT(PAGE_SIZE));
}

static inline void shmem_unacct_blocks(unsigned long flags, long pages)
{
	if (flags & VM_NORESERVE)
		vm_unacct_memory(pages * VM_ACCT(PAGE_SIZE));
}
191
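/*
 * Charge @pages blocks to the inode: object accounting via shmem_acct_block()
 * when the file was not pre-accounted, plus the per-superblock used_blocks
 * limit. Returns false (so the caller fails with ENOSPC) if either check fails.
 */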
192static inline bool shmem_inode_acct_block(struct inode *inode, long pages)
193{
194 struct shmem_inode_info *info = SHMEM_I(inode);
195 struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
196
197 if (shmem_acct_block(info->flags, pages))
198 return false;
199
200 if (sbinfo->max_blocks) {
201 if (percpu_counter_compare(&sbinfo->used_blocks,
202 sbinfo->max_blocks - pages) > 0)
203 goto unacct;
204 percpu_counter_add(&sbinfo->used_blocks, pages);
205 }
206
207 return true;
208
209unacct:
210 shmem_unacct_blocks(info->flags, pages);
211 return false;
212}
213
214static inline void shmem_inode_unacct_blocks(struct inode *inode, long pages)
215{
216 struct shmem_inode_info *info = SHMEM_I(inode);
217 struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
218
219 if (sbinfo->max_blocks)
220 percpu_counter_sub(&sbinfo->used_blocks, pages);
221 shmem_unacct_blocks(info->flags, pages);
222}
223
224static const struct super_operations shmem_ops;
225static const struct address_space_operations shmem_aops;
226static const struct file_operations shmem_file_operations;
227static const struct inode_operations shmem_inode_operations;
228static const struct inode_operations shmem_dir_inode_operations;
229static const struct inode_operations shmem_special_inode_operations;
230static const struct vm_operations_struct shmem_vm_ops;
231static struct file_system_type shmem_fs_type;
232
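/* True if this vma is backed by shmem/tmpfs, i.e. uses shmem_vm_ops. */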
233bool vma_is_shmem(struct vm_area_struct *vma)
234{
235 return vma->vm_ops == &shmem_vm_ops;
236}
237
238static LIST_HEAD(shmem_swaplist);
239static DEFINE_MUTEX(shmem_swaplist_mutex);
240
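/*
 * Reserve one inode against sbinfo->free_inodes (a no-op on mounts without
 * an inode limit); returns -ENOSPC once the limit has been reached.
 */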
241static int shmem_reserve_inode(struct super_block *sb)
242{
243 struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
244 if (sbinfo->max_inodes) {
245 spin_lock(&sbinfo->stat_lock);
246 if (!sbinfo->free_inodes) {
247 spin_unlock(&sbinfo->stat_lock);
248 return -ENOSPC;
249 }
250 sbinfo->free_inodes--;
251 spin_unlock(&sbinfo->stat_lock);
252 }
253 return 0;
254}
255
256static void shmem_free_inode(struct super_block *sb)
257{
258 struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
259 if (sbinfo->max_inodes) {
260 spin_lock(&sbinfo->stat_lock);
261 sbinfo->free_inodes++;
262 spin_unlock(&sbinfo->stat_lock);
263 }
264}
265
/**
 * shmem_recalc_inode - recalculate the block usage of an inode
 * @inode: inode to recalc
 *
 * We have to calculate the free blocks since the mm can drop
 * undirtied hole pages behind our back.
 *
 * But normally   info->alloced == inode->i_mapping->nrpages + info->swapped
 * So mm freed is info->alloced - (inode->i_mapping->nrpages + info->swapped)
 *
 * It has to be called with the spinlock held.
 */
static void shmem_recalc_inode(struct inode *inode)
{
	struct shmem_inode_info *info = SHMEM_I(inode);
	long freed;

	freed = info->alloced - info->swapped - inode->i_mapping->nrpages;
	if (freed > 0) {
		info->alloced -= freed;
		inode->i_blocks -= freed * BLOCKS_PER_PAGE;
		shmem_inode_unacct_blocks(inode, freed);
	}
}
290
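/*
 * Account @pages pages newly added to the inode's page cache by a caller
 * outside this file (such as the huge-page collapse path); paired with
 * shmem_uncharge() below.
 */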
291bool shmem_charge(struct inode *inode, long pages)
292{
293 struct shmem_inode_info *info = SHMEM_I(inode);
294 unsigned long flags;
295
296 if (!shmem_inode_acct_block(inode, pages))
297 return false;
298
299 spin_lock_irqsave(&info->lock, flags);
300 info->alloced += pages;
301 inode->i_blocks += pages * BLOCKS_PER_PAGE;
302 shmem_recalc_inode(inode);
303 spin_unlock_irqrestore(&info->lock, flags);
304 inode->i_mapping->nrpages += pages;
305
306 return true;
307}
308
309void shmem_uncharge(struct inode *inode, long pages)
310{
311 struct shmem_inode_info *info = SHMEM_I(inode);
312 unsigned long flags;
313
314 spin_lock_irqsave(&info->lock, flags);
315 info->alloced -= pages;
316 inode->i_blocks -= pages * BLOCKS_PER_PAGE;
317 shmem_recalc_inode(inode);
318 spin_unlock_irqrestore(&info->lock, flags);
319
320 shmem_inode_unacct_blocks(inode, pages);
321}
322
/*
 * Replace item expected in radix tree by a new item, while holding tree lock.
 */
static int shmem_radix_tree_replace(struct address_space *mapping,
			pgoff_t index, void *expected, void *replacement)
{
	struct radix_tree_node *node;
	void **pslot;
	void *item;

	VM_BUG_ON(!expected);
	VM_BUG_ON(!replacement);
	item = __radix_tree_lookup(&mapping->page_tree, index, &node, &pslot);
	if (!item)
		return -ENOENT;
	if (item != expected)
		return -ENOENT;
	__radix_tree_replace(&mapping->page_tree, node, pslot,
			     replacement, NULL, NULL);
	return 0;
}
/*
 * Sometimes, before we decide whether to proceed or to fail, we must check
 * that an entry was not already brought back from swap by a racing thread.
 *
 * Checking page is not enough: by the time a SwapCache page is locked, it
 * might be reused, and again be SwapCache, using the same swap as before.
 */
static bool shmem_confirm_swap(struct address_space *mapping,
			       pgoff_t index, swp_entry_t swap)
{
	void *item;

	rcu_read_lock();
	item = radix_tree_lookup(&mapping->page_tree, index);
	rcu_read_unlock();
	return item == swp_to_radix_entry(swap);
}
/*
 * Definitions for "huge tmpfs": tmpfs mounted with the huge= option
 *
 * SHMEM_HUGE_NEVER:
 *	disables huge pages for the mount;
 * SHMEM_HUGE_ALWAYS:
 *	enables huge pages for the mount;
 * SHMEM_HUGE_WITHIN_SIZE:
 *	only allocate huge pages if the page will be fully within i_size,
 *	also respect fadvise()/madvise() hints;
 * SHMEM_HUGE_ADVISE:
 *	only allocate huge pages if requested with fadvise()/madvise();
 */

#define SHMEM_HUGE_NEVER	0
#define SHMEM_HUGE_ALWAYS	1
#define SHMEM_HUGE_WITHIN_SIZE	2
#define SHMEM_HUGE_ADVISE	3

/*
 * Special values.
 * Only can be set via /sys/kernel/mm/transparent_hugepage/shmem_enabled:
 *
 * SHMEM_HUGE_DENY:
 *	disables huge on shm_mnt and all mounts, for emergency use;
 * SHMEM_HUGE_FORCE:
 *	enables huge on shm_mnt and all mounts, w/o needing option, for testing;
 */
#define SHMEM_HUGE_DENY		(-1)
#define SHMEM_HUGE_FORCE	(-2)

#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
/* ifdef here to avoid bloating shmem.o when not necessary */

int shmem_huge __read_mostly;
399
400#if defined(CONFIG_SYSFS) || defined(CONFIG_TMPFS)
401static int shmem_parse_huge(const char *str)
402{
403 if (!strcmp(str, "never"))
404 return SHMEM_HUGE_NEVER;
405 if (!strcmp(str, "always"))
406 return SHMEM_HUGE_ALWAYS;
407 if (!strcmp(str, "within_size"))
408 return SHMEM_HUGE_WITHIN_SIZE;
409 if (!strcmp(str, "advise"))
410 return SHMEM_HUGE_ADVISE;
411 if (!strcmp(str, "deny"))
412 return SHMEM_HUGE_DENY;
413 if (!strcmp(str, "force"))
414 return SHMEM_HUGE_FORCE;
415 return -EINVAL;
416}
417
418static const char *shmem_format_huge(int huge)
419{
420 switch (huge) {
421 case SHMEM_HUGE_NEVER:
422 return "never";
423 case SHMEM_HUGE_ALWAYS:
424 return "always";
425 case SHMEM_HUGE_WITHIN_SIZE:
426 return "within_size";
427 case SHMEM_HUGE_ADVISE:
428 return "advise";
429 case SHMEM_HUGE_DENY:
430 return "deny";
431 case SHMEM_HUGE_FORCE:
432 return "force";
433 default:
434 VM_BUG_ON(1);
435 return "bad_val";
436 }
437}
438#endif
439
440static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
441 struct shrink_control *sc, unsigned long nr_to_split)
442{
443 LIST_HEAD(list), *pos, *next;
444 LIST_HEAD(to_remove);
445 struct inode *inode;
446 struct shmem_inode_info *info;
447 struct page *page;
448 unsigned long batch = sc ? sc->nr_to_scan : 128;
449 int removed = 0, split = 0;
450
451 if (list_empty(&sbinfo->shrinklist))
452 return SHRINK_STOP;
453
454 spin_lock(&sbinfo->shrinklist_lock);
455 list_for_each_safe(pos, next, &sbinfo->shrinklist) {
456 info = list_entry(pos, struct shmem_inode_info, shrinklist);
457
458
459 inode = igrab(&info->vfs_inode);
460
461
462 if (!inode) {
463 list_del_init(&info->shrinklist);
464 removed++;
465 goto next;
466 }
467
468
469 if (round_up(inode->i_size, PAGE_SIZE) ==
470 round_up(inode->i_size, HPAGE_PMD_SIZE)) {
471 list_move(&info->shrinklist, &to_remove);
472 removed++;
473 goto next;
474 }
475
476 list_move(&info->shrinklist, &list);
477next:
478 if (!--batch)
479 break;
480 }
481 spin_unlock(&sbinfo->shrinklist_lock);
482
483 list_for_each_safe(pos, next, &to_remove) {
484 info = list_entry(pos, struct shmem_inode_info, shrinklist);
485 inode = &info->vfs_inode;
486 list_del_init(&info->shrinklist);
487 iput(inode);
488 }
489
490 list_for_each_safe(pos, next, &list) {
491 int ret;
492
493 info = list_entry(pos, struct shmem_inode_info, shrinklist);
494 inode = &info->vfs_inode;
495
496 if (nr_to_split && split >= nr_to_split) {
497 iput(inode);
498 continue;
499 }
500
501 page = find_lock_page(inode->i_mapping,
502 (inode->i_size & HPAGE_PMD_MASK) >> PAGE_SHIFT);
503 if (!page)
504 goto drop;
505
506 if (!PageTransHuge(page)) {
507 unlock_page(page);
508 put_page(page);
509 goto drop;
510 }
511
512 ret = split_huge_page(page);
513 unlock_page(page);
514 put_page(page);
515
516 if (ret) {
517
518 iput(inode);
519 continue;
520 }
521
522 split++;
523drop:
524 list_del_init(&info->shrinklist);
525 removed++;
526 iput(inode);
527 }
528
529 spin_lock(&sbinfo->shrinklist_lock);
530 list_splice_tail(&list, &sbinfo->shrinklist);
531 sbinfo->shrinklist_len -= removed;
532 spin_unlock(&sbinfo->shrinklist_lock);
533
534 return split;
535}
536
537static long shmem_unused_huge_scan(struct super_block *sb,
538 struct shrink_control *sc)
539{
540 struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
541
542 if (!READ_ONCE(sbinfo->shrinklist_len))
543 return SHRINK_STOP;
544
545 return shmem_unused_huge_shrink(sbinfo, sc, 0);
546}
547
548static long shmem_unused_huge_count(struct super_block *sb,
549 struct shrink_control *sc)
550{
551 struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
552 return READ_ONCE(sbinfo->shrinklist_len);
553}
554#else
555
556#define shmem_huge SHMEM_HUGE_DENY
557
558static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
559 struct shrink_control *sc, unsigned long nr_to_split)
560{
561 return 0;
562}
563#endif
564
/*
 * Like add_to_page_cache_locked, but error if expected item has gone.
 */
static int shmem_add_to_page_cache(struct page *page,
				   struct address_space *mapping,
				   pgoff_t index, void *expected)
{
572 int error, nr = hpage_nr_pages(page);
573
574 VM_BUG_ON_PAGE(PageTail(page), page);
575 VM_BUG_ON_PAGE(index != round_down(index, nr), page);
576 VM_BUG_ON_PAGE(!PageLocked(page), page);
577 VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
578 VM_BUG_ON(expected && PageTransHuge(page));
579
580 page_ref_add(page, nr);
581 page->mapping = mapping;
582 page->index = index;
583
584 spin_lock_irq(&mapping->tree_lock);
585 if (PageTransHuge(page)) {
586 void __rcu **results;
587 pgoff_t idx;
588 int i;
589
590 error = 0;
591 if (radix_tree_gang_lookup_slot(&mapping->page_tree,
592 &results, &idx, index, 1) &&
593 idx < index + HPAGE_PMD_NR) {
594 error = -EEXIST;
595 }
596
597 if (!error) {
598 for (i = 0; i < HPAGE_PMD_NR; i++) {
599 error = radix_tree_insert(&mapping->page_tree,
600 index + i, page + i);
601 VM_BUG_ON(error);
602 }
603 count_vm_event(THP_FILE_ALLOC);
604 }
605 } else if (!expected) {
606 error = radix_tree_insert(&mapping->page_tree, index, page);
607 } else {
608 error = shmem_radix_tree_replace(mapping, index, expected,
609 page);
610 }
611
612 if (!error) {
613 mapping->nrpages += nr;
614 if (PageTransHuge(page))
615 __inc_node_page_state(page, NR_SHMEM_THPS);
616 __mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, nr);
617 __mod_node_page_state(page_pgdat(page), NR_SHMEM, nr);
618 spin_unlock_irq(&mapping->tree_lock);
619 } else {
620 page->mapping = NULL;
621 spin_unlock_irq(&mapping->tree_lock);
622 page_ref_sub(page, nr);
623 }
624 return error;
625}
626
/*
 * Like delete_from_page_cache, but substitutes swap for page.
 */
static void shmem_delete_from_page_cache(struct page *page, void *radswap)
{
	struct address_space *mapping = page->mapping;
	int error;
634
635 VM_BUG_ON_PAGE(PageCompound(page), page);
636
637 spin_lock_irq(&mapping->tree_lock);
638 error = shmem_radix_tree_replace(mapping, page->index, page, radswap);
639 page->mapping = NULL;
640 mapping->nrpages--;
641 __dec_node_page_state(page, NR_FILE_PAGES);
642 __dec_node_page_state(page, NR_SHMEM);
643 spin_unlock_irq(&mapping->tree_lock);
644 put_page(page);
645 BUG_ON(error);
646}
647
/*
 * Remove swap entry from radix tree, free the swap and its page cache.
 */
static int shmem_free_swap(struct address_space *mapping,
			   pgoff_t index, void *radswap)
{
654 void *old;
655
656 spin_lock_irq(&mapping->tree_lock);
657 old = radix_tree_delete_item(&mapping->page_tree, index, radswap);
658 spin_unlock_irq(&mapping->tree_lock);
659 if (old != radswap)
660 return -ENOENT;
661 free_swap_and_cache(radix_to_swp_entry(radswap));
662 return 0;
663}
664
/*
 * Determine (in bytes) how many of the shmem object's pages mapped by the
 * given offsets are swapped out.
 *
 * This is safe to call without i_mutex or mapping->tree_lock thanks to RCU,
 * as long as the inode doesn't go away and racy results are not a problem.
 */
unsigned long shmem_partial_swap_usage(struct address_space *mapping,
						pgoff_t start, pgoff_t end)
{
675 struct radix_tree_iter iter;
676 void **slot;
677 struct page *page;
678 unsigned long swapped = 0;
679
680 rcu_read_lock();
681
682 radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
683 if (iter.index >= end)
684 break;
685
686 page = radix_tree_deref_slot(slot);
687
688 if (radix_tree_deref_retry(page)) {
689 slot = radix_tree_iter_retry(&iter);
690 continue;
691 }
692
693 if (radix_tree_exceptional_entry(page))
694 swapped++;
695
696 if (need_resched()) {
697 slot = radix_tree_iter_resume(slot, &iter);
698 cond_resched_rcu();
699 }
700 }
701
702 rcu_read_unlock();
703
704 return swapped << PAGE_SHIFT;
705}
706
/*
 * Determine (in bytes) how many of the shmem object's pages mapped by the
 * given vma is swapped out.
 *
 * This is safe to call without i_mutex or mapping->tree_lock thanks to RCU,
 * as long as the inode doesn't go away and racy results are not a problem.
 */
unsigned long shmem_swap_usage(struct vm_area_struct *vma)
{
716 struct inode *inode = file_inode(vma->vm_file);
717 struct shmem_inode_info *info = SHMEM_I(inode);
718 struct address_space *mapping = inode->i_mapping;
719 unsigned long swapped;
720
721
722 swapped = READ_ONCE(info->swapped);
723
724
725
726
727
728
729 if (!swapped)
730 return 0;
731
732 if (!vma->vm_pgoff && vma->vm_end - vma->vm_start >= inode->i_size)
733 return swapped << PAGE_SHIFT;
734
735
736 return shmem_partial_swap_usage(mapping,
737 linear_page_index(vma, vma->vm_start),
738 linear_page_index(vma, vma->vm_end));
739}
740
/*
 * SysV IPC SHM_UNLOCK restore Unevictable pages to their evictable lists.
 */
void shmem_unlock_mapping(struct address_space *mapping)
{
746 struct pagevec pvec;
747 pgoff_t indices[PAGEVEC_SIZE];
748 pgoff_t index = 0;
749
750 pagevec_init(&pvec, 0);
751
752
753
754 while (!mapping_unevictable(mapping)) {
755
756
757
758
759 pvec.nr = find_get_entries(mapping, index,
760 PAGEVEC_SIZE, pvec.pages, indices);
761 if (!pvec.nr)
762 break;
763 index = indices[pvec.nr - 1] + 1;
764 pagevec_remove_exceptionals(&pvec);
765 check_move_unevictable_pages(pvec.pages, pvec.nr);
766 pagevec_release(&pvec);
767 cond_resched();
768 }
769}
770
/*
 * Remove range of pages and swap entries from radix tree, and free them.
 * If !unfalloc, truncate or punch hole; if unfalloc, undo failed fallocate.
 */
static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
							 bool unfalloc)
{
778 struct address_space *mapping = inode->i_mapping;
779 struct shmem_inode_info *info = SHMEM_I(inode);
780 pgoff_t start = (lstart + PAGE_SIZE - 1) >> PAGE_SHIFT;
781 pgoff_t end = (lend + 1) >> PAGE_SHIFT;
782 unsigned int partial_start = lstart & (PAGE_SIZE - 1);
783 unsigned int partial_end = (lend + 1) & (PAGE_SIZE - 1);
784 struct pagevec pvec;
785 pgoff_t indices[PAGEVEC_SIZE];
786 long nr_swaps_freed = 0;
787 pgoff_t index;
788 int i;
789
790 if (lend == -1)
791 end = -1;
792
793 pagevec_init(&pvec, 0);
794 index = start;
795 while (index < end) {
796 pvec.nr = find_get_entries(mapping, index,
797 min(end - index, (pgoff_t)PAGEVEC_SIZE),
798 pvec.pages, indices);
799 if (!pvec.nr)
800 break;
801 for (i = 0; i < pagevec_count(&pvec); i++) {
802 struct page *page = pvec.pages[i];
803
804 index = indices[i];
805 if (index >= end)
806 break;
807
808 if (radix_tree_exceptional_entry(page)) {
809 if (unfalloc)
810 continue;
811 nr_swaps_freed += !shmem_free_swap(mapping,
812 index, page);
813 continue;
814 }
815
816 VM_BUG_ON_PAGE(page_to_pgoff(page) != index, page);
817
818 if (!trylock_page(page))
819 continue;
820
821 if (PageTransTail(page)) {
822
823 clear_highpage(page);
824 unlock_page(page);
825 continue;
826 } else if (PageTransHuge(page)) {
827 if (index == round_down(end, HPAGE_PMD_NR)) {
828
829
830
831
832 clear_highpage(page);
833 unlock_page(page);
834 continue;
835 }
836 index += HPAGE_PMD_NR - 1;
837 i += HPAGE_PMD_NR - 1;
838 }
839
840 if (!unfalloc || !PageUptodate(page)) {
841 VM_BUG_ON_PAGE(PageTail(page), page);
842 if (page_mapping(page) == mapping) {
843 VM_BUG_ON_PAGE(PageWriteback(page), page);
844 truncate_inode_page(mapping, page);
845 }
846 }
847 unlock_page(page);
848 }
849 pagevec_remove_exceptionals(&pvec);
850 pagevec_release(&pvec);
851 cond_resched();
852 index++;
853 }
854
855 if (partial_start) {
856 struct page *page = NULL;
857 shmem_getpage(inode, start - 1, &page, SGP_READ);
858 if (page) {
859 unsigned int top = PAGE_SIZE;
860 if (start > end) {
861 top = partial_end;
862 partial_end = 0;
863 }
864 zero_user_segment(page, partial_start, top);
865 set_page_dirty(page);
866 unlock_page(page);
867 put_page(page);
868 }
869 }
870 if (partial_end) {
871 struct page *page = NULL;
872 shmem_getpage(inode, end, &page, SGP_READ);
873 if (page) {
874 zero_user_segment(page, 0, partial_end);
875 set_page_dirty(page);
876 unlock_page(page);
877 put_page(page);
878 }
879 }
880 if (start >= end)
881 return;
882
883 index = start;
884 while (index < end) {
885 cond_resched();
886
887 pvec.nr = find_get_entries(mapping, index,
888 min(end - index, (pgoff_t)PAGEVEC_SIZE),
889 pvec.pages, indices);
890 if (!pvec.nr) {
891
892 if (index == start || end != -1)
893 break;
894
895 index = start;
896 continue;
897 }
898 for (i = 0; i < pagevec_count(&pvec); i++) {
899 struct page *page = pvec.pages[i];
900
901 index = indices[i];
902 if (index >= end)
903 break;
904
905 if (radix_tree_exceptional_entry(page)) {
906 if (unfalloc)
907 continue;
908 if (shmem_free_swap(mapping, index, page)) {
909
910 index--;
911 break;
912 }
913 nr_swaps_freed++;
914 continue;
915 }
916
917 lock_page(page);
918
919 if (PageTransTail(page)) {
920
921 clear_highpage(page);
922 unlock_page(page);
923
924
925
926
927
928 if (index != round_down(end, HPAGE_PMD_NR))
929 start++;
930 continue;
931 } else if (PageTransHuge(page)) {
932 if (index == round_down(end, HPAGE_PMD_NR)) {
933
934
935
936
937 clear_highpage(page);
938 unlock_page(page);
939 continue;
940 }
941 index += HPAGE_PMD_NR - 1;
942 i += HPAGE_PMD_NR - 1;
943 }
944
945 if (!unfalloc || !PageUptodate(page)) {
946 VM_BUG_ON_PAGE(PageTail(page), page);
947 if (page_mapping(page) == mapping) {
948 VM_BUG_ON_PAGE(PageWriteback(page), page);
949 truncate_inode_page(mapping, page);
950 } else {
951
952 unlock_page(page);
953 index--;
954 break;
955 }
956 }
957 unlock_page(page);
958 }
959 pagevec_remove_exceptionals(&pvec);
960 pagevec_release(&pvec);
961 index++;
962 }
963
964 spin_lock_irq(&info->lock);
965 info->swapped -= nr_swaps_freed;
966 shmem_recalc_inode(inode);
967 spin_unlock_irq(&info->lock);
968}
969
970void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
971{
972 shmem_undo_range(inode, lstart, lend, false);
973 inode->i_ctime = inode->i_mtime = current_time(inode);
974}
975EXPORT_SYMBOL_GPL(shmem_truncate_range);
976
977static int shmem_getattr(const struct path *path, struct kstat *stat,
978 u32 request_mask, unsigned int query_flags)
979{
980 struct inode *inode = path->dentry->d_inode;
981 struct shmem_inode_info *info = SHMEM_I(inode);
982
983 if (info->alloced - info->swapped != inode->i_mapping->nrpages) {
984 spin_lock_irq(&info->lock);
985 shmem_recalc_inode(inode);
986 spin_unlock_irq(&info->lock);
987 }
988 generic_fillattr(inode, stat);
989 return 0;
990}
991
992static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
993{
994 struct inode *inode = d_inode(dentry);
995 struct shmem_inode_info *info = SHMEM_I(inode);
996 struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
997 int error;
998
999 error = setattr_prepare(dentry, attr);
1000 if (error)
1001 return error;
1002
1003 if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
1004 loff_t oldsize = inode->i_size;
1005 loff_t newsize = attr->ia_size;
1006
1007
1008 if ((newsize < oldsize && (info->seals & F_SEAL_SHRINK)) ||
1009 (newsize > oldsize && (info->seals & F_SEAL_GROW)))
1010 return -EPERM;
1011
1012 if (newsize != oldsize) {
1013 error = shmem_reacct_size(SHMEM_I(inode)->flags,
1014 oldsize, newsize);
1015 if (error)
1016 return error;
1017 i_size_write(inode, newsize);
1018 inode->i_ctime = inode->i_mtime = current_time(inode);
1019 }
1020 if (newsize <= oldsize) {
1021 loff_t holebegin = round_up(newsize, PAGE_SIZE);
1022 if (oldsize > holebegin)
1023 unmap_mapping_range(inode->i_mapping,
1024 holebegin, 0, 1);
1025 if (info->alloced)
1026 shmem_truncate_range(inode,
1027 newsize, (loff_t)-1);
1028
1029 if (oldsize > holebegin)
1030 unmap_mapping_range(inode->i_mapping,
1031 holebegin, 0, 1);
1032
1033
1034
1035
1036
1037 if (IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE)) {
1038 spin_lock(&sbinfo->shrinklist_lock);
1039
1040
1041
1042
1043 if (list_empty_careful(&info->shrinklist)) {
1044 list_add_tail(&info->shrinklist,
1045 &sbinfo->shrinklist);
1046 sbinfo->shrinklist_len++;
1047 }
1048 spin_unlock(&sbinfo->shrinklist_lock);
1049 }
1050 }
1051 }
1052
1053 setattr_copy(inode, attr);
1054 if (attr->ia_valid & ATTR_MODE)
1055 error = posix_acl_chmod(inode, inode->i_mode);
1056 return error;
1057}
1058
1059static void shmem_evict_inode(struct inode *inode)
1060{
1061 struct shmem_inode_info *info = SHMEM_I(inode);
1062 struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
1063
1064 if (inode->i_mapping->a_ops == &shmem_aops) {
1065 shmem_unacct_size(info->flags, inode->i_size);
1066 inode->i_size = 0;
1067 shmem_truncate_range(inode, 0, (loff_t)-1);
1068 if (!list_empty(&info->shrinklist)) {
1069 spin_lock(&sbinfo->shrinklist_lock);
1070 if (!list_empty(&info->shrinklist)) {
1071 list_del_init(&info->shrinklist);
1072 sbinfo->shrinklist_len--;
1073 }
1074 spin_unlock(&sbinfo->shrinklist_lock);
1075 }
1076 if (!list_empty(&info->swaplist)) {
1077 mutex_lock(&shmem_swaplist_mutex);
1078 list_del_init(&info->swaplist);
1079 mutex_unlock(&shmem_swaplist_mutex);
1080 }
1081 }
1082
1083 simple_xattrs_free(&info->xattrs);
1084 WARN_ON(inode->i_blocks);
1085 shmem_free_inode(inode->i_sb);
1086 clear_inode(inode);
1087}
1088
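/*
 * Search the whole radix tree for the swap radix entry @item, rescheduling
 * every 4096 slots checked; returns its index, or -1 if it is not present.
 */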
1089static unsigned long find_swap_entry(struct radix_tree_root *root, void *item)
1090{
1091 struct radix_tree_iter iter;
1092 void **slot;
1093 unsigned long found = -1;
1094 unsigned int checked = 0;
1095
1096 rcu_read_lock();
1097 radix_tree_for_each_slot(slot, root, &iter, 0) {
1098 if (*slot == item) {
1099 found = iter.index;
1100 break;
1101 }
1102 checked++;
1103 if ((checked % 4096) != 0)
1104 continue;
1105 slot = radix_tree_iter_resume(slot, &iter);
1106 cond_resched_rcu();
1107 }
1108
1109 rcu_read_unlock();
1110 return found;
1111}
1112
/*
 * If swap found in inode, free it and move page from swapcache to filecache.
 */
static int shmem_unuse_inode(struct shmem_inode_info *info,
			     swp_entry_t swap, struct page **pagep)
{
1119 struct address_space *mapping = info->vfs_inode.i_mapping;
1120 void *radswap;
1121 pgoff_t index;
1122 gfp_t gfp;
1123 int error = 0;
1124
1125 radswap = swp_to_radix_entry(swap);
1126 index = find_swap_entry(&mapping->page_tree, radswap);
1127 if (index == -1)
1128 return -EAGAIN;
1129
1130
1131
1132
1133
1134
1135
1136 if (shmem_swaplist.next != &info->swaplist)
1137 list_move_tail(&shmem_swaplist, &info->swaplist);
1138
1139 gfp = mapping_gfp_mask(mapping);
1140 if (shmem_should_replace_page(*pagep, gfp)) {
1141 mutex_unlock(&shmem_swaplist_mutex);
1142 error = shmem_replace_page(pagep, gfp, info, index);
1143 mutex_lock(&shmem_swaplist_mutex);
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162 if (!page_swapcount(*pagep))
1163 error = -ENOENT;
1164 }
1165
1166
1167
1168
1169
1170
1171 if (!error)
1172 error = shmem_add_to_page_cache(*pagep, mapping, index,
1173 radswap);
1174 if (error != -ENOMEM) {
1175
1176
1177
1178
1179 delete_from_swap_cache(*pagep);
1180 set_page_dirty(*pagep);
1181 if (!error) {
1182 spin_lock_irq(&info->lock);
1183 info->swapped--;
1184 spin_unlock_irq(&info->lock);
1185 swap_free(swap);
1186 }
1187 }
1188 return error;
1189}
1190
/*
 * Search through swapped inodes to find and replace swap by page.
 */
int shmem_unuse(swp_entry_t swap, struct page *page)
{
1196 struct list_head *this, *next;
1197 struct shmem_inode_info *info;
1198 struct mem_cgroup *memcg;
1199 int error = 0;
1200
1201
1202
1203
1204
1205 if (unlikely(!PageSwapCache(page) || page_private(page) != swap.val))
1206 goto out;
1207
1208
1209
1210
1211
1212
1213 error = mem_cgroup_try_charge(page, current->mm, GFP_KERNEL, &memcg,
1214 false);
1215 if (error)
1216 goto out;
1217
1218 error = -EAGAIN;
1219
1220 mutex_lock(&shmem_swaplist_mutex);
1221 list_for_each_safe(this, next, &shmem_swaplist) {
1222 info = list_entry(this, struct shmem_inode_info, swaplist);
1223 if (info->swapped)
1224 error = shmem_unuse_inode(info, swap, &page);
1225 else
1226 list_del_init(&info->swaplist);
1227 cond_resched();
1228 if (error != -EAGAIN)
1229 break;
1230
1231 }
1232 mutex_unlock(&shmem_swaplist_mutex);
1233
1234 if (error) {
1235 if (error != -ENOMEM)
1236 error = 0;
1237 mem_cgroup_cancel_charge(page, memcg, false);
1238 } else
1239 mem_cgroup_commit_charge(page, memcg, true, false);
1240out:
1241 unlock_page(page);
1242 put_page(page);
1243 return error;
1244}
1245
/*
 * Move the page from the page cache to the swap cache.
 */
static int shmem_writepage(struct page *page, struct writeback_control *wbc)
{
1251 struct shmem_inode_info *info;
1252 struct address_space *mapping;
1253 struct inode *inode;
1254 swp_entry_t swap;
1255 pgoff_t index;
1256
1257 VM_BUG_ON_PAGE(PageCompound(page), page);
1258 BUG_ON(!PageLocked(page));
1259 mapping = page->mapping;
1260 index = page->index;
1261 inode = mapping->host;
1262 info = SHMEM_I(inode);
1263 if (info->flags & VM_LOCKED)
1264 goto redirty;
1265 if (!total_swap_pages)
1266 goto redirty;
1267
1268
1269
1270
1271
1272
1273
1274
1275 if (!wbc->for_reclaim) {
1276 WARN_ON_ONCE(1);
1277 goto redirty;
1278 }
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291 if (!PageUptodate(page)) {
1292 if (inode->i_private) {
1293 struct shmem_falloc *shmem_falloc;
1294 spin_lock(&inode->i_lock);
1295 shmem_falloc = inode->i_private;
1296 if (shmem_falloc &&
1297 !shmem_falloc->waitq &&
1298 index >= shmem_falloc->start &&
1299 index < shmem_falloc->next)
1300 shmem_falloc->nr_unswapped++;
1301 else
1302 shmem_falloc = NULL;
1303 spin_unlock(&inode->i_lock);
1304 if (shmem_falloc)
1305 goto redirty;
1306 }
1307 clear_highpage(page);
1308 flush_dcache_page(page);
1309 SetPageUptodate(page);
1310 }
1311
1312 swap = get_swap_page(page);
1313 if (!swap.val)
1314 goto redirty;
1315
1316 if (mem_cgroup_try_charge_swap(page, swap))
1317 goto free_swap;
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327 mutex_lock(&shmem_swaplist_mutex);
1328 if (list_empty(&info->swaplist))
1329 list_add_tail(&info->swaplist, &shmem_swaplist);
1330
1331 if (add_to_swap_cache(page, swap, GFP_ATOMIC) == 0) {
1332 spin_lock_irq(&info->lock);
1333 shmem_recalc_inode(inode);
1334 info->swapped++;
1335 spin_unlock_irq(&info->lock);
1336
1337 swap_shmem_alloc(swap);
1338 shmem_delete_from_page_cache(page, swp_to_radix_entry(swap));
1339
1340 mutex_unlock(&shmem_swaplist_mutex);
1341 BUG_ON(page_mapped(page));
1342 swap_writepage(page, wbc);
1343 return 0;
1344 }
1345
1346 mutex_unlock(&shmem_swaplist_mutex);
1347free_swap:
1348 put_swap_page(page, swap);
1349redirty:
1350 set_page_dirty(page);
1351 if (wbc->for_reclaim)
1352 return AOP_WRITEPAGE_ACTIVATE;
1353 unlock_page(page);
1354 return 0;
1355}
1356
1357#if defined(CONFIG_NUMA) && defined(CONFIG_TMPFS)
1358static void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol)
1359{
1360 char buffer[64];
1361
1362 if (!mpol || mpol->mode == MPOL_DEFAULT)
1363 return;
1364
1365 mpol_to_str(buffer, sizeof(buffer), mpol);
1366
1367 seq_printf(seq, ",mpol=%s", buffer);
1368}
1369
1370static struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
1371{
1372 struct mempolicy *mpol = NULL;
1373 if (sbinfo->mpol) {
1374 spin_lock(&sbinfo->stat_lock);
1375 mpol = sbinfo->mpol;
1376 mpol_get(mpol);
1377 spin_unlock(&sbinfo->stat_lock);
1378 }
1379 return mpol;
1380}
1381#else
1382static inline void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol)
1383{
1384}
1385static inline struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
1386{
1387 return NULL;
1388}
1389#endif
1390#ifndef CONFIG_NUMA
1391#define vm_policy vm_private_data
1392#endif
1393
static void shmem_pseudo_vma_init(struct vm_area_struct *vma,
		struct shmem_inode_info *info, pgoff_t index)
{
	/* Create a pseudo vma that just contains the policy */
	vma->vm_start = 0;
	/* Bias interleave by inode number to distribute better across nodes */
	vma->vm_pgoff = index + info->vfs_inode.i_ino;
	vma->vm_ops = NULL;
	vma->vm_policy = mpol_shared_policy_lookup(&info->policy, index);
}
1404
1405static void shmem_pseudo_vma_destroy(struct vm_area_struct *vma)
1406{
1407
1408 mpol_cond_put(vma->vm_policy);
1409}
1410
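/* Read the page for @swap back from swap, applying the inode's mempolicy. */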
1411static struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp,
1412 struct shmem_inode_info *info, pgoff_t index)
1413{
1414 struct vm_area_struct pvma;
1415 struct page *page;
1416
1417 shmem_pseudo_vma_init(&pvma, info, index);
1418 page = swapin_readahead(swap, gfp, &pvma, 0);
1419 shmem_pseudo_vma_destroy(&pvma);
1420
1421 return page;
1422}
1423
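/*
 * Try to allocate a transparent huge page for the HPAGE_PMD_NR-aligned range
 * around @index, first checking that no entry already exists in that range.
 */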
1424static struct page *shmem_alloc_hugepage(gfp_t gfp,
1425 struct shmem_inode_info *info, pgoff_t index)
1426{
1427 struct vm_area_struct pvma;
1428 struct inode *inode = &info->vfs_inode;
1429 struct address_space *mapping = inode->i_mapping;
1430 pgoff_t idx, hindex;
1431 void __rcu **results;
1432 struct page *page;
1433
1434 if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE))
1435 return NULL;
1436
1437 hindex = round_down(index, HPAGE_PMD_NR);
1438 rcu_read_lock();
1439 if (radix_tree_gang_lookup_slot(&mapping->page_tree, &results, &idx,
1440 hindex, 1) && idx < hindex + HPAGE_PMD_NR) {
1441 rcu_read_unlock();
1442 return NULL;
1443 }
1444 rcu_read_unlock();
1445
1446 shmem_pseudo_vma_init(&pvma, info, hindex);
1447 page = alloc_pages_vma(gfp | __GFP_COMP | __GFP_NORETRY | __GFP_NOWARN,
1448 HPAGE_PMD_ORDER, &pvma, 0, numa_node_id(), true);
1449 shmem_pseudo_vma_destroy(&pvma);
1450 if (page)
1451 prep_transhuge_page(page);
1452 return page;
1453}
1454
1455static struct page *shmem_alloc_page(gfp_t gfp,
1456 struct shmem_inode_info *info, pgoff_t index)
1457{
1458 struct vm_area_struct pvma;
1459 struct page *page;
1460
1461 shmem_pseudo_vma_init(&pvma, info, index);
1462 page = alloc_page_vma(gfp, &pvma, 0);
1463 shmem_pseudo_vma_destroy(&pvma);
1464
1465 return page;
1466}
1467
1468static struct page *shmem_alloc_and_acct_page(gfp_t gfp,
1469 struct inode *inode,
1470 pgoff_t index, bool huge)
1471{
1472 struct shmem_inode_info *info = SHMEM_I(inode);
1473 struct page *page;
1474 int nr;
1475 int err = -ENOSPC;
1476
1477 if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE))
1478 huge = false;
1479 nr = huge ? HPAGE_PMD_NR : 1;
1480
1481 if (!shmem_inode_acct_block(inode, nr))
1482 goto failed;
1483
1484 if (huge)
1485 page = shmem_alloc_hugepage(gfp, info, index);
1486 else
1487 page = shmem_alloc_page(gfp, info, index);
1488 if (page) {
1489 __SetPageLocked(page);
1490 __SetPageSwapBacked(page);
1491 return page;
1492 }
1493
1494 err = -ENOMEM;
1495 shmem_inode_unacct_blocks(inode, nr);
1496failed:
1497 return ERR_PTR(err);
1498}
1499
/*
 * When a page is moved from swapcache to shmem filecache (either by the
 * usual swapin of shmem_getpage_gfp(), or by the less common swapoff of
 * shmem_unuse_inode()), it may have been allocated from a zone which the
 * mapping's gfp mask does not permit: if so, it must be copied to a new
 * page allocated under that gfp before being added to the filecache.
 */
static bool shmem_should_replace_page(struct page *page, gfp_t gfp)
{
	return page_zonenum(page) > gfp_zone(gfp);
}
1516
1517static int shmem_replace_page(struct page **pagep, gfp_t gfp,
1518 struct shmem_inode_info *info, pgoff_t index)
1519{
1520 struct page *oldpage, *newpage;
1521 struct address_space *swap_mapping;
1522 pgoff_t swap_index;
1523 int error;
1524
1525 oldpage = *pagep;
1526 swap_index = page_private(oldpage);
1527 swap_mapping = page_mapping(oldpage);
1528
1529
1530
1531
1532
1533 gfp &= ~GFP_CONSTRAINT_MASK;
1534 newpage = shmem_alloc_page(gfp, info, index);
1535 if (!newpage)
1536 return -ENOMEM;
1537
1538 get_page(newpage);
1539 copy_highpage(newpage, oldpage);
1540 flush_dcache_page(newpage);
1541
1542 __SetPageLocked(newpage);
1543 __SetPageSwapBacked(newpage);
1544 SetPageUptodate(newpage);
1545 set_page_private(newpage, swap_index);
1546 SetPageSwapCache(newpage);
1547
1548
1549
1550
1551
1552 spin_lock_irq(&swap_mapping->tree_lock);
1553 error = shmem_radix_tree_replace(swap_mapping, swap_index, oldpage,
1554 newpage);
1555 if (!error) {
1556 __inc_node_page_state(newpage, NR_FILE_PAGES);
1557 __dec_node_page_state(oldpage, NR_FILE_PAGES);
1558 }
1559 spin_unlock_irq(&swap_mapping->tree_lock);
1560
1561 if (unlikely(error)) {
1562
1563
1564
1565
1566
1567 oldpage = newpage;
1568 } else {
1569 mem_cgroup_migrate(oldpage, newpage);
1570 lru_cache_add_anon(newpage);
1571 *pagep = newpage;
1572 }
1573
1574 ClearPageSwapCache(oldpage);
1575 set_page_private(oldpage, 0);
1576
1577 unlock_page(oldpage);
1578 put_page(oldpage);
1579 put_page(oldpage);
1580 return error;
1581}
1582
/*
 * shmem_getpage_gfp - find page in cache, or get from swap, or allocate
 *
 * If we allocate a new one we do not mark it dirty. That's up to the
 * vm. If we swap it in we mark it dirty since we also free the swap
 * entry since a page cannot live in both the swap and page cache.
 *
 * vma, vmf and fault_type are only supplied by shmem_fault:
 * otherwise they are NULL.
 */
static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
	struct page **pagep, enum sgp_type sgp, gfp_t gfp,
	struct vm_area_struct *vma, struct vm_fault *vmf, int *fault_type)
{
1597 struct address_space *mapping = inode->i_mapping;
1598 struct shmem_inode_info *info = SHMEM_I(inode);
1599 struct shmem_sb_info *sbinfo;
1600 struct mm_struct *charge_mm;
1601 struct mem_cgroup *memcg;
1602 struct page *page;
1603 swp_entry_t swap;
1604 enum sgp_type sgp_huge = sgp;
1605 pgoff_t hindex = index;
1606 int error;
1607 int once = 0;
1608 int alloced = 0;
1609
1610 if (index > (MAX_LFS_FILESIZE >> PAGE_SHIFT))
1611 return -EFBIG;
1612 if (sgp == SGP_NOHUGE || sgp == SGP_HUGE)
1613 sgp = SGP_CACHE;
1614repeat:
1615 swap.val = 0;
1616 page = find_lock_entry(mapping, index);
1617 if (radix_tree_exceptional_entry(page)) {
1618 swap = radix_to_swp_entry(page);
1619 page = NULL;
1620 }
1621
1622 if (sgp <= SGP_CACHE &&
1623 ((loff_t)index << PAGE_SHIFT) >= i_size_read(inode)) {
1624 error = -EINVAL;
1625 goto unlock;
1626 }
1627
1628 if (page && sgp == SGP_WRITE)
1629 mark_page_accessed(page);
1630
1631
1632 if (page && !PageUptodate(page)) {
1633 if (sgp != SGP_READ)
1634 goto clear;
1635 unlock_page(page);
1636 put_page(page);
1637 page = NULL;
1638 }
1639 if (page || (sgp == SGP_READ && !swap.val)) {
1640 *pagep = page;
1641 return 0;
1642 }
1643
1644
1645
1646
1647
1648 sbinfo = SHMEM_SB(inode->i_sb);
1649 charge_mm = vma ? vma->vm_mm : current->mm;
1650
1651 if (swap.val) {
1652
1653 page = lookup_swap_cache(swap, NULL, 0);
1654 if (!page) {
1655
1656 if (fault_type) {
1657 *fault_type |= VM_FAULT_MAJOR;
1658 count_vm_event(PGMAJFAULT);
1659 count_memcg_event_mm(charge_mm, PGMAJFAULT);
1660 }
1661
1662 page = shmem_swapin(swap, gfp, info, index);
1663 if (!page) {
1664 error = -ENOMEM;
1665 goto failed;
1666 }
1667 }
1668
1669
1670 lock_page(page);
1671 if (!PageSwapCache(page) || page_private(page) != swap.val ||
1672 !shmem_confirm_swap(mapping, index, swap)) {
1673 error = -EEXIST;
1674 goto unlock;
1675 }
1676 if (!PageUptodate(page)) {
1677 error = -EIO;
1678 goto failed;
1679 }
1680 wait_on_page_writeback(page);
1681
1682 if (shmem_should_replace_page(page, gfp)) {
1683 error = shmem_replace_page(&page, gfp, info, index);
1684 if (error)
1685 goto failed;
1686 }
1687
1688 error = mem_cgroup_try_charge(page, charge_mm, gfp, &memcg,
1689 false);
1690 if (!error) {
1691 error = shmem_add_to_page_cache(page, mapping, index,
1692 swp_to_radix_entry(swap));
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705 if (error) {
1706 mem_cgroup_cancel_charge(page, memcg, false);
1707 delete_from_swap_cache(page);
1708 }
1709 }
1710 if (error)
1711 goto failed;
1712
1713 mem_cgroup_commit_charge(page, memcg, true, false);
1714
1715 spin_lock_irq(&info->lock);
1716 info->swapped--;
1717 shmem_recalc_inode(inode);
1718 spin_unlock_irq(&info->lock);
1719
1720 if (sgp == SGP_WRITE)
1721 mark_page_accessed(page);
1722
1723 delete_from_swap_cache(page);
1724 set_page_dirty(page);
1725 swap_free(swap);
1726
1727 } else {
1728 if (vma && userfaultfd_missing(vma)) {
1729 *fault_type = handle_userfault(vmf, VM_UFFD_MISSING);
1730 return 0;
1731 }
1732
1733
1734 if (mapping->a_ops != &shmem_aops)
1735 goto alloc_nohuge;
1736 if (shmem_huge == SHMEM_HUGE_DENY || sgp_huge == SGP_NOHUGE)
1737 goto alloc_nohuge;
1738 if (shmem_huge == SHMEM_HUGE_FORCE)
1739 goto alloc_huge;
1740 switch (sbinfo->huge) {
1741 loff_t i_size;
1742 pgoff_t off;
1743 case SHMEM_HUGE_NEVER:
1744 goto alloc_nohuge;
1745 case SHMEM_HUGE_WITHIN_SIZE:
1746 off = round_up(index, HPAGE_PMD_NR);
1747 i_size = round_up(i_size_read(inode), PAGE_SIZE);
1748 if (i_size >= HPAGE_PMD_SIZE &&
1749 i_size >> PAGE_SHIFT >= off)
1750 goto alloc_huge;
			/* fall through */
1752 case SHMEM_HUGE_ADVISE:
1753 if (sgp_huge == SGP_HUGE)
1754 goto alloc_huge;
			/* TODO: implement fadvise() hints */
1756 goto alloc_nohuge;
1757 }
1758
1759alloc_huge:
1760 page = shmem_alloc_and_acct_page(gfp, inode, index, true);
1761 if (IS_ERR(page)) {
1762alloc_nohuge: page = shmem_alloc_and_acct_page(gfp, inode,
1763 index, false);
1764 }
1765 if (IS_ERR(page)) {
1766 int retry = 5;
1767 error = PTR_ERR(page);
1768 page = NULL;
1769 if (error != -ENOSPC)
1770 goto failed;
1771
1772
1773
1774
1775 while (retry--) {
1776 int ret;
1777 ret = shmem_unused_huge_shrink(sbinfo, NULL, 1);
1778 if (ret == SHRINK_STOP)
1779 break;
1780 if (ret)
1781 goto alloc_nohuge;
1782 }
1783 goto failed;
1784 }
1785
1786 if (PageTransHuge(page))
1787 hindex = round_down(index, HPAGE_PMD_NR);
1788 else
1789 hindex = index;
1790
1791 if (sgp == SGP_WRITE)
1792 __SetPageReferenced(page);
1793
1794 error = mem_cgroup_try_charge(page, charge_mm, gfp, &memcg,
1795 PageTransHuge(page));
1796 if (error)
1797 goto unacct;
1798 error = radix_tree_maybe_preload_order(gfp & GFP_RECLAIM_MASK,
1799 compound_order(page));
1800 if (!error) {
1801 error = shmem_add_to_page_cache(page, mapping, hindex,
1802 NULL);
1803 radix_tree_preload_end();
1804 }
1805 if (error) {
1806 mem_cgroup_cancel_charge(page, memcg,
1807 PageTransHuge(page));
1808 goto unacct;
1809 }
1810 mem_cgroup_commit_charge(page, memcg, false,
1811 PageTransHuge(page));
1812 lru_cache_add_anon(page);
1813
1814 spin_lock_irq(&info->lock);
1815 info->alloced += 1 << compound_order(page);
1816 inode->i_blocks += BLOCKS_PER_PAGE << compound_order(page);
1817 shmem_recalc_inode(inode);
1818 spin_unlock_irq(&info->lock);
1819 alloced = true;
1820
1821 if (PageTransHuge(page) &&
1822 DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE) <
1823 hindex + HPAGE_PMD_NR - 1) {
1824
1825
1826
1827
1828 spin_lock(&sbinfo->shrinklist_lock);
1829
1830
1831
1832
1833 if (list_empty_careful(&info->shrinklist)) {
1834 list_add_tail(&info->shrinklist,
1835 &sbinfo->shrinklist);
1836 sbinfo->shrinklist_len++;
1837 }
1838 spin_unlock(&sbinfo->shrinklist_lock);
1839 }
1840
1841
1842
1843
1844 if (sgp == SGP_FALLOC)
1845 sgp = SGP_WRITE;
1846clear:
1847
1848
1849
1850
1851
1852 if (sgp != SGP_WRITE && !PageUptodate(page)) {
1853 struct page *head = compound_head(page);
1854 int i;
1855
1856 for (i = 0; i < (1 << compound_order(head)); i++) {
1857 clear_highpage(head + i);
1858 flush_dcache_page(head + i);
1859 }
1860 SetPageUptodate(head);
1861 }
1862 }
1863
1864
1865 if (sgp <= SGP_CACHE &&
1866 ((loff_t)index << PAGE_SHIFT) >= i_size_read(inode)) {
1867 if (alloced) {
1868 ClearPageDirty(page);
1869 delete_from_page_cache(page);
1870 spin_lock_irq(&info->lock);
1871 shmem_recalc_inode(inode);
1872 spin_unlock_irq(&info->lock);
1873 }
1874 error = -EINVAL;
1875 goto unlock;
1876 }
1877 *pagep = page + index - hindex;
1878 return 0;
1879
1880
1881
1882
1883unacct:
1884 shmem_inode_unacct_blocks(inode, 1 << compound_order(page));
1885
1886 if (PageTransHuge(page)) {
1887 unlock_page(page);
1888 put_page(page);
1889 goto alloc_nohuge;
1890 }
1891failed:
1892 if (swap.val && !shmem_confirm_swap(mapping, index, swap))
1893 error = -EEXIST;
1894unlock:
1895 if (page) {
1896 unlock_page(page);
1897 put_page(page);
1898 }
1899 if (error == -ENOSPC && !once++) {
1900 spin_lock_irq(&info->lock);
1901 shmem_recalc_inode(inode);
1902 spin_unlock_irq(&info->lock);
1903 goto repeat;
1904 }
1905 if (error == -EEXIST)
1906 goto repeat;
1907 return error;
1908}
1909
/*
 * This is like autoremove_wake_function, but it removes the wait queue
 * entry unconditionally - even if something else had already woken the
 * target.
 */
static int synchronous_wake_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
{
1917 int ret = default_wake_function(wait, mode, sync, key);
1918 list_del_init(&wait->entry);
1919 return ret;
1920}
1921
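/* ->fault handler for shmem mappings, installed through shmem_vm_ops. */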
1922static int shmem_fault(struct vm_fault *vmf)
1923{
1924 struct vm_area_struct *vma = vmf->vma;
1925 struct inode *inode = file_inode(vma->vm_file);
1926 gfp_t gfp = mapping_gfp_mask(inode->i_mapping);
1927 enum sgp_type sgp;
1928 int error;
1929 int ret = VM_FAULT_LOCKED;
1930
	/*
	 * Trinity found that probing a hole which tmpfs is punching can
	 * prevent the hole-punch from ever completing: faulting pages back
	 * into the hole keeps adding work that shmem_undo_range() must then
	 * redo.  So refrain from faulting into a hole while it is being
	 * punched: wait on the fallocate waitq instead.  Racing faults only
	 * need to be a rare case, not impossible.
	 */
	if (unlikely(inode->i_private)) {
1949 struct shmem_falloc *shmem_falloc;
1950
1951 spin_lock(&inode->i_lock);
1952 shmem_falloc = inode->i_private;
1953 if (shmem_falloc &&
1954 shmem_falloc->waitq &&
1955 vmf->pgoff >= shmem_falloc->start &&
1956 vmf->pgoff < shmem_falloc->next) {
1957 wait_queue_head_t *shmem_falloc_waitq;
1958 DEFINE_WAIT_FUNC(shmem_fault_wait, synchronous_wake_function);
1959
1960 ret = VM_FAULT_NOPAGE;
1961 if ((vmf->flags & FAULT_FLAG_ALLOW_RETRY) &&
1962 !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
1963
1964 up_read(&vma->vm_mm->mmap_sem);
1965 ret = VM_FAULT_RETRY;
1966 }
1967
1968 shmem_falloc_waitq = shmem_falloc->waitq;
1969 prepare_to_wait(shmem_falloc_waitq, &shmem_fault_wait,
1970 TASK_UNINTERRUPTIBLE);
1971 spin_unlock(&inode->i_lock);
1972 schedule();
1973
1974
1975
1976
1977
1978
1979
1980
1981 spin_lock(&inode->i_lock);
1982 finish_wait(shmem_falloc_waitq, &shmem_fault_wait);
1983 spin_unlock(&inode->i_lock);
1984 return ret;
1985 }
1986 spin_unlock(&inode->i_lock);
1987 }
1988
1989 sgp = SGP_CACHE;
1990
1991 if ((vma->vm_flags & VM_NOHUGEPAGE) ||
1992 test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
1993 sgp = SGP_NOHUGE;
1994 else if (vma->vm_flags & VM_HUGEPAGE)
1995 sgp = SGP_HUGE;
1996
1997 error = shmem_getpage_gfp(inode, vmf->pgoff, &vmf->page, sgp,
1998 gfp, vma, vmf, &ret);
1999 if (error)
2000 return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS);
2001 return ret;
2002}
2003
2004unsigned long shmem_get_unmapped_area(struct file *file,
2005 unsigned long uaddr, unsigned long len,
2006 unsigned long pgoff, unsigned long flags)
2007{
2008 unsigned long (*get_area)(struct file *,
2009 unsigned long, unsigned long, unsigned long, unsigned long);
2010 unsigned long addr;
2011 unsigned long offset;
2012 unsigned long inflated_len;
2013 unsigned long inflated_addr;
2014 unsigned long inflated_offset;
2015
2016 if (len > TASK_SIZE)
2017 return -ENOMEM;
2018
2019 get_area = current->mm->get_unmapped_area;
2020 addr = get_area(file, uaddr, len, pgoff, flags);
2021
2022 if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE))
2023 return addr;
2024 if (IS_ERR_VALUE(addr))
2025 return addr;
2026 if (addr & ~PAGE_MASK)
2027 return addr;
2028 if (addr > TASK_SIZE - len)
2029 return addr;
2030
2031 if (shmem_huge == SHMEM_HUGE_DENY)
2032 return addr;
2033 if (len < HPAGE_PMD_SIZE)
2034 return addr;
2035 if (flags & MAP_FIXED)
2036 return addr;
2037
	/*
	 * Our priority is to support MAP_SHARED mapped hugely;
	 * and support MAP_PRIVATE mapped hugely too, until it is COWed.
	 * But if caller specified an address hint, respect that as before.
	 */
	if (uaddr)
		return addr;
2044
2045 if (shmem_huge != SHMEM_HUGE_FORCE) {
2046 struct super_block *sb;
2047
2048 if (file) {
2049 VM_BUG_ON(file->f_op != &shmem_file_operations);
2050 sb = file_inode(file)->i_sb;
		} else {
			/*
			 * Called directly from mm/mmap.c, or drivers/char/mem.c
			 * for "/dev/zero", to create a shared anonymous object.
			 */
			if (IS_ERR(shm_mnt))
				return addr;
			sb = shm_mnt->mnt_sb;
2059 }
2060 if (SHMEM_SB(sb)->huge == SHMEM_HUGE_NEVER)
2061 return addr;
2062 }
2063
2064 offset = (pgoff << PAGE_SHIFT) & (HPAGE_PMD_SIZE-1);
2065 if (offset && offset + len < 2 * HPAGE_PMD_SIZE)
2066 return addr;
2067 if ((addr & (HPAGE_PMD_SIZE-1)) == offset)
2068 return addr;
2069
2070 inflated_len = len + HPAGE_PMD_SIZE - PAGE_SIZE;
2071 if (inflated_len > TASK_SIZE)
2072 return addr;
2073 if (inflated_len < len)
2074 return addr;
2075
2076 inflated_addr = get_area(NULL, 0, inflated_len, 0, flags);
2077 if (IS_ERR_VALUE(inflated_addr))
2078 return addr;
2079 if (inflated_addr & ~PAGE_MASK)
2080 return addr;
2081
2082 inflated_offset = inflated_addr & (HPAGE_PMD_SIZE-1);
2083 inflated_addr += offset - inflated_offset;
2084 if (inflated_offset > offset)
2085 inflated_addr += HPAGE_PMD_SIZE;
2086
2087 if (inflated_addr > TASK_SIZE - len)
2088 return addr;
2089 return inflated_addr;
2090}
2091
2092#ifdef CONFIG_NUMA
2093static int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *mpol)
2094{
2095 struct inode *inode = file_inode(vma->vm_file);
2096 return mpol_set_shared_policy(&SHMEM_I(inode)->policy, vma, mpol);
2097}
2098
2099static struct mempolicy *shmem_get_policy(struct vm_area_struct *vma,
2100 unsigned long addr)
2101{
2102 struct inode *inode = file_inode(vma->vm_file);
2103 pgoff_t index;
2104
2105 index = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
2106 return mpol_shared_policy_lookup(&SHMEM_I(inode)->policy, index);
2107}
2108#endif
2109
2110int shmem_lock(struct file *file, int lock, struct user_struct *user)
2111{
2112 struct inode *inode = file_inode(file);
2113 struct shmem_inode_info *info = SHMEM_I(inode);
2114 int retval = -ENOMEM;
2115
2116 spin_lock_irq(&info->lock);
2117 if (lock && !(info->flags & VM_LOCKED)) {
2118 if (!user_shm_lock(inode->i_size, user))
2119 goto out_nomem;
2120 info->flags |= VM_LOCKED;
2121 mapping_set_unevictable(file->f_mapping);
2122 }
2123 if (!lock && (info->flags & VM_LOCKED) && user) {
2124 user_shm_unlock(inode->i_size, user);
2125 info->flags &= ~VM_LOCKED;
2126 mapping_clear_unevictable(file->f_mapping);
2127 }
2128 retval = 0;
2129
2130out_nomem:
2131 spin_unlock_irq(&info->lock);
2132 return retval;
2133}
2134
2135static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
2136{
2137 file_accessed(file);
2138 vma->vm_ops = &shmem_vm_ops;
2139 if (IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE) &&
2140 ((vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK) <
2141 (vma->vm_end & HPAGE_PMD_MASK)) {
2142 khugepaged_enter(vma, vma->vm_flags);
2143 }
2144 return 0;
2145}
2146
2147static struct inode *shmem_get_inode(struct super_block *sb, const struct inode *dir,
2148 umode_t mode, dev_t dev, unsigned long flags)
2149{
2150 struct inode *inode;
2151 struct shmem_inode_info *info;
2152 struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
2153
2154 if (shmem_reserve_inode(sb))
2155 return NULL;
2156
2157 inode = new_inode(sb);
2158 if (inode) {
2159 inode->i_ino = get_next_ino();
2160 inode_init_owner(inode, dir, mode);
2161 inode->i_blocks = 0;
2162 inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
2163 inode->i_generation = get_seconds();
2164 info = SHMEM_I(inode);
2165 memset(info, 0, (char *)inode - (char *)info);
2166 spin_lock_init(&info->lock);
2167 info->seals = F_SEAL_SEAL;
2168 info->flags = flags & VM_NORESERVE;
2169 INIT_LIST_HEAD(&info->shrinklist);
2170 INIT_LIST_HEAD(&info->swaplist);
2171 simple_xattrs_init(&info->xattrs);
2172 cache_no_acl(inode);
2173
2174 switch (mode & S_IFMT) {
2175 default:
2176 inode->i_op = &shmem_special_inode_operations;
2177 init_special_inode(inode, mode, dev);
2178 break;
2179 case S_IFREG:
2180 inode->i_mapping->a_ops = &shmem_aops;
2181 inode->i_op = &shmem_inode_operations;
2182 inode->i_fop = &shmem_file_operations;
2183 mpol_shared_policy_init(&info->policy,
2184 shmem_get_sbmpol(sbinfo));
2185 break;
2186 case S_IFDIR:
2187 inc_nlink(inode);
			/* Some things misbehave if size == 0 on a directory */
2189 inode->i_size = 2 * BOGO_DIRENT_SIZE;
2190 inode->i_op = &shmem_dir_inode_operations;
2191 inode->i_fop = &simple_dir_operations;
2192 break;
2193 case S_IFLNK:
			/*
			 * Must not load anything in the rbtree,
			 * mpol_free_shared_policy will not be called.
			 */
2198 mpol_shared_policy_init(&info->policy, NULL);
2199 break;
2200 }
2201 } else
2202 shmem_free_inode(sb);
2203 return inode;
2204}
2205
2206bool shmem_mapping(struct address_space *mapping)
2207{
2208 return mapping->a_ops == &shmem_aops;
2209}
2210
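/*
 * Worker for the userfaultfd UFFDIO_COPY/UFFDIO_ZEROPAGE paths below:
 * allocate a shmem page for dst_vma at dst_addr, fill it from src_addr
 * (or zero it), add it to the page cache and install the pte.
 */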
2211static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
2212 pmd_t *dst_pmd,
2213 struct vm_area_struct *dst_vma,
2214 unsigned long dst_addr,
2215 unsigned long src_addr,
2216 bool zeropage,
2217 struct page **pagep)
2218{
2219 struct inode *inode = file_inode(dst_vma->vm_file);
2220 struct shmem_inode_info *info = SHMEM_I(inode);
2221 struct address_space *mapping = inode->i_mapping;
2222 gfp_t gfp = mapping_gfp_mask(mapping);
2223 pgoff_t pgoff = linear_page_index(dst_vma, dst_addr);
2224 struct mem_cgroup *memcg;
2225 spinlock_t *ptl;
2226 void *page_kaddr;
2227 struct page *page;
2228 pte_t _dst_pte, *dst_pte;
2229 int ret;
2230
2231 ret = -ENOMEM;
2232 if (!shmem_inode_acct_block(inode, 1))
2233 goto out;
2234
2235 if (!*pagep) {
2236 page = shmem_alloc_page(gfp, info, pgoff);
2237 if (!page)
2238 goto out_unacct_blocks;
2239
2240 if (!zeropage) {
2241 page_kaddr = kmap_atomic(page);
2242 ret = copy_from_user(page_kaddr,
2243 (const void __user *)src_addr,
2244 PAGE_SIZE);
2245 kunmap_atomic(page_kaddr);
2246
2247
2248 if (unlikely(ret)) {
2249 *pagep = page;
2250 shmem_inode_unacct_blocks(inode, 1);
2251
2252 return -EFAULT;
2253 }
2254 } else {
2255 clear_highpage(page);
2256 }
2257 } else {
2258 page = *pagep;
2259 *pagep = NULL;
2260 }
2261
2262 VM_BUG_ON(PageLocked(page) || PageSwapBacked(page));
2263 __SetPageLocked(page);
2264 __SetPageSwapBacked(page);
2265 __SetPageUptodate(page);
2266
2267 ret = mem_cgroup_try_charge(page, dst_mm, gfp, &memcg, false);
2268 if (ret)
2269 goto out_release;
2270
2271 ret = radix_tree_maybe_preload(gfp & GFP_RECLAIM_MASK);
2272 if (!ret) {
2273 ret = shmem_add_to_page_cache(page, mapping, pgoff, NULL);
2274 radix_tree_preload_end();
2275 }
2276 if (ret)
2277 goto out_release_uncharge;
2278
2279 mem_cgroup_commit_charge(page, memcg, false, false);
2280
2281 _dst_pte = mk_pte(page, dst_vma->vm_page_prot);
2282 if (dst_vma->vm_flags & VM_WRITE)
2283 _dst_pte = pte_mkwrite(pte_mkdirty(_dst_pte));
2284
2285 ret = -EEXIST;
2286 dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
2287 if (!pte_none(*dst_pte))
2288 goto out_release_uncharge_unlock;
2289
2290 lru_cache_add_anon(page);
2291
2292 spin_lock(&info->lock);
2293 info->alloced++;
2294 inode->i_blocks += BLOCKS_PER_PAGE;
2295 shmem_recalc_inode(inode);
2296 spin_unlock(&info->lock);
2297
2298 inc_mm_counter(dst_mm, mm_counter_file(page));
2299 page_add_file_rmap(page, false);
2300 set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
2301
	/* No need to invalidate - it was non-present before */
2303 update_mmu_cache(dst_vma, dst_addr, dst_pte);
2304 unlock_page(page);
2305 pte_unmap_unlock(dst_pte, ptl);
2306 ret = 0;
2307out:
2308 return ret;
2309out_release_uncharge_unlock:
2310 pte_unmap_unlock(dst_pte, ptl);
2311out_release_uncharge:
2312 mem_cgroup_cancel_charge(page, memcg, false);
2313out_release:
2314 unlock_page(page);
2315 put_page(page);
2316out_unacct_blocks:
2317 shmem_inode_unacct_blocks(inode, 1);
2318 goto out;
2319}
2320
2321int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm,
2322 pmd_t *dst_pmd,
2323 struct vm_area_struct *dst_vma,
2324 unsigned long dst_addr,
2325 unsigned long src_addr,
2326 struct page **pagep)
2327{
2328 return shmem_mfill_atomic_pte(dst_mm, dst_pmd, dst_vma,
2329 dst_addr, src_addr, false, pagep);
2330}
2331
2332int shmem_mfill_zeropage_pte(struct mm_struct *dst_mm,
2333 pmd_t *dst_pmd,
2334 struct vm_area_struct *dst_vma,
2335 unsigned long dst_addr)
2336{
2337 struct page *page = NULL;
2338
2339 return shmem_mfill_atomic_pte(dst_mm, dst_pmd, dst_vma,
2340 dst_addr, 0, true, &page);
2341}
2342
2343#ifdef CONFIG_TMPFS
2344static const struct inode_operations shmem_symlink_inode_operations;
2345static const struct inode_operations shmem_short_symlink_operations;
2346
2347#ifdef CONFIG_TMPFS_XATTR
2348static int shmem_initxattrs(struct inode *, const struct xattr *, void *);
2349#else
2350#define shmem_initxattrs NULL
2351#endif
2352
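/*
 * ->write_begin for tmpfs: check write/grow seals, then pin the target
 * page with SGP_WRITE; ->write_end below updates i_size and dirties it.
 */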
2353static int
2354shmem_write_begin(struct file *file, struct address_space *mapping,
2355 loff_t pos, unsigned len, unsigned flags,
2356 struct page **pagep, void **fsdata)
2357{
2358 struct inode *inode = mapping->host;
2359 struct shmem_inode_info *info = SHMEM_I(inode);
2360 pgoff_t index = pos >> PAGE_SHIFT;
2361
	/* i_mutex is held by caller */
2363 if (unlikely(info->seals & (F_SEAL_WRITE | F_SEAL_GROW))) {
2364 if (info->seals & F_SEAL_WRITE)
2365 return -EPERM;
2366 if ((info->seals & F_SEAL_GROW) && pos + len > inode->i_size)
2367 return -EPERM;
2368 }
2369
2370 return shmem_getpage(inode, index, pagep, SGP_WRITE);
2371}
2372
2373static int
2374shmem_write_end(struct file *file, struct address_space *mapping,
2375 loff_t pos, unsigned len, unsigned copied,
2376 struct page *page, void *fsdata)
2377{
2378 struct inode *inode = mapping->host;
2379
2380 if (pos + copied > inode->i_size)
2381 i_size_write(inode, pos + copied);
2382
2383 if (!PageUptodate(page)) {
2384 struct page *head = compound_head(page);
2385 if (PageTransCompound(page)) {
2386 int i;
2387
2388 for (i = 0; i < HPAGE_PMD_NR; i++) {
2389 if (head + i == page)
2390 continue;
2391 clear_highpage(head + i);
2392 flush_dcache_page(head + i);
2393 }
2394 }
2395 if (copied < PAGE_SIZE) {
2396 unsigned from = pos & (PAGE_SIZE - 1);
2397 zero_user_segments(page, 0, from,
2398 from + copied, PAGE_SIZE);
2399 }
2400 SetPageUptodate(head);
2401 }
2402 set_page_dirty(page);
2403 unlock_page(page);
2404 put_page(page);
2405
2406 return copied;
2407}
2408
2409static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
2410{
2411 struct file *file = iocb->ki_filp;
2412 struct inode *inode = file_inode(file);
2413 struct address_space *mapping = inode->i_mapping;
2414 pgoff_t index;
2415 unsigned long offset;
2416 enum sgp_type sgp = SGP_READ;
2417 int error = 0;
2418 ssize_t retval = 0;
2419 loff_t *ppos = &iocb->ki_pos;
2420
	/*
	 * Might this read be for a stacking filesystem?  Then when reading
	 * holes of a sparse file, we actually need to allocate those pages,
	 * and even mark them dirty, so it cannot exceed the max_blocks limit.
	 */
2426 if (!iter_is_iovec(to))
2427 sgp = SGP_CACHE;
2428
2429 index = *ppos >> PAGE_SHIFT;
2430 offset = *ppos & ~PAGE_MASK;
2431
2432 for (;;) {
2433 struct page *page = NULL;
2434 pgoff_t end_index;
2435 unsigned long nr, ret;
2436 loff_t i_size = i_size_read(inode);
2437
2438 end_index = i_size >> PAGE_SHIFT;
2439 if (index > end_index)
2440 break;
2441 if (index == end_index) {
2442 nr = i_size & ~PAGE_MASK;
2443 if (nr <= offset)
2444 break;
2445 }
2446
2447 error = shmem_getpage(inode, index, &page, sgp);
2448 if (error) {
2449 if (error == -EINVAL)
2450 error = 0;
2451 break;
2452 }
2453 if (page) {
2454 if (sgp == SGP_CACHE)
2455 set_page_dirty(page);
2456 unlock_page(page);
2457 }
2458
/*
 * We must re-evaluate i_size after shmem_getpage(), since reads
 * (unlike writes) are called without i_mutex protection against truncate.
 */
2463 nr = PAGE_SIZE;
2464 i_size = i_size_read(inode);
2465 end_index = i_size >> PAGE_SHIFT;
2466 if (index == end_index) {
2467 nr = i_size & ~PAGE_MASK;
2468 if (nr <= offset) {
2469 if (page)
2470 put_page(page);
2471 break;
2472 }
2473 }
2474 nr -= offset;
2475
2476 if (page) {
/*
 * If users can be writing to this page using arbitrary
 * virtual addresses, take care about potential aliasing
 * before reading the page on the kernel side.
 */
2482 if (mapping_writably_mapped(mapping))
2483 flush_dcache_page(page);
2484
/* Mark the page accessed if we read the beginning of it */
2487 if (!offset)
2488 mark_page_accessed(page);
2489 } else {
2490 page = ZERO_PAGE(0);
2491 get_page(page);
2492 }
2493
/*
 * Ok, we have the page, and it's up-to-date, so
 * now we can copy it to user space...
 */
2498 ret = copy_page_to_iter(page, offset, nr, to);
2499 retval += ret;
2500 offset += ret;
2501 index += offset >> PAGE_SHIFT;
2502 offset &= ~PAGE_MASK;
2503
2504 put_page(page);
2505 if (!iov_iter_count(to))
2506 break;
2507 if (ret < nr) {
2508 error = -EFAULT;
2509 break;
2510 }
2511 cond_resched();
2512 }
2513
2514 *ppos = ((loff_t) index << PAGE_SHIFT) + offset;
2515 file_accessed(file);
2516 return retval ? retval : error;
2517}
2518
/*
 * llseek SEEK_DATA or SEEK_HOLE through the radix-tree of shmem pages.
 */
2522static pgoff_t shmem_seek_hole_data(struct address_space *mapping,
2523 pgoff_t index, pgoff_t end, int whence)
2524{
2525 struct page *page;
2526 struct pagevec pvec;
2527 pgoff_t indices[PAGEVEC_SIZE];
2528 bool done = false;
2529 int i;
2530
2531 pagevec_init(&pvec, 0);
2532 pvec.nr = 1;
2533 while (!done) {
2534 pvec.nr = find_get_entries(mapping, index,
2535 pvec.nr, pvec.pages, indices);
2536 if (!pvec.nr) {
2537 if (whence == SEEK_DATA)
2538 index = end;
2539 break;
2540 }
2541 for (i = 0; i < pvec.nr; i++, index++) {
2542 if (index < indices[i]) {
2543 if (whence == SEEK_HOLE) {
2544 done = true;
2545 break;
2546 }
2547 index = indices[i];
2548 }
2549 page = pvec.pages[i];
2550 if (page && !radix_tree_exceptional_entry(page)) {
2551 if (!PageUptodate(page))
2552 page = NULL;
2553 }
2554 if (index >= end ||
2555 (page && whence == SEEK_DATA) ||
2556 (!page && whence == SEEK_HOLE)) {
2557 done = true;
2558 break;
2559 }
2560 }
2561 pagevec_remove_exceptionals(&pvec);
2562 pagevec_release(&pvec);
2563 pvec.nr = PAGEVEC_SIZE;
2564 cond_resched();
2565 }
2566 return index;
2567}
2568
2569static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence)
2570{
2571 struct address_space *mapping = file->f_mapping;
2572 struct inode *inode = mapping->host;
2573 pgoff_t start, end;
2574 loff_t new_offset;
2575
2576 if (whence != SEEK_DATA && whence != SEEK_HOLE)
2577 return generic_file_llseek_size(file, offset, whence,
2578 MAX_LFS_FILESIZE, i_size_read(inode));
2579 inode_lock(inode);
2580
2581
2582 if (offset < 0)
2583 offset = -EINVAL;
2584 else if (offset >= inode->i_size)
2585 offset = -ENXIO;
2586 else {
2587 start = offset >> PAGE_SHIFT;
2588 end = (inode->i_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
2589 new_offset = shmem_seek_hole_data(mapping, start, end, whence);
2590 new_offset <<= PAGE_SHIFT;
2591 if (new_offset > offset) {
2592 if (new_offset < inode->i_size)
2593 offset = new_offset;
2594 else if (whence == SEEK_DATA)
2595 offset = -ENXIO;
2596 else
2597 offset = inode->i_size;
2598 }
2599 }
2600
2601 if (offset >= 0)
2602 offset = vfs_setpos(file, offset, MAX_LFS_FILESIZE);
2603 inode_unlock(inode);
2604 return offset;
2605}
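/*
 * shmem_file_llseek() gives tmpfs files working SEEK_DATA/SEEK_HOLE by
 * scanning the page cache in shmem_seek_hole_data() above: only pages that
 * are present and uptodate (or swap entries) count as data. A hedged
 * userspace sketch, error handling mostly omitted:
 *
 *	off_t data = lseek(fd, 0, SEEK_DATA);	  // first data page at/after 0
 *	off_t hole = lseek(fd, data, SEEK_HOLE);  // end of that data extent
 *	if (data == (off_t)-1 && errno == ENXIO)
 *		;				  // no data at or after the offset
 */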
2606
/*
 * We need a tag: a new tag would expand every radix_tree_node by 8 bytes,
 * so reuse a tag which we firmly believe is never set or cleared on shmem.
 */
2611#define SHMEM_TAG_PINNED PAGECACHE_TAG_TOWRITE
2612#define LAST_SCAN 4
2613
2614static void shmem_tag_pins(struct address_space *mapping)
2615{
2616 struct radix_tree_iter iter;
2617 void **slot;
2618 pgoff_t start;
2619 struct page *page;
2620
2621 lru_add_drain();
2622 start = 0;
2623 rcu_read_lock();
2624
2625 radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
2626 page = radix_tree_deref_slot(slot);
2627 if (!page || radix_tree_exception(page)) {
2628 if (radix_tree_deref_retry(page)) {
2629 slot = radix_tree_iter_retry(&iter);
2630 continue;
2631 }
2632 } else if (page_count(page) - page_mapcount(page) > 1) {
2633 spin_lock_irq(&mapping->tree_lock);
2634 radix_tree_tag_set(&mapping->page_tree, iter.index,
2635 SHMEM_TAG_PINNED);
2636 spin_unlock_irq(&mapping->tree_lock);
2637 }
2638
2639 if (need_resched()) {
2640 slot = radix_tree_iter_resume(slot, &iter);
2641 cond_resched_rcu();
2642 }
2643 }
2644 rcu_read_unlock();
2645}
2646
/*
 * Setting F_SEAL_WRITE requires us to verify there's no pending writer.
 * However, via get_user_pages(), drivers might have pending I/O without any
 * active user-space mappings (eg. direct-IO, AIO). Therefore, we look at all
 * pages and see whether they have an elevated ref-count. If so, we tag them
 * and wait for them to be dropped.
 *
 * The caller must guarantee that no new user will acquire writable
 * references to those pages, to avoid races.
 */
2656static int shmem_wait_for_pins(struct address_space *mapping)
2657{
2658 struct radix_tree_iter iter;
2659 void **slot;
2660 pgoff_t start;
2661 struct page *page;
2662 int error, scan;
2663
2664 shmem_tag_pins(mapping);
2665
2666 error = 0;
2667 for (scan = 0; scan <= LAST_SCAN; scan++) {
2668 if (!radix_tree_tagged(&mapping->page_tree, SHMEM_TAG_PINNED))
2669 break;
2670
2671 if (!scan)
2672 lru_add_drain_all();
2673 else if (schedule_timeout_killable((HZ << scan) / 200))
2674 scan = LAST_SCAN;
2675
2676 start = 0;
2677 rcu_read_lock();
2678 radix_tree_for_each_tagged(slot, &mapping->page_tree, &iter,
2679 start, SHMEM_TAG_PINNED) {
2680
2681 page = radix_tree_deref_slot(slot);
2682 if (radix_tree_exception(page)) {
2683 if (radix_tree_deref_retry(page)) {
2684 slot = radix_tree_iter_retry(&iter);
2685 continue;
2686 }
2687
2688 page = NULL;
2689 }
2690
2691 if (page &&
2692 page_count(page) - page_mapcount(page) != 1) {
2693 if (scan < LAST_SCAN)
2694 goto continue_resched;
2695
/*
 * On the last scan, we clean up all those tags we inserted; but
 * make a note that we still found pages pinned.
 */
2701 error = -EBUSY;
2702 }
2703
2704 spin_lock_irq(&mapping->tree_lock);
2705 radix_tree_tag_clear(&mapping->page_tree,
2706 iter.index, SHMEM_TAG_PINNED);
2707 spin_unlock_irq(&mapping->tree_lock);
2708continue_resched:
2709 if (need_resched()) {
2710 slot = radix_tree_iter_resume(slot, &iter);
2711 cond_resched_rcu();
2712 }
2713 }
2714 rcu_read_unlock();
2715 }
2716
2717 return error;
2718}
2719
2720#define F_ALL_SEALS (F_SEAL_SEAL | \
2721 F_SEAL_SHRINK | \
2722 F_SEAL_GROW | \
2723 F_SEAL_WRITE)
2724
2725int shmem_add_seals(struct file *file, unsigned int seals)
2726{
2727 struct inode *inode = file_inode(file);
2728 struct shmem_inode_info *info = SHMEM_I(inode);
2729 int error;
2730
/*
 * SEALING
 * Sealing allows multiple parties to share a tmpfs/shmem file but restrict
 * further access to it. Seals can only be added, never removed, so mutually
 * untrusted parties can rely on a once-established policy. The seals handled
 * here are:
 *   F_SEAL_SEAL:   prevent further seals from being set
 *   F_SEAL_SHRINK: prevent the file from shrinking
 *   F_SEAL_GROW:   prevent the file from growing
 *   F_SEAL_WRITE:  prevent write access to the file
 *
 * Sealing only adds the given seals to whatever is already set; existing
 * seals are never cleared. Setting F_SEAL_WRITE additionally requires that
 * no writable mappings and no elevated page pins remain, which is what
 * mapping_deny_writable() and shmem_wait_for_pins() below verify.
 *
 * Seals are only supported on shmem files (typically created via
 * memfd_create(2)) and are never written to disk.
 */
2761 if (file->f_op != &shmem_file_operations)
2762 return -EINVAL;
2763 if (!(file->f_mode & FMODE_WRITE))
2764 return -EPERM;
2765 if (seals & ~(unsigned int)F_ALL_SEALS)
2766 return -EINVAL;
2767
2768 inode_lock(inode);
2769
2770 if (info->seals & F_SEAL_SEAL) {
2771 error = -EPERM;
2772 goto unlock;
2773 }
2774
2775 if ((seals & F_SEAL_WRITE) && !(info->seals & F_SEAL_WRITE)) {
2776 error = mapping_deny_writable(file->f_mapping);
2777 if (error)
2778 goto unlock;
2779
2780 error = shmem_wait_for_pins(file->f_mapping);
2781 if (error) {
2782 mapping_allow_writable(file->f_mapping);
2783 goto unlock;
2784 }
2785 }
2786
2787 info->seals |= seals;
2788 error = 0;
2789
2790unlock:
2791 inode_unlock(inode);
2792 return error;
2793}
2794EXPORT_SYMBOL_GPL(shmem_add_seals);
2795
2796int shmem_get_seals(struct file *file)
2797{
2798 if (file->f_op != &shmem_file_operations)
2799 return -EINVAL;
2800
2801 return SHMEM_I(file_inode(file))->seals;
2802}
2803EXPORT_SYMBOL_GPL(shmem_get_seals);
2804
2805long shmem_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
2806{
2807 long error;
2808
2809 switch (cmd) {
2810 case F_ADD_SEALS:
/* disallow the upper 32 bits of the seals argument */
2812 if (arg > UINT_MAX)
2813 return -EINVAL;
2814
2815 error = shmem_add_seals(file, arg);
2816 break;
2817 case F_GET_SEALS:
2818 error = shmem_get_seals(file);
2819 break;
2820 default:
2821 error = -EINVAL;
2822 break;
2823 }
2824
2825 return error;
2826}
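/*
 * Sealing is driven entirely through fcntl(2). A hedged userspace sketch
 * (the fd must come from memfd_create(2) with MFD_ALLOW_SEALING and be open
 * for writing, otherwise F_ADD_SEALS fails; error handling omitted):
 *
 *	fcntl(fd, F_ADD_SEALS, F_SEAL_SHRINK | F_SEAL_GROW);
 *	int seals = fcntl(fd, F_GET_SEALS);
 *	if (seals & F_SEAL_WRITE)
 *		handle_read_only_peer();	 // illustrative name, not in this file
 */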
2827
2828static long shmem_fallocate(struct file *file, int mode, loff_t offset,
2829 loff_t len)
2830{
2831 struct inode *inode = file_inode(file);
2832 struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
2833 struct shmem_inode_info *info = SHMEM_I(inode);
2834 struct shmem_falloc shmem_falloc;
2835 pgoff_t start, index, end;
2836 int error;
2837
2838 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
2839 return -EOPNOTSUPP;
2840
2841 inode_lock(inode);
2842
2843 if (mode & FALLOC_FL_PUNCH_HOLE) {
2844 struct address_space *mapping = file->f_mapping;
2845 loff_t unmap_start = round_up(offset, PAGE_SIZE);
2846 loff_t unmap_end = round_down(offset + len, PAGE_SIZE) - 1;
2847 DECLARE_WAIT_QUEUE_HEAD_ONSTACK(shmem_falloc_waitq);
2848
/* protected by i_mutex */
2850 if (info->seals & F_SEAL_WRITE) {
2851 error = -EPERM;
2852 goto out;
2853 }
2854
2855 shmem_falloc.waitq = &shmem_falloc_waitq;
2856 shmem_falloc.start = unmap_start >> PAGE_SHIFT;
2857 shmem_falloc.next = (unmap_end + 1) >> PAGE_SHIFT;
2858 spin_lock(&inode->i_lock);
2859 inode->i_private = &shmem_falloc;
2860 spin_unlock(&inode->i_lock);
2861
2862 if ((u64)unmap_end > (u64)unmap_start)
2863 unmap_mapping_range(mapping, unmap_start,
2864 1 + unmap_end - unmap_start, 0);
2865 shmem_truncate_range(inode, offset, offset + len - 1);
/* No need to unmap again: hole-punching leaves COWed pages intact */

2868 spin_lock(&inode->i_lock);
2869 inode->i_private = NULL;
2870 wake_up_all(&shmem_falloc_waitq);
2871 WARN_ON_ONCE(!list_empty(&shmem_falloc_waitq.head));
2872 spin_unlock(&inode->i_lock);
2873 error = 0;
2874 goto out;
2875 }
2876
/* We need to check rlimit even when FALLOC_FL_KEEP_SIZE */
2878 error = inode_newsize_ok(inode, offset + len);
2879 if (error)
2880 goto out;
2881
2882 if ((info->seals & F_SEAL_GROW) && offset + len > inode->i_size) {
2883 error = -EPERM;
2884 goto out;
2885 }
2886
2887 start = offset >> PAGE_SHIFT;
2888 end = (offset + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
2889
2890 if (sbinfo->max_blocks && end - start > sbinfo->max_blocks) {
2891 error = -ENOSPC;
2892 goto out;
2893 }
2894
2895 shmem_falloc.waitq = NULL;
2896 shmem_falloc.start = start;
2897 shmem_falloc.next = start;
2898 shmem_falloc.nr_falloced = 0;
2899 shmem_falloc.nr_unswapped = 0;
2900 spin_lock(&inode->i_lock);
2901 inode->i_private = &shmem_falloc;
2902 spin_unlock(&inode->i_lock);
2903
2904 for (index = start; index < end; index++) {
2905 struct page *page;
2906
/*
 * Good, the fallocate(2) manpage permits EINTR: we may have been
 * interrupted because we are using up too much memory.
 */
2911 if (signal_pending(current))
2912 error = -EINTR;
2913 else if (shmem_falloc.nr_unswapped > shmem_falloc.nr_falloced)
2914 error = -ENOMEM;
2915 else
2916 error = shmem_getpage(inode, index, &page, SGP_FALLOC);
2917 if (error) {
/* Remove the !PageUptodate pages we added */
2919 if (index > start) {
2920 shmem_undo_range(inode,
2921 (loff_t)start << PAGE_SHIFT,
2922 ((loff_t)index << PAGE_SHIFT) - 1, true);
2923 }
2924 goto undone;
2925 }
2926
/*
 * Inform shmem_writepage() how far we have reached.
 * No need for lock or barrier: we have the page lock.
 */
2931 shmem_falloc.next++;
2932 if (!PageUptodate(page))
2933 shmem_falloc.nr_falloced++;
2934
/*
 * If !PageUptodate, leave it that way so that freeable pages
 * can be recognized if we need to rollback on error later.
 * But set_page_dirty so that memory pressure will swap rather
 * than free the pages we are allocating (and SGP_CACHE pages
 * might still be clean: we now need to mark those dirty too).
 */
2942 set_page_dirty(page);
2943 unlock_page(page);
2944 put_page(page);
2945 cond_resched();
2946 }
2947
2948 if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size)
2949 i_size_write(inode, offset + len);
2950 inode->i_ctime = current_time(inode);
2951undone:
2952 spin_lock(&inode->i_lock);
2953 inode->i_private = NULL;
2954 spin_unlock(&inode->i_lock);
2955out:
2956 inode_unlock(inode);
2957 return error;
2958}
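/*
 * Both fallocate(2) modes handled above are reachable from userspace; a
 * hedged sketch (off/len are illustrative variables, not from this file):
 *
 *	// preallocate: rejected up front if the range alone exceeds size=
 *	fallocate(fd, 0, 0, 1 << 20);
 *	// punch a hole: must be combined with KEEP_SIZE, as the VFS enforces
 *	fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, off, len);
 */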
2959
2960static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
2961{
2962 struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb);
2963
2964 buf->f_type = TMPFS_MAGIC;
2965 buf->f_bsize = PAGE_SIZE;
2966 buf->f_namelen = NAME_MAX;
2967 if (sbinfo->max_blocks) {
2968 buf->f_blocks = sbinfo->max_blocks;
2969 buf->f_bavail =
2970 buf->f_bfree = sbinfo->max_blocks -
2971 percpu_counter_sum(&sbinfo->used_blocks);
2972 }
2973 if (sbinfo->max_inodes) {
2974 buf->f_files = sbinfo->max_inodes;
2975 buf->f_ffree = sbinfo->free_inodes;
2976 }
2977
2978 return 0;
2979}
2980
/*
 * File creation. Allocate an inode, and we're done..
 */
2984static int
2985shmem_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
2986{
2987 struct inode *inode;
2988 int error = -ENOSPC;
2989
2990 inode = shmem_get_inode(dir->i_sb, dir, mode, dev, VM_NORESERVE);
2991 if (inode) {
2992 error = simple_acl_create(dir, inode);
2993 if (error)
2994 goto out_iput;
2995 error = security_inode_init_security(inode, dir,
2996 &dentry->d_name,
2997 shmem_initxattrs, NULL);
2998 if (error && error != -EOPNOTSUPP)
2999 goto out_iput;
3000
3001 error = 0;
3002 dir->i_size += BOGO_DIRENT_SIZE;
3003 dir->i_ctime = dir->i_mtime = current_time(dir);
3004 d_instantiate(dentry, inode);
3005 dget(dentry);
3006 }
3007 return error;
3008out_iput:
3009 iput(inode);
3010 return error;
3011}
3012
3013static int
3014shmem_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
3015{
3016 struct inode *inode;
3017 int error = -ENOSPC;
3018
3019 inode = shmem_get_inode(dir->i_sb, dir, mode, 0, VM_NORESERVE);
3020 if (inode) {
3021 error = security_inode_init_security(inode, dir,
3022 NULL,
3023 shmem_initxattrs, NULL);
3024 if (error && error != -EOPNOTSUPP)
3025 goto out_iput;
3026 error = simple_acl_create(dir, inode);
3027 if (error)
3028 goto out_iput;
3029 d_tmpfile(dentry, inode);
3030 }
3031 return error;
3032out_iput:
3033 iput(inode);
3034 return error;
3035}
3036
3037static int shmem_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
3038{
3039 int error;
3040
3041 if ((error = shmem_mknod(dir, dentry, mode | S_IFDIR, 0)))
3042 return error;
3043 inc_nlink(dir);
3044 return 0;
3045}
3046
3047static int shmem_create(struct inode *dir, struct dentry *dentry, umode_t mode,
3048 bool excl)
3049{
3050 return shmem_mknod(dir, dentry, mode | S_IFREG, 0);
3051}
3052
3053
3054
3055
3056static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
3057{
3058 struct inode *inode = d_inode(old_dentry);
3059 int ret;
3060
/*
 * No ordinary (disk based) filesystem counts links as inodes;
 * but each new link needs a new dentry, pinning lowmem, and
 * tmpfs dentries cannot be pruned until they are unlinked.
 */
3066 ret = shmem_reserve_inode(inode->i_sb);
3067 if (ret)
3068 goto out;
3069
3070 dir->i_size += BOGO_DIRENT_SIZE;
3071 inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
3072 inc_nlink(inode);
3073 ihold(inode);
3074 dget(dentry);
3075 d_instantiate(dentry, inode);
3076out:
3077 return ret;
3078}
3079
3080static int shmem_unlink(struct inode *dir, struct dentry *dentry)
3081{
3082 struct inode *inode = d_inode(dentry);
3083
3084 if (inode->i_nlink > 1 && !S_ISDIR(inode->i_mode))
3085 shmem_free_inode(inode->i_sb);
3086
3087 dir->i_size -= BOGO_DIRENT_SIZE;
3088 inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
3089 drop_nlink(inode);
3090 dput(dentry);
3091 return 0;
3092}
3093
3094static int shmem_rmdir(struct inode *dir, struct dentry *dentry)
3095{
3096 if (!simple_empty(dentry))
3097 return -ENOTEMPTY;
3098
3099 drop_nlink(d_inode(dentry));
3100 drop_nlink(dir);
3101 return shmem_unlink(dir, dentry);
3102}
3103
3104static int shmem_exchange(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry)
3105{
3106 bool old_is_dir = d_is_dir(old_dentry);
3107 bool new_is_dir = d_is_dir(new_dentry);
3108
3109 if (old_dir != new_dir && old_is_dir != new_is_dir) {
3110 if (old_is_dir) {
3111 drop_nlink(old_dir);
3112 inc_nlink(new_dir);
3113 } else {
3114 drop_nlink(new_dir);
3115 inc_nlink(old_dir);
3116 }
3117 }
3118 old_dir->i_ctime = old_dir->i_mtime =
3119 new_dir->i_ctime = new_dir->i_mtime =
3120 d_inode(old_dentry)->i_ctime =
3121 d_inode(new_dentry)->i_ctime = current_time(old_dir);
3122
3123 return 0;
3124}
3125
3126static int shmem_whiteout(struct inode *old_dir, struct dentry *old_dentry)
3127{
3128 struct dentry *whiteout;
3129 int error;
3130
3131 whiteout = d_alloc(old_dentry->d_parent, &old_dentry->d_name);
3132 if (!whiteout)
3133 return -ENOMEM;
3134
3135 error = shmem_mknod(old_dir, whiteout,
3136 S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV);
3137 dput(whiteout);
3138 if (error)
3139 return error;
3140
/*
 * Cheat and hash the whiteout while the old dentry is still in
 * place, instead of playing games with FS_RENAME_DOES_D_MOVE.
 *
 * d_lookup() will consistently find one of them at this point,
 * not sure which one, but that isn't even important.
 */
3148 d_rehash(whiteout);
3149 return 0;
3150}
3151
/*
 * The VFS layer already does all the dentry stuff for rename,
 * we just have to decrement the usage count for the target if
 * it exists so that the VFS layer correctly frees it when it
 * gets overwritten.
 */
3158static int shmem_rename2(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags)
3159{
3160 struct inode *inode = d_inode(old_dentry);
3161 int they_are_dirs = S_ISDIR(inode->i_mode);
3162
3163 if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
3164 return -EINVAL;
3165
3166 if (flags & RENAME_EXCHANGE)
3167 return shmem_exchange(old_dir, old_dentry, new_dir, new_dentry);
3168
3169 if (!simple_empty(new_dentry))
3170 return -ENOTEMPTY;
3171
3172 if (flags & RENAME_WHITEOUT) {
3173 int error;
3174
3175 error = shmem_whiteout(old_dir, old_dentry);
3176 if (error)
3177 return error;
3178 }
3179
3180 if (d_really_is_positive(new_dentry)) {
3181 (void) shmem_unlink(new_dir, new_dentry);
3182 if (they_are_dirs) {
3183 drop_nlink(d_inode(new_dentry));
3184 drop_nlink(old_dir);
3185 }
3186 } else if (they_are_dirs) {
3187 drop_nlink(old_dir);
3188 inc_nlink(new_dir);
3189 }
3190
3191 old_dir->i_size -= BOGO_DIRENT_SIZE;
3192 new_dir->i_size += BOGO_DIRENT_SIZE;
3193 old_dir->i_ctime = old_dir->i_mtime =
3194 new_dir->i_ctime = new_dir->i_mtime =
3195 inode->i_ctime = current_time(old_dir);
3196 return 0;
3197}
3198
3199static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
3200{
3201 int error;
3202 int len;
3203 struct inode *inode;
3204 struct page *page;
3205 struct shmem_inode_info *info;
3206
3207 len = strlen(symname) + 1;
3208 if (len > PAGE_SIZE)
3209 return -ENAMETOOLONG;
3210
3211 inode = shmem_get_inode(dir->i_sb, dir, S_IFLNK|S_IRWXUGO, 0, VM_NORESERVE);
3212 if (!inode)
3213 return -ENOSPC;
3214
3215 error = security_inode_init_security(inode, dir, &dentry->d_name,
3216 shmem_initxattrs, NULL);
3217 if (error) {
3218 if (error != -EOPNOTSUPP) {
3219 iput(inode);
3220 return error;
3221 }
3222 error = 0;
3223 }
3224
3225 info = SHMEM_I(inode);
3226 inode->i_size = len-1;
3227 if (len <= SHORT_SYMLINK_LEN) {
3228 inode->i_link = kmemdup(symname, len, GFP_KERNEL);
3229 if (!inode->i_link) {
3230 iput(inode);
3231 return -ENOMEM;
3232 }
3233 inode->i_op = &shmem_short_symlink_operations;
3234 } else {
3235 inode_nohighmem(inode);
3236 error = shmem_getpage(inode, 0, &page, SGP_WRITE);
3237 if (error) {
3238 iput(inode);
3239 return error;
3240 }
3241 inode->i_mapping->a_ops = &shmem_aops;
3242 inode->i_op = &shmem_symlink_inode_operations;
3243 memcpy(page_address(page), symname, len);
3244 SetPageUptodate(page);
3245 set_page_dirty(page);
3246 unlock_page(page);
3247 put_page(page);
3248 }
3249 dir->i_size += BOGO_DIRENT_SIZE;
3250 dir->i_ctime = dir->i_mtime = current_time(dir);
3251 d_instantiate(dentry, inode);
3252 dget(dentry);
3253 return 0;
3254}
3255
3256static void shmem_put_link(void *arg)
3257{
3258 mark_page_accessed(arg);
3259 put_page(arg);
3260}
3261
3262static const char *shmem_get_link(struct dentry *dentry,
3263 struct inode *inode,
3264 struct delayed_call *done)
3265{
3266 struct page *page = NULL;
3267 int error;
3268 if (!dentry) {
3269 page = find_get_page(inode->i_mapping, 0);
3270 if (!page)
3271 return ERR_PTR(-ECHILD);
3272 if (!PageUptodate(page)) {
3273 put_page(page);
3274 return ERR_PTR(-ECHILD);
3275 }
3276 } else {
3277 error = shmem_getpage(inode, 0, &page, SGP_READ);
3278 if (error)
3279 return ERR_PTR(error);
3280 unlock_page(page);
3281 }
3282 set_delayed_call(done, shmem_put_link, page);
3283 return page_address(page);
3284}
3285
3286#ifdef CONFIG_TMPFS_XATTR
/*
 * Superblocks without xattr inode operations may get some security.* xattr
 * support from the LSM "for free". As soon as we have any other xattrs
 * like ACLs, we also need to implement the security.* handlers at
 * filesystem level, though.
 */

/*
 * Callback for security_inode_init_security() for acquiring xattrs.
 */
3297static int shmem_initxattrs(struct inode *inode,
3298 const struct xattr *xattr_array,
3299 void *fs_info)
3300{
3301 struct shmem_inode_info *info = SHMEM_I(inode);
3302 const struct xattr *xattr;
3303 struct simple_xattr *new_xattr;
3304 size_t len;
3305
3306 for (xattr = xattr_array; xattr->name != NULL; xattr++) {
3307 new_xattr = simple_xattr_alloc(xattr->value, xattr->value_len);
3308 if (!new_xattr)
3309 return -ENOMEM;
3310
3311 len = strlen(xattr->name) + 1;
3312 new_xattr->name = kmalloc(XATTR_SECURITY_PREFIX_LEN + len,
3313 GFP_KERNEL);
3314 if (!new_xattr->name) {
3315 kfree(new_xattr);
3316 return -ENOMEM;
3317 }
3318
3319 memcpy(new_xattr->name, XATTR_SECURITY_PREFIX,
3320 XATTR_SECURITY_PREFIX_LEN);
3321 memcpy(new_xattr->name + XATTR_SECURITY_PREFIX_LEN,
3322 xattr->name, len);
3323
3324 simple_xattr_list_add(&info->xattrs, new_xattr);
3325 }
3326
3327 return 0;
3328}
3329
3330static int shmem_xattr_handler_get(const struct xattr_handler *handler,
3331 struct dentry *unused, struct inode *inode,
3332 const char *name, void *buffer, size_t size)
3333{
3334 struct shmem_inode_info *info = SHMEM_I(inode);
3335
3336 name = xattr_full_name(handler, name);
3337 return simple_xattr_get(&info->xattrs, name, buffer, size);
3338}
3339
3340static int shmem_xattr_handler_set(const struct xattr_handler *handler,
3341 struct dentry *unused, struct inode *inode,
3342 const char *name, const void *value,
3343 size_t size, int flags)
3344{
3345 struct shmem_inode_info *info = SHMEM_I(inode);
3346
3347 name = xattr_full_name(handler, name);
3348 return simple_xattr_set(&info->xattrs, name, value, size, flags);
3349}
3350
3351static const struct xattr_handler shmem_security_xattr_handler = {
3352 .prefix = XATTR_SECURITY_PREFIX,
3353 .get = shmem_xattr_handler_get,
3354 .set = shmem_xattr_handler_set,
3355};
3356
3357static const struct xattr_handler shmem_trusted_xattr_handler = {
3358 .prefix = XATTR_TRUSTED_PREFIX,
3359 .get = shmem_xattr_handler_get,
3360 .set = shmem_xattr_handler_set,
3361};
3362
3363static const struct xattr_handler *shmem_xattr_handlers[] = {
3364#ifdef CONFIG_TMPFS_POSIX_ACL
3365 &posix_acl_access_xattr_handler,
3366 &posix_acl_default_xattr_handler,
3367#endif
3368 &shmem_security_xattr_handler,
3369 &shmem_trusted_xattr_handler,
3370 NULL
3371};
3372
3373static ssize_t shmem_listxattr(struct dentry *dentry, char *buffer, size_t size)
3374{
3375 struct shmem_inode_info *info = SHMEM_I(d_inode(dentry));
3376 return simple_xattr_list(d_inode(dentry), &info->xattrs, buffer, size);
3377}
3378#endif
3379
3380static const struct inode_operations shmem_short_symlink_operations = {
3381 .get_link = simple_get_link,
3382#ifdef CONFIG_TMPFS_XATTR
3383 .listxattr = shmem_listxattr,
3384#endif
3385};
3386
3387static const struct inode_operations shmem_symlink_inode_operations = {
3388 .get_link = shmem_get_link,
3389#ifdef CONFIG_TMPFS_XATTR
3390 .listxattr = shmem_listxattr,
3391#endif
3392};
3393
3394static struct dentry *shmem_get_parent(struct dentry *child)
3395{
3396 return ERR_PTR(-ESTALE);
3397}
3398
3399static int shmem_match(struct inode *ino, void *vfh)
3400{
3401 __u32 *fh = vfh;
3402 __u64 inum = fh[2];
3403 inum = (inum << 32) | fh[1];
3404 return ino->i_ino == inum && fh[0] == ino->i_generation;
3405}
3406
3407static struct dentry *shmem_fh_to_dentry(struct super_block *sb,
3408 struct fid *fid, int fh_len, int fh_type)
3409{
3410 struct inode *inode;
3411 struct dentry *dentry = NULL;
3412 u64 inum;
3413
3414 if (fh_len < 3)
3415 return NULL;
3416
3417 inum = fid->raw[2];
3418 inum = (inum << 32) | fid->raw[1];
3419
3420 inode = ilookup5(sb, (unsigned long)(inum + fid->raw[0]),
3421 shmem_match, fid->raw);
3422 if (inode) {
3423 dentry = d_find_alias(inode);
3424 iput(inode);
3425 }
3426
3427 return dentry;
3428}
3429
3430static int shmem_encode_fh(struct inode *inode, __u32 *fh, int *len,
3431 struct inode *parent)
3432{
3433 if (*len < 3) {
3434 *len = 3;
3435 return FILEID_INVALID;
3436 }
3437
3438 if (inode_unhashed(inode)) {
/* Unfortunately insert_inode_hash is not idempotent,
 * so as we hash inodes here rather than at creation
 * time, we need a lock to ensure we only try
 * to do it once
 */
3444 static DEFINE_SPINLOCK(lock);
3445 spin_lock(&lock);
3446 if (inode_unhashed(inode))
3447 __insert_inode_hash(inode,
3448 inode->i_ino + inode->i_generation);
3449 spin_unlock(&lock);
3450 }
3451
3452 fh[0] = inode->i_generation;
3453 fh[1] = inode->i_ino;
3454 fh[2] = ((__u64)inode->i_ino) >> 32;
3455
3456 *len = 3;
3457 return 1;
3458}
3459
3460static const struct export_operations shmem_export_ops = {
3461 .get_parent = shmem_get_parent,
3462 .encode_fh = shmem_encode_fh,
3463 .fh_to_dentry = shmem_fh_to_dentry,
3464};
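/*
 * The export_operations above let tmpfs files be referenced by file handle
 * (and hence exported, e.g. over NFS). shmem_encode_fh() packs i_generation
 * plus the 64-bit inode number into three 32-bit words, hashing the inode on
 * demand so ilookup5() can find it again. A hedged userspace sketch of the
 * handle API (mnt_fd is an fd on the tmpfs mount; error handling omitted;
 * open_by_handle_at() needs CAP_DAC_READ_SEARCH):
 *
 *	struct file_handle *fh = malloc(sizeof(*fh) + MAX_HANDLE_SZ);
 *	int mount_id;
 *	fh->handle_bytes = MAX_HANDLE_SZ;
 *	name_to_handle_at(AT_FDCWD, "/dev/shm/file", fh, &mount_id, 0);
 *	int fd = open_by_handle_at(mnt_fd, fh, O_RDONLY);
 */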
3465
3466static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo,
3467 bool remount)
3468{
3469 char *this_char, *value, *rest;
3470 struct mempolicy *mpol = NULL;
3471 uid_t uid;
3472 gid_t gid;
3473
3474 while (options != NULL) {
3475 this_char = options;
3476 for (;;) {
/*
 * NUL-terminate this option: unfortunately,
 * mount options form a comma-separated list,
 * but mpol's nodelist may also contain commas.
 */
3482 options = strchr(options, ',');
3483 if (options == NULL)
3484 break;
3485 options++;
3486 if (!isdigit(*options)) {
3487 options[-1] = '\0';
3488 break;
3489 }
3490 }
3491 if (!*this_char)
3492 continue;
3493 if ((value = strchr(this_char,'=')) != NULL) {
3494 *value++ = 0;
3495 } else {
3496 pr_err("tmpfs: No value for mount option '%s'\n",
3497 this_char);
3498 goto error;
3499 }
3500
3501 if (!strcmp(this_char,"size")) {
3502 unsigned long long size;
3503 size = memparse(value,&rest);
3504 if (*rest == '%') {
3505 size <<= PAGE_SHIFT;
3506 size *= totalram_pages;
3507 do_div(size, 100);
3508 rest++;
3509 }
3510 if (*rest)
3511 goto bad_val;
3512 sbinfo->max_blocks =
3513 DIV_ROUND_UP(size, PAGE_SIZE);
3514 } else if (!strcmp(this_char,"nr_blocks")) {
3515 sbinfo->max_blocks = memparse(value, &rest);
3516 if (*rest)
3517 goto bad_val;
3518 } else if (!strcmp(this_char,"nr_inodes")) {
3519 sbinfo->max_inodes = memparse(value, &rest);
3520 if (*rest)
3521 goto bad_val;
3522 } else if (!strcmp(this_char,"mode")) {
3523 if (remount)
3524 continue;
3525 sbinfo->mode = simple_strtoul(value, &rest, 8) & 07777;
3526 if (*rest)
3527 goto bad_val;
3528 } else if (!strcmp(this_char,"uid")) {
3529 if (remount)
3530 continue;
3531 uid = simple_strtoul(value, &rest, 0);
3532 if (*rest)
3533 goto bad_val;
3534 sbinfo->uid = make_kuid(current_user_ns(), uid);
3535 if (!uid_valid(sbinfo->uid))
3536 goto bad_val;
3537 } else if (!strcmp(this_char,"gid")) {
3538 if (remount)
3539 continue;
3540 gid = simple_strtoul(value, &rest, 0);
3541 if (*rest)
3542 goto bad_val;
3543 sbinfo->gid = make_kgid(current_user_ns(), gid);
3544 if (!gid_valid(sbinfo->gid))
3545 goto bad_val;
3546#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
3547 } else if (!strcmp(this_char, "huge")) {
3548 int huge;
3549 huge = shmem_parse_huge(value);
3550 if (huge < 0)
3551 goto bad_val;
3552 if (!has_transparent_hugepage() &&
3553 huge != SHMEM_HUGE_NEVER)
3554 goto bad_val;
3555 sbinfo->huge = huge;
3556#endif
3557#ifdef CONFIG_NUMA
3558 } else if (!strcmp(this_char,"mpol")) {
3559 mpol_put(mpol);
3560 mpol = NULL;
3561 if (mpol_parse_str(value, &mpol))
3562 goto bad_val;
3563#endif
3564 } else {
3565 pr_err("tmpfs: Bad mount option %s\n", this_char);
3566 goto error;
3567 }
3568 }
3569 sbinfo->mpol = mpol;
3570 return 0;
3571
3572bad_val:
3573 pr_err("tmpfs: Bad value '%s' for mount option '%s'\n",
3574 value, this_char);
3575error:
3576 mpol_put(mpol);
3577 return 1;
3578
3579}
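/*
 * The option names accepted above map directly to tmpfs mount options. A
 * hedged example using mount(2); the target path and ids are illustrative:
 *
 *	mount("tmpfs", "/mnt/tmp", "tmpfs", 0,
 *	      "size=50%,nr_inodes=100k,mode=1777,uid=1000,gid=1000,huge=within_size");
 *
 * "size" and "nr_blocks"/"nr_inodes" take k/m/g suffixes, and "size" may also
 * end in '%' of total RAM; "huge" is only parsed with
 * CONFIG_TRANSPARENT_HUGE_PAGECACHE and "mpol" only with CONFIG_NUMA.
 */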
3580
3581static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
3582{
3583 struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
3584 struct shmem_sb_info config = *sbinfo;
3585 unsigned long inodes;
3586 int error = -EINVAL;
3587
3588 config.mpol = NULL;
3589 if (shmem_parse_options(data, &config, true))
3590 return error;
3591
3592 spin_lock(&sbinfo->stat_lock);
3593 inodes = sbinfo->max_inodes - sbinfo->free_inodes;
3594 if (percpu_counter_compare(&sbinfo->used_blocks, config.max_blocks) > 0)
3595 goto out;
3596 if (config.max_inodes < inodes)
3597 goto out;
3598
/*
 * The tests above disallow shrinking the limits below what is already in
 * use; additionally disallow going from unlimited to limited, because in
 * that case we have no record of how much is already in use.
 */
3603 if (config.max_blocks && !sbinfo->max_blocks)
3604 goto out;
3605 if (config.max_inodes && !sbinfo->max_inodes)
3606 goto out;
3607
3608 error = 0;
3609 sbinfo->huge = config.huge;
3610 sbinfo->max_blocks = config.max_blocks;
3611 sbinfo->max_inodes = config.max_inodes;
3612 sbinfo->free_inodes = config.max_inodes - inodes;
3613
/*
 * Preserve previous mempolicy unless mpol remount option was specified.
 */
3617 if (config.mpol) {
3618 mpol_put(sbinfo->mpol);
3619 sbinfo->mpol = config.mpol;
3620 }
3621out:
3622 spin_unlock(&sbinfo->stat_lock);
3623 return error;
3624}
3625
3626static int shmem_show_options(struct seq_file *seq, struct dentry *root)
3627{
3628 struct shmem_sb_info *sbinfo = SHMEM_SB(root->d_sb);
3629
3630 if (sbinfo->max_blocks != shmem_default_max_blocks())
3631 seq_printf(seq, ",size=%luk",
3632 sbinfo->max_blocks << (PAGE_SHIFT - 10));
3633 if (sbinfo->max_inodes != shmem_default_max_inodes())
3634 seq_printf(seq, ",nr_inodes=%lu", sbinfo->max_inodes);
3635 if (sbinfo->mode != (S_IRWXUGO | S_ISVTX))
3636 seq_printf(seq, ",mode=%03ho", sbinfo->mode);
3637 if (!uid_eq(sbinfo->uid, GLOBAL_ROOT_UID))
3638 seq_printf(seq, ",uid=%u",
3639 from_kuid_munged(&init_user_ns, sbinfo->uid));
3640 if (!gid_eq(sbinfo->gid, GLOBAL_ROOT_GID))
3641 seq_printf(seq, ",gid=%u",
3642 from_kgid_munged(&init_user_ns, sbinfo->gid));
3643#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
/* Rightly or wrongly, show huge mount option unmasked by shmem_huge */
3645 if (sbinfo->huge)
3646 seq_printf(seq, ",huge=%s", shmem_format_huge(sbinfo->huge));
3647#endif
3648 shmem_show_mpol(seq, sbinfo->mpol);
3649 return 0;
3650}
3651
3652#define MFD_NAME_PREFIX "memfd:"
3653#define MFD_NAME_PREFIX_LEN (sizeof(MFD_NAME_PREFIX) - 1)
3654#define MFD_NAME_MAX_LEN (NAME_MAX - MFD_NAME_PREFIX_LEN)
3655
3656#define MFD_ALL_FLAGS (MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_HUGETLB)
3657
3658SYSCALL_DEFINE2(memfd_create,
3659 const char __user *, uname,
3660 unsigned int, flags)
3661{
3662 struct shmem_inode_info *info;
3663 struct file *file;
3664 int fd, error;
3665 char *name;
3666 long len;
3667
3668 if (!(flags & MFD_HUGETLB)) {
3669 if (flags & ~(unsigned int)MFD_ALL_FLAGS)
3670 return -EINVAL;
3671 } else {
/* Sealing is not supported on hugetlbfs (MFD_HUGETLB) files */
3673 if (flags & MFD_ALLOW_SEALING)
3674 return -EINVAL;
/* Allow huge page size encoding in flags. */
3676 if (flags & ~(unsigned int)(MFD_ALL_FLAGS |
3677 (MFD_HUGE_MASK << MFD_HUGE_SHIFT)))
3678 return -EINVAL;
3679 }
3680
/* length includes terminating zero */
3682 len = strnlen_user(uname, MFD_NAME_MAX_LEN + 1);
3683 if (len <= 0)
3684 return -EFAULT;
3685 if (len > MFD_NAME_MAX_LEN + 1)
3686 return -EINVAL;
3687
3688 name = kmalloc(len + MFD_NAME_PREFIX_LEN, GFP_KERNEL);
3689 if (!name)
3690 return -ENOMEM;
3691
3692 strcpy(name, MFD_NAME_PREFIX);
3693 if (copy_from_user(&name[MFD_NAME_PREFIX_LEN], uname, len)) {
3694 error = -EFAULT;
3695 goto err_name;
3696 }
3697
/* terminating-zero may have changed after strnlen_user() loads it */
3699 if (name[len + MFD_NAME_PREFIX_LEN - 1]) {
3700 error = -EFAULT;
3701 goto err_name;
3702 }
3703
3704 fd = get_unused_fd_flags((flags & MFD_CLOEXEC) ? O_CLOEXEC : 0);
3705 if (fd < 0) {
3706 error = fd;
3707 goto err_name;
3708 }
3709
3710 if (flags & MFD_HUGETLB) {
3711 struct user_struct *user = NULL;
3712
3713 file = hugetlb_file_setup(name, 0, VM_NORESERVE, &user,
3714 HUGETLB_ANONHUGE_INODE,
3715 (flags >> MFD_HUGE_SHIFT) &
3716 MFD_HUGE_MASK);
3717 } else
3718 file = shmem_file_setup(name, 0, VM_NORESERVE);
3719 if (IS_ERR(file)) {
3720 error = PTR_ERR(file);
3721 goto err_fd;
3722 }
3723 file->f_mode |= FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE;
3724 file->f_flags |= O_RDWR | O_LARGEFILE;
3725
3726 if (flags & MFD_ALLOW_SEALING) {
/*
 * The flags check at the beginning of the function ensures
 * this is not a hugetlbfs (MFD_HUGETLB) file.
 */
3731 info = SHMEM_I(file_inode(file));
3732 info->seals &= ~F_SEAL_SEAL;
3733 }
3734
3735 fd_install(fd, file);
3736 kfree(name);
3737 return fd;
3738
3739err_fd:
3740 put_unused_fd(fd);
3741err_name:
3742 kfree(name);
3743 return error;
3744}
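/*
 * Typical userspace use of the syscall defined above (memfd_create(2) is
 * exposed by glibc >= 2.27; older systems can go through syscall(2)).
 * A hedged sketch, error handling omitted:
 *
 *	int fd = memfd_create("my-buffer", MFD_CLOEXEC | MFD_ALLOW_SEALING);
 *	ftruncate(fd, 4096);
 *	void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 *	// before handing fd to an untrusted peer, freeze its size and sealing:
 *	fcntl(fd, F_ADD_SEALS, F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_SEAL);
 */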
3745
3746#endif
3747
3748static void shmem_put_super(struct super_block *sb)
3749{
3750 struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
3751
3752 percpu_counter_destroy(&sbinfo->used_blocks);
3753 mpol_put(sbinfo->mpol);
3754 kfree(sbinfo);
3755 sb->s_fs_info = NULL;
3756}
3757
3758int shmem_fill_super(struct super_block *sb, void *data, int silent)
3759{
3760 struct inode *inode;
3761 struct shmem_sb_info *sbinfo;
3762 int err = -ENOMEM;
3763
/* Round up to L1_CACHE_BYTES to resist false sharing */
3765 sbinfo = kzalloc(max((int)sizeof(struct shmem_sb_info),
3766 L1_CACHE_BYTES), GFP_KERNEL);
3767 if (!sbinfo)
3768 return -ENOMEM;
3769
3770 sbinfo->mode = S_IRWXUGO | S_ISVTX;
3771 sbinfo->uid = current_fsuid();
3772 sbinfo->gid = current_fsgid();
3773 sb->s_fs_info = sbinfo;
3774
3775#ifdef CONFIG_TMPFS
/*
 * Per default we only allow half of the physical ram per
 * tmpfs instance, limiting inodes to one per page of lowmem;
 * but the internal instance is left unlimited.
 */
3781 if (!(sb->s_flags & MS_KERNMOUNT)) {
3782 sbinfo->max_blocks = shmem_default_max_blocks();
3783 sbinfo->max_inodes = shmem_default_max_inodes();
3784 if (shmem_parse_options(data, sbinfo, false)) {
3785 err = -EINVAL;
3786 goto failed;
3787 }
3788 } else {
3789 sb->s_flags |= MS_NOUSER;
3790 }
3791 sb->s_export_op = &shmem_export_ops;
3792 sb->s_flags |= MS_NOSEC;
3793#else
3794 sb->s_flags |= MS_NOUSER;
3795#endif
3796
3797 spin_lock_init(&sbinfo->stat_lock);
3798 if (percpu_counter_init(&sbinfo->used_blocks, 0, GFP_KERNEL))
3799 goto failed;
3800 sbinfo->free_inodes = sbinfo->max_inodes;
3801 spin_lock_init(&sbinfo->shrinklist_lock);
3802 INIT_LIST_HEAD(&sbinfo->shrinklist);
3803
3804 sb->s_maxbytes = MAX_LFS_FILESIZE;
3805 sb->s_blocksize = PAGE_SIZE;
3806 sb->s_blocksize_bits = PAGE_SHIFT;
3807 sb->s_magic = TMPFS_MAGIC;
3808 sb->s_op = &shmem_ops;
3809 sb->s_time_gran = 1;
3810#ifdef CONFIG_TMPFS_XATTR
3811 sb->s_xattr = shmem_xattr_handlers;
3812#endif
3813#ifdef CONFIG_TMPFS_POSIX_ACL
3814 sb->s_flags |= MS_POSIXACL;
3815#endif
3816 uuid_gen(&sb->s_uuid);
3817
3818 inode = shmem_get_inode(sb, NULL, S_IFDIR | sbinfo->mode, 0, VM_NORESERVE);
3819 if (!inode)
3820 goto failed;
3821 inode->i_uid = sbinfo->uid;
3822 inode->i_gid = sbinfo->gid;
3823 sb->s_root = d_make_root(inode);
3824 if (!sb->s_root)
3825 goto failed;
3826 return 0;
3827
3828failed:
3829 shmem_put_super(sb);
3830 return err;
3831}
3832
3833static struct kmem_cache *shmem_inode_cachep;
3834
3835static struct inode *shmem_alloc_inode(struct super_block *sb)
3836{
3837 struct shmem_inode_info *info;
3838 info = kmem_cache_alloc(shmem_inode_cachep, GFP_KERNEL);
3839 if (!info)
3840 return NULL;
3841 return &info->vfs_inode;
3842}
3843
3844static void shmem_destroy_callback(struct rcu_head *head)
3845{
3846 struct inode *inode = container_of(head, struct inode, i_rcu);
3847 if (S_ISLNK(inode->i_mode))
3848 kfree(inode->i_link);
3849 kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode));
3850}
3851
3852static void shmem_destroy_inode(struct inode *inode)
3853{
3854 if (S_ISREG(inode->i_mode))
3855 mpol_free_shared_policy(&SHMEM_I(inode)->policy);
3856 call_rcu(&inode->i_rcu, shmem_destroy_callback);
3857}
3858
3859static void shmem_init_inode(void *foo)
3860{
3861 struct shmem_inode_info *info = foo;
3862 inode_init_once(&info->vfs_inode);
3863}
3864
3865static int shmem_init_inodecache(void)
3866{
3867 shmem_inode_cachep = kmem_cache_create("shmem_inode_cache",
3868 sizeof(struct shmem_inode_info),
3869 0, SLAB_PANIC|SLAB_ACCOUNT, shmem_init_inode);
3870 return 0;
3871}
3872
3873static void shmem_destroy_inodecache(void)
3874{
3875 kmem_cache_destroy(shmem_inode_cachep);
3876}
3877
3878static const struct address_space_operations shmem_aops = {
3879 .writepage = shmem_writepage,
3880 .set_page_dirty = __set_page_dirty_no_writeback,
3881#ifdef CONFIG_TMPFS
3882 .write_begin = shmem_write_begin,
3883 .write_end = shmem_write_end,
3884#endif
3885#ifdef CONFIG_MIGRATION
3886 .migratepage = migrate_page,
3887#endif
3888 .error_remove_page = generic_error_remove_page,
3889};
3890
3891static const struct file_operations shmem_file_operations = {
3892 .mmap = shmem_mmap,
3893 .get_unmapped_area = shmem_get_unmapped_area,
3894#ifdef CONFIG_TMPFS
3895 .llseek = shmem_file_llseek,
3896 .read_iter = shmem_file_read_iter,
3897 .write_iter = generic_file_write_iter,
3898 .fsync = noop_fsync,
3899 .splice_read = generic_file_splice_read,
3900 .splice_write = iter_file_splice_write,
3901 .fallocate = shmem_fallocate,
3902#endif
3903};
3904
3905static const struct inode_operations shmem_inode_operations = {
3906 .getattr = shmem_getattr,
3907 .setattr = shmem_setattr,
3908#ifdef CONFIG_TMPFS_XATTR
3909 .listxattr = shmem_listxattr,
3910 .set_acl = simple_set_acl,
3911#endif
3912};
3913
3914static const struct inode_operations shmem_dir_inode_operations = {
3915#ifdef CONFIG_TMPFS
3916 .create = shmem_create,
3917 .lookup = simple_lookup,
3918 .link = shmem_link,
3919 .unlink = shmem_unlink,
3920 .symlink = shmem_symlink,
3921 .mkdir = shmem_mkdir,
3922 .rmdir = shmem_rmdir,
3923 .mknod = shmem_mknod,
3924 .rename = shmem_rename2,
3925 .tmpfile = shmem_tmpfile,
3926#endif
3927#ifdef CONFIG_TMPFS_XATTR
3928 .listxattr = shmem_listxattr,
3929#endif
3930#ifdef CONFIG_TMPFS_POSIX_ACL
3931 .setattr = shmem_setattr,
3932 .set_acl = simple_set_acl,
3933#endif
3934};
3935
3936static const struct inode_operations shmem_special_inode_operations = {
3937#ifdef CONFIG_TMPFS_XATTR
3938 .listxattr = shmem_listxattr,
3939#endif
3940#ifdef CONFIG_TMPFS_POSIX_ACL
3941 .setattr = shmem_setattr,
3942 .set_acl = simple_set_acl,
3943#endif
3944};
3945
3946static const struct super_operations shmem_ops = {
3947 .alloc_inode = shmem_alloc_inode,
3948 .destroy_inode = shmem_destroy_inode,
3949#ifdef CONFIG_TMPFS
3950 .statfs = shmem_statfs,
3951 .remount_fs = shmem_remount_fs,
3952 .show_options = shmem_show_options,
3953#endif
3954 .evict_inode = shmem_evict_inode,
3955 .drop_inode = generic_delete_inode,
3956 .put_super = shmem_put_super,
3957#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
3958 .nr_cached_objects = shmem_unused_huge_count,
3959 .free_cached_objects = shmem_unused_huge_scan,
3960#endif
3961};
3962
3963static const struct vm_operations_struct shmem_vm_ops = {
3964 .fault = shmem_fault,
3965 .map_pages = filemap_map_pages,
3966#ifdef CONFIG_NUMA
3967 .set_policy = shmem_set_policy,
3968 .get_policy = shmem_get_policy,
3969#endif
3970};
3971
3972static struct dentry *shmem_mount(struct file_system_type *fs_type,
3973 int flags, const char *dev_name, void *data)
3974{
3975 return mount_nodev(fs_type, flags, data, shmem_fill_super);
3976}
3977
3978static struct file_system_type shmem_fs_type = {
3979 .owner = THIS_MODULE,
3980 .name = "tmpfs",
3981 .mount = shmem_mount,
3982 .kill_sb = kill_litter_super,
3983 .fs_flags = FS_USERNS_MOUNT,
3984};
3985
3986int __init shmem_init(void)
3987{
3988 int error;
3989
/* If rootfs called this, don't re-init the inode cache */
3991 if (shmem_inode_cachep)
3992 return 0;
3993
3994 error = shmem_init_inodecache();
3995 if (error)
3996 goto out3;
3997
3998 error = register_filesystem(&shmem_fs_type);
3999 if (error) {
4000 pr_err("Could not register tmpfs\n");
4001 goto out2;
4002 }
4003
4004 shm_mnt = kern_mount(&shmem_fs_type);
4005 if (IS_ERR(shm_mnt)) {
4006 error = PTR_ERR(shm_mnt);
4007 pr_err("Could not kern_mount tmpfs\n");
4008 goto out1;
4009 }
4010
4011#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
4012 if (has_transparent_hugepage() && shmem_huge > SHMEM_HUGE_DENY)
4013 SHMEM_SB(shm_mnt->mnt_sb)->huge = shmem_huge;
4014 else
4015 shmem_huge = 0;
4016#endif
4017 return 0;
4018
4019out1:
4020 unregister_filesystem(&shmem_fs_type);
4021out2:
4022 shmem_destroy_inodecache();
4023out3:
4024 shm_mnt = ERR_PTR(error);
4025 return error;
4026}
4027
4028#if defined(CONFIG_TRANSPARENT_HUGE_PAGECACHE) && defined(CONFIG_SYSFS)
4029static ssize_t shmem_enabled_show(struct kobject *kobj,
4030 struct kobj_attribute *attr, char *buf)
4031{
4032 int values[] = {
4033 SHMEM_HUGE_ALWAYS,
4034 SHMEM_HUGE_WITHIN_SIZE,
4035 SHMEM_HUGE_ADVISE,
4036 SHMEM_HUGE_NEVER,
4037 SHMEM_HUGE_DENY,
4038 SHMEM_HUGE_FORCE,
4039 };
4040 int i, count;
4041
4042 for (i = 0, count = 0; i < ARRAY_SIZE(values); i++) {
4043 const char *fmt = shmem_huge == values[i] ? "[%s] " : "%s ";
4044
4045 count += sprintf(buf + count, fmt,
4046 shmem_format_huge(values[i]));
4047 }
4048 buf[count - 1] = '\n';
4049 return count;
4050}
4051
4052static ssize_t shmem_enabled_store(struct kobject *kobj,
4053 struct kobj_attribute *attr, const char *buf, size_t count)
4054{
4055 char tmp[16];
4056 int huge;
4057
4058 if (count + 1 > sizeof(tmp))
4059 return -EINVAL;
4060 memcpy(tmp, buf, count);
4061 tmp[count] = '\0';
4062 if (count && tmp[count - 1] == '\n')
4063 tmp[count - 1] = '\0';
4064
4065 huge = shmem_parse_huge(tmp);
4066 if (huge == -EINVAL)
4067 return -EINVAL;
4068 if (!has_transparent_hugepage() &&
4069 huge != SHMEM_HUGE_NEVER && huge != SHMEM_HUGE_DENY)
4070 return -EINVAL;
4071
4072 shmem_huge = huge;
4073 if (shmem_huge > SHMEM_HUGE_DENY)
4074 SHMEM_SB(shm_mnt->mnt_sb)->huge = shmem_huge;
4075 return count;
4076}
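/*
 * The attribute above is the system-wide override for huge page use on the
 * internal shm mount, exposed (with CONFIG_TRANSPARENT_HUGE_PAGECACHE and
 * CONFIG_SYSFS) as /sys/kernel/mm/transparent_hugepage/shmem_enabled.
 * Reading it lists all values with the current one bracketed, e.g.
 * "always within_size advise [never] deny force". "deny" and "force" are
 * testing overrides applied to every mount. A hedged C sketch of setting it:
 *
 *	int fd = open("/sys/kernel/mm/transparent_hugepage/shmem_enabled", O_WRONLY);
 *	write(fd, "within_size", strlen("within_size"));
 *	close(fd);
 */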
4077
4078struct kobj_attribute shmem_enabled_attr =
4079 __ATTR(shmem_enabled, 0644, shmem_enabled_show, shmem_enabled_store);
4080#endif
4081
4082#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
4083bool shmem_huge_enabled(struct vm_area_struct *vma)
4084{
4085 struct inode *inode = file_inode(vma->vm_file);
4086 struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
4087 loff_t i_size;
4088 pgoff_t off;
4089
4090 if (shmem_huge == SHMEM_HUGE_FORCE)
4091 return true;
4092 if (shmem_huge == SHMEM_HUGE_DENY)
4093 return false;
4094 switch (sbinfo->huge) {
4095 case SHMEM_HUGE_NEVER:
4096 return false;
4097 case SHMEM_HUGE_ALWAYS:
4098 return true;
4099 case SHMEM_HUGE_WITHIN_SIZE:
4100 off = round_up(vma->vm_pgoff, HPAGE_PMD_NR);
4101 i_size = round_up(i_size_read(inode), PAGE_SIZE);
4102 if (i_size >= HPAGE_PMD_SIZE &&
4103 i_size >> PAGE_SHIFT >= off)
4104 return true;
		/* fall through */
	case SHMEM_HUGE_ADVISE:
		/* TODO: implement fadvise() hints */
4107 return (vma->vm_flags & VM_HUGEPAGE);
4108 default:
4109 VM_BUG_ON(1);
4110 return false;
4111 }
4112}
4113#endif
4114
4115#else
4116
/*
 * tiny-shmem: simple shmemfs and tmpfs using ramfs code
 *
 * This is intended for small systems where the benefits of the full
 * shmem code (swap-backed and resource-limited) are outweighed by
 * their complexity. On systems without swap this code should be
 * effectively equivalent, but much lighter weight.
 */
4126static struct file_system_type shmem_fs_type = {
4127 .name = "tmpfs",
4128 .mount = ramfs_mount,
4129 .kill_sb = kill_litter_super,
4130 .fs_flags = FS_USERNS_MOUNT,
4131};
4132
4133int __init shmem_init(void)
4134{
4135 BUG_ON(register_filesystem(&shmem_fs_type) != 0);
4136
4137 shm_mnt = kern_mount(&shmem_fs_type);
4138 BUG_ON(IS_ERR(shm_mnt));
4139
4140 return 0;
4141}
4142
4143int shmem_unuse(swp_entry_t swap, struct page *page)
4144{
4145 return 0;
4146}
4147
4148int shmem_lock(struct file *file, int lock, struct user_struct *user)
4149{
4150 return 0;
4151}
4152
4153void shmem_unlock_mapping(struct address_space *mapping)
4154{
4155}
4156
4157#ifdef CONFIG_MMU
4158unsigned long shmem_get_unmapped_area(struct file *file,
4159 unsigned long addr, unsigned long len,
4160 unsigned long pgoff, unsigned long flags)
4161{
4162 return current->mm->get_unmapped_area(file, addr, len, pgoff, flags);
4163}
4164#endif
4165
4166void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
4167{
4168 truncate_inode_pages_range(inode->i_mapping, lstart, lend);
4169}
4170EXPORT_SYMBOL_GPL(shmem_truncate_range);
4171
4172#define shmem_vm_ops generic_file_vm_ops
4173#define shmem_file_operations ramfs_file_operations
4174#define shmem_get_inode(sb, dir, mode, dev, flags) ramfs_get_inode(sb, dir, mode, dev)
4175#define shmem_acct_size(flags, size) 0
4176#define shmem_unacct_size(flags, size) do {} while (0)
4177
4178#endif
4179
4180
4181
4182static const struct dentry_operations anon_ops = {
4183 .d_dname = simple_dname
4184};
4185
4186static struct file *__shmem_file_setup(const char *name, loff_t size,
4187 unsigned long flags, unsigned int i_flags)
4188{
4189 struct file *res;
4190 struct inode *inode;
4191 struct path path;
4192 struct super_block *sb;
4193 struct qstr this;
4194
4195 if (IS_ERR(shm_mnt))
4196 return ERR_CAST(shm_mnt);
4197
4198 if (size < 0 || size > MAX_LFS_FILESIZE)
4199 return ERR_PTR(-EINVAL);
4200
4201 if (shmem_acct_size(flags, size))
4202 return ERR_PTR(-ENOMEM);
4203
4204 res = ERR_PTR(-ENOMEM);
4205 this.name = name;
4206 this.len = strlen(name);
4207 this.hash = 0;
4208 sb = shm_mnt->mnt_sb;
4209 path.mnt = mntget(shm_mnt);
4210 path.dentry = d_alloc_pseudo(sb, &this);
4211 if (!path.dentry)
4212 goto put_memory;
4213 d_set_d_op(path.dentry, &anon_ops);
4214
4215 res = ERR_PTR(-ENOSPC);
4216 inode = shmem_get_inode(sb, NULL, S_IFREG | S_IRWXUGO, 0, flags);
4217 if (!inode)
4218 goto put_memory;
4219
4220 inode->i_flags |= i_flags;
4221 d_instantiate(path.dentry, inode);
4222 inode->i_size = size;
4223 clear_nlink(inode);
4224 res = ERR_PTR(ramfs_nommu_expand_for_mapping(inode, size));
4225 if (IS_ERR(res))
4226 goto put_path;
4227
4228 res = alloc_file(&path, FMODE_WRITE | FMODE_READ,
4229 &shmem_file_operations);
4230 if (IS_ERR(res))
4231 goto put_path;
4232
4233 return res;
4234
4235put_memory:
4236 shmem_unacct_size(flags, size);
4237put_path:
4238 path_put(&path);
4239 return res;
4240}
4241
/**
 * shmem_kernel_file_setup - get an unlinked file living in tmpfs which must be
 *	kernel internal. There will be NO LSM permission checks against the
 *	underlying inode. So users of this interface must, by themselves, check
 *	that the underlying inode is not being accessed in insecure ways.
 * @name: name for dentry (to be seen in /proc/<pid>/maps)
 * @size: size to be set for the file
 * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size
 */
4252struct file *shmem_kernel_file_setup(const char *name, loff_t size, unsigned long flags)
4253{
4254 return __shmem_file_setup(name, size, flags, S_PRIVATE);
4255}
4256
/**
 * shmem_file_setup - get an unlinked file living in tmpfs
 * @name: name for dentry (to be seen in /proc/<pid>/maps)
 * @size: size to be set for the file
 * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size
 */
4263struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags)
4264{
4265 return __shmem_file_setup(name, size, flags, 0);
4266}
4267EXPORT_SYMBOL_GPL(shmem_file_setup);
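/*
 * In-kernel users (e.g. drm/i915 GEM objects, Android's ashmem) typically
 * pair shmem_file_setup() with shmem_read_mapping_page_gfp() further below.
 * A hedged sketch of such driver-side use; the names "driver-bo" and
 * obj_size are illustrative, not from this file:
 *
 *	struct file *filp = shmem_file_setup("driver-bo", obj_size, VM_NORESERVE);
 *	if (IS_ERR(filp))
 *		return PTR_ERR(filp);
 *	struct page *page = shmem_read_mapping_page_gfp(filp->f_mapping, 0,
 *					mapping_gfp_mask(filp->f_mapping));
 *	// ... use the page, then drop the references:
 *	put_page(page);
 *	fput(filp);
 */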
4268
/**
 * shmem_zero_setup - setup a shared anonymous mapping
 * @vma: the vma to be mmapped is prepared by do_mmap_pgoff
 */
4273int shmem_zero_setup(struct vm_area_struct *vma)
4274{
4275 struct file *file;
4276 loff_t size = vma->vm_end - vma->vm_start;
4277
/*
 * Cloning a new file under mmap_sem leads to a lock ordering conflict
 * between XFS directory reading and selinux: since this file is only
 * accessible to the user through its mapping, use S_PRIVATE flag to
 * skip checking ACLs by the LSM.
 */
4284 file = __shmem_file_setup("dev/zero", size, vma->vm_flags, S_PRIVATE);
4285 if (IS_ERR(file))
4286 return PTR_ERR(file);
4287
4288 if (vma->vm_file)
4289 fput(vma->vm_file);
4290 vma->vm_file = file;
4291 vma->vm_ops = &shmem_vm_ops;
4292
4293 if (IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE) &&
4294 ((vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK) <
4295 (vma->vm_end & HPAGE_PMD_MASK)) {
4296 khugepaged_enter(vma, vma->vm_flags);
4297 }
4298
4299 return 0;
4300}
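/*
 * shmem_zero_setup() is what ultimately backs shared anonymous memory:
 * mmap_region() calls it for mappings created like
 *
 *	mmap(NULL, len, PROT_READ | PROT_WRITE,
 *	     MAP_SHARED | MAP_ANONYMOUS, -1, 0);
 *
 * so, unlike private anonymous memory, stores are visible across fork()
 * because all users share the same unlinked tmpfs file.
 */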
4301
/**
 * shmem_read_mapping_page_gfp - read into page cache, using specified page allocation flags.
 * @mapping:	the page's address_space
 * @index:	the page index
 * @gfp:	the page allocator flags to use if allocating
 *
 * This behaves as a tmpfs "read_cache_page_gfp(mapping, index, gfp)",
 * with any new page allocations done using the specified allocation flags.
 * But read_cache_page_gfp() uses the ->readpage() method: which does not
 * suit tmpfs, since it may have pages in swapcache, and needs to find those
 * for itself; although drivers/gpu/drm i915 and ttm rely upon this support.
 */
4317struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
4318 pgoff_t index, gfp_t gfp)
4319{
4320#ifdef CONFIG_SHMEM
4321 struct inode *inode = mapping->host;
4322 struct page *page;
4323 int error;
4324
4325 BUG_ON(mapping->a_ops != &shmem_aops);
4326 error = shmem_getpage_gfp(inode, index, &page, SGP_CACHE,
4327 gfp, NULL, NULL, NULL);
4328 if (error)
4329 page = ERR_PTR(error);
4330 else
4331 unlock_page(page);
4332 return page;
4333#else
/*
 * The tiny !SHMEM case uses ramfs without swap
 */
4337 return read_cache_page_gfp(mapping, index, gfp);
4338#endif
4339}
4340EXPORT_SYMBOL_GPL(shmem_read_mapping_page_gfp);
4341