1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23#include <linux/fs.h>
24#include <linux/init.h>
25#include <linux/vfs.h>
26#include <linux/mount.h>
27#include <linux/pagemap.h>
28#include <linux/file.h>
29#include <linux/mm.h>
30#include <linux/module.h>
31#include <linux/percpu_counter.h>
32#include <linux/swap.h>
33
34static struct vfsmount *shm_mnt;
35
36#ifdef CONFIG_SHMEM
37
38
39
40
41
42
43#include <linux/xattr.h>
44#include <linux/exportfs.h>
45#include <linux/posix_acl.h>
46#include <linux/generic_acl.h>
47#include <linux/mman.h>
48#include <linux/string.h>
49#include <linux/slab.h>
50#include <linux/backing-dev.h>
51#include <linux/shmem_fs.h>
52#include <linux/writeback.h>
53#include <linux/blkdev.h>
54#include <linux/security.h>
55#include <linux/swapops.h>
56#include <linux/mempolicy.h>
57#include <linux/namei.h>
58#include <linux/ctype.h>
59#include <linux/migrate.h>
60#include <linux/highmem.h>
61#include <linux/seq_file.h>
62#include <linux/magic.h>
63
64#include <asm/uaccess.h>
65#include <asm/div64.h>
66#include <asm/pgtable.h>
67
68
69
70
71
72
73
74
75
76
77
78
79
/*
 * Number of unsigned-long-sized slots that fit in one page-cache page, and
 * that number squared (computed as unsigned long long).
 */
#define ENTRIES_PER_PAGE (PAGE_CACHE_SIZE/sizeof(unsigned long))
#define ENTRIES_PER_PAGEPAGE ((unsigned long long)ENTRIES_PER_PAGE*ENTRIES_PER_PAGE)

/*
 * Largest page index the swap vector can describe (the direct entries plus
 * the indirect ones), and the same limit expressed in bytes.
 */
#define SHMSWP_MAX_INDEX (SHMEM_NR_DIRECT + (ENTRIES_PER_PAGEPAGE/2) * (ENTRIES_PER_PAGE+1))
#define SHMSWP_MAX_BYTES (SHMSWP_MAX_INDEX << PAGE_CACHE_SHIFT)

/*
 * File size limit: the smaller of the swap vector limit and MAX_LFS_FILESIZE,
 * in bytes and as a page index.  shmem_getpage() rejects idx >= SHMEM_MAX_INDEX
 * with -EFBIG.
 */
#define SHMEM_MAX_BYTES min_t(unsigned long long, SHMSWP_MAX_BYTES, MAX_LFS_FILESIZE)
#define SHMEM_MAX_INDEX ((unsigned long)((SHMEM_MAX_BYTES+1) >> PAGE_CACHE_SHIFT))

/* i_blocks is kept in 512-byte units; VM_ACCT converts a byte size to pages. */
#define BLOCKS_PER_PAGE (PAGE_CACHE_SIZE/512)
#define VM_ACCT(size) (PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT)

/*
 * Private state bits kept in info->flags; they reuse the values of VM_READ
 * and VM_WRITE.  SHMEM_PAGEIN is set when a page is added to the page cache,
 * SHMEM_TRUNCATE while shmem_truncate_range() is running.
 */
#define SHMEM_PAGEIN VM_READ
#define SHMEM_TRUNCATE VM_WRITE

/* Maximum number of steps the truncation helpers take between cond_resched() checks. */
#define LATENCY_LIMIT 64

/*
 * NOTE(review): not referenced in the visible part of this file; by its name
 * presumably a nominal size charged per directory entry - confirm at its users.
 */
#define BOGO_DIRENT_SIZE 20
101
102
/* Request type passed to shmem_getpage() and shmem_swp_alloc(). */
enum sgp_type {
	SGP_READ,	/* must not exceed i_size; no page or index page is allocated */
	SGP_CACHE,	/* must not exceed i_size; may allocate */
	SGP_DIRTY,	/* like SGP_CACHE, but a newly allocated page is set dirty */
	SGP_WRITE,	/* skips the i_size check; may allocate */
};
109
110#ifdef CONFIG_TMPFS
111static unsigned long shmem_default_max_blocks(void)
112{
113 return totalram_pages / 2;
114}
115
116static unsigned long shmem_default_max_inodes(void)
117{
118 return min(totalram_pages - totalhigh_pages, totalram_pages / 2);
119}
120#endif
121
122static int shmem_getpage(struct inode *inode, unsigned long idx,
123 struct page **pagep, enum sgp_type sgp, int *type);
124
125static inline struct page *shmem_dir_alloc(gfp_t gfp_mask)
126{
127
128
129
130
131
132
133
134 return alloc_pages((gfp_mask & ~GFP_MOVABLE_MASK) | __GFP_ZERO,
135 PAGE_CACHE_SHIFT-PAGE_SHIFT);
136}
137
138static inline void shmem_dir_free(struct page *page)
139{
140 __free_pages(page, PAGE_CACHE_SHIFT-PAGE_SHIFT);
141}
142
143static struct page **shmem_dir_map(struct page *page)
144{
145 return (struct page **)kmap_atomic(page, KM_USER0);
146}
147
148static inline void shmem_dir_unmap(struct page **dir)
149{
150 kunmap_atomic(dir, KM_USER0);
151}
152
153static swp_entry_t *shmem_swp_map(struct page *page)
154{
155 return (swp_entry_t *)kmap_atomic(page, KM_USER1);
156}
157
158static inline void shmem_swp_balance_unmap(void)
159{
160
161
162
163
164
165
166
167 (void) kmap_atomic(ZERO_PAGE(0), KM_USER1);
168}
169
170static inline void shmem_swp_unmap(swp_entry_t *entry)
171{
172 kunmap_atomic(entry, KM_USER1);
173}
174
175static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb)
176{
177 return sb->s_fs_info;
178}
179
180
181
182
183
184
185
186static inline int shmem_acct_size(unsigned long flags, loff_t size)
187{
188 return (flags & VM_NORESERVE) ?
189 0 : security_vm_enough_memory_kern(VM_ACCT(size));
190}
191
192static inline void shmem_unacct_size(unsigned long flags, loff_t size)
193{
194 if (!(flags & VM_NORESERVE))
195 vm_unacct_memory(VM_ACCT(size));
196}
197
198
199
200
201
202
203
204static inline int shmem_acct_block(unsigned long flags)
205{
206 return (flags & VM_NORESERVE) ?
207 security_vm_enough_memory_kern(VM_ACCT(PAGE_CACHE_SIZE)) : 0;
208}
209
210static inline void shmem_unacct_blocks(unsigned long flags, long pages)
211{
212 if (flags & VM_NORESERVE)
213 vm_unacct_memory(pages * VM_ACCT(PAGE_CACHE_SIZE));
214}
215
/*
 * Forward declarations of the operation tables; their definitions are
 * outside the visible part of this file.
 */
static const struct super_operations shmem_ops;
static const struct address_space_operations shmem_aops;
static const struct file_operations shmem_file_operations;
static const struct inode_operations shmem_inode_operations;
static const struct inode_operations shmem_dir_inode_operations;
static const struct inode_operations shmem_special_inode_operations;
static const struct vm_operations_struct shmem_vm_ops;

/* Backing device description: no readahead pages, capability flags as listed. */
static struct backing_dev_info shmem_backing_dev_info __read_mostly = {
	.ra_pages = 0,
	.capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_SWAP_BACKED,
	.unplug_io_fn = default_unplug_io_fn,
};

/*
 * List of shmem inodes (linked through info->swaplist) that shmem_unuse()
 * scans.  shmem_writepage() puts an inode on it when it moves one of the
 * inode's pages to swap; list changes are made under shmem_swaplist_mutex.
 */
static LIST_HEAD(shmem_swaplist);
static DEFINE_MUTEX(shmem_swaplist_mutex);
232
233static void shmem_free_blocks(struct inode *inode, long pages)
234{
235 struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
236 if (sbinfo->max_blocks) {
237 percpu_counter_add(&sbinfo->used_blocks, -pages);
238 spin_lock(&inode->i_lock);
239 inode->i_blocks -= pages*BLOCKS_PER_PAGE;
240 spin_unlock(&inode->i_lock);
241 }
242}
243
244static int shmem_reserve_inode(struct super_block *sb)
245{
246 struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
247 if (sbinfo->max_inodes) {
248 spin_lock(&sbinfo->stat_lock);
249 if (!sbinfo->free_inodes) {
250 spin_unlock(&sbinfo->stat_lock);
251 return -ENOSPC;
252 }
253 sbinfo->free_inodes--;
254 spin_unlock(&sbinfo->stat_lock);
255 }
256 return 0;
257}
258
259static void shmem_free_inode(struct super_block *sb)
260{
261 struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
262 if (sbinfo->max_inodes) {
263 spin_lock(&sbinfo->stat_lock);
264 sbinfo->free_inodes++;
265 spin_unlock(&sbinfo->stat_lock);
266 }
267}
268
269
270
271
272
273
274
275
276
277
278
279
280
281static void shmem_recalc_inode(struct inode *inode)
282{
283 struct shmem_inode_info *info = SHMEM_I(inode);
284 long freed;
285
286 freed = info->alloced - info->swapped - inode->i_mapping->nrpages;
287 if (freed > 0) {
288 info->alloced -= freed;
289 shmem_unacct_blocks(info->flags, freed);
290 shmem_free_blocks(inode, freed);
291 }
292}
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
/*
 * shmem_swp_entry - find the swap entry for a page index
 * @info:  shmem inode info holding i_direct[] and the i_indirect page
 * @index: page index whose swap entry is wanted
 * @page:  NULL for a pure lookup; otherwise points to a spare index page
 *         (shmem_swp_alloc() passes one from shmem_dir_alloc()).  When an
 *         index page is missing, *page is installed in its place and *page
 *         is set to NULL.
 *
 * Layout walked below: indices below SHMEM_NR_DIRECT live in info->i_direct.
 * Above that, the first half of the i_indirect page points straight at pages
 * of swap entries; each slot in the second half points at a middle directory
 * page, which in turn points at pages of swap entries.
 *
 * Returns a pointer to the entry, to be released with shmem_swp_unmap(), or
 * NULL when an index page on the way is missing.  NULL is also returned
 * right after *page was installed at the top or middle level, so the caller
 * is expected to retry, with another spare page if needed.
 */
static swp_entry_t *shmem_swp_entry(struct shmem_inode_info *info, unsigned long index, struct page **page)
{
	unsigned long offset;
	struct page **dir;
	struct page *subdir;

	if (index < SHMEM_NR_DIRECT) {
		/* nothing is mapped for a direct entry: keep the unmap balanced */
		shmem_swp_balance_unmap();
		return info->i_direct+index;
	}
	if (!info->i_indirect) {
		if (page) {
			info->i_indirect = *page;
			*page = NULL;
		}
		return NULL;			/* need another page */
	}

	index -= SHMEM_NR_DIRECT;
	offset = index % ENTRIES_PER_PAGE;	/* slot within the swap entry page */
	index /= ENTRIES_PER_PAGE;		/* which swap entry page */
	dir = shmem_dir_map(info->i_indirect);

	if (index >= ENTRIES_PER_PAGE/2) {
		/* second half of the top page: go through a middle directory */
		index -= ENTRIES_PER_PAGE/2;
		dir += ENTRIES_PER_PAGE/2 + index/ENTRIES_PER_PAGE;
		index %= ENTRIES_PER_PAGE;
		subdir = *dir;
		if (!subdir) {
			if (page) {
				*dir = *page;
				*page = NULL;
			}
			shmem_dir_unmap(dir);
			return NULL;		/* need another page */
		}
		shmem_dir_unmap(dir);
		dir = shmem_dir_map(subdir);
	}

	dir += index;
	subdir = *dir;
	if (!subdir) {
		if (!page || !(subdir = *page)) {
			shmem_dir_unmap(dir);
			return NULL;		/* need a page */
		}
		/* install the spare page as the swap entry page and carry on */
		*dir = subdir;
		*page = NULL;
	}
	shmem_dir_unmap(dir);
	return shmem_swp_map(subdir) + offset;
}
384
385static void shmem_swp_set(struct shmem_inode_info *info, swp_entry_t *entry, unsigned long value)
386{
387 long incdec = value? 1: -1;
388
389 entry->val = value;
390 info->swapped += incdec;
391 if ((unsigned long)(entry - info->i_direct) >= SHMEM_NR_DIRECT) {
392 struct page *page = kmap_atomic_to_page(entry);
393 set_page_private(page, page_private(page) + incdec);
394 }
395}
396
397
398
399
400
401
402
403
404
405static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long index, enum sgp_type sgp)
406{
407 struct inode *inode = &info->vfs_inode;
408 struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
409 struct page *page = NULL;
410 swp_entry_t *entry;
411
412 if (sgp != SGP_WRITE &&
413 ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode))
414 return ERR_PTR(-EINVAL);
415
416 while (!(entry = shmem_swp_entry(info, index, &page))) {
417 if (sgp == SGP_READ)
418 return shmem_swp_map(ZERO_PAGE(0));
419
420
421
422
423
424 if (sbinfo->max_blocks) {
425 if (percpu_counter_compare(&sbinfo->used_blocks, (sbinfo->max_blocks - 1)) > 0)
426 return ERR_PTR(-ENOSPC);
427 percpu_counter_inc(&sbinfo->used_blocks);
428 spin_lock(&inode->i_lock);
429 inode->i_blocks += BLOCKS_PER_PAGE;
430 spin_unlock(&inode->i_lock);
431 }
432
433 spin_unlock(&info->lock);
434 page = shmem_dir_alloc(mapping_gfp_mask(inode->i_mapping));
435 spin_lock(&info->lock);
436
437 if (!page) {
438 shmem_free_blocks(inode, 1);
439 return ERR_PTR(-ENOMEM);
440 }
441 if (sgp != SGP_WRITE &&
442 ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
443 entry = ERR_PTR(-EINVAL);
444 break;
445 }
446 if (info->next_index <= index)
447 info->next_index = index + 1;
448 }
449 if (page) {
450
451 shmem_free_blocks(inode, 1);
452 shmem_dir_free(page);
453 }
454 if (info->next_index <= index && !IS_ERR(entry))
455 info->next_index = index + 1;
456 return entry;
457}
458
459
460
461
462
463
464
465static int shmem_free_swp(swp_entry_t *dir, swp_entry_t *edir,
466 spinlock_t *punch_lock)
467{
468 spinlock_t *punch_unlock = NULL;
469 swp_entry_t *ptr;
470 int freed = 0;
471
472 for (ptr = dir; ptr < edir; ptr++) {
473 if (ptr->val) {
474 if (unlikely(punch_lock)) {
475 punch_unlock = punch_lock;
476 punch_lock = NULL;
477 spin_lock(punch_unlock);
478 if (!ptr->val)
479 continue;
480 }
481 free_swap_and_cache(*ptr);
482 *ptr = (swp_entry_t){0};
483 freed++;
484 }
485 }
486 if (punch_unlock)
487 spin_unlock(punch_unlock);
488 return freed;
489}
490
491static int shmem_map_and_free_swp(struct page *subdir, int offset,
492 int limit, struct page ***dir, spinlock_t *punch_lock)
493{
494 swp_entry_t *ptr;
495 int freed = 0;
496
497 ptr = shmem_swp_map(subdir);
498 for (; offset < limit; offset += LATENCY_LIMIT) {
499 int size = limit - offset;
500 if (size > LATENCY_LIMIT)
501 size = LATENCY_LIMIT;
502 freed += shmem_free_swp(ptr+offset, ptr+offset+size,
503 punch_lock);
504 if (need_resched()) {
505 shmem_swp_unmap(ptr);
506 if (*dir) {
507 shmem_dir_unmap(*dir);
508 *dir = NULL;
509 }
510 cond_resched();
511 ptr = shmem_swp_map(subdir);
512 }
513 }
514 shmem_swp_unmap(ptr);
515 return freed;
516}
517
518static void shmem_free_pages(struct list_head *next)
519{
520 struct page *page;
521 int freed = 0;
522
523 do {
524 page = container_of(next, struct page, lru);
525 next = next->next;
526 shmem_dir_free(page);
527 freed++;
528 if (freed >= LATENCY_LIMIT) {
529 cond_resched();
530 freed = 0;
531 }
532 } while (next);
533}
534
/*
 * shmem_truncate_range - free swap entries and index pages in a byte range
 * @inode: shmem inode
 * @start: first byte of the range; rounded up to a page boundary
 * @end:   last byte of the range, or (loff_t)-1 to truncate to end of file
 *
 * With @end == -1 this is a truncation: info->next_index is lowered to the
 * new end and no further locking is needed while the swap vector is walked.
 * Otherwise a hole is punched and the directory levels are updated under
 * info->lock.  Index pages that become wholly unused are unlinked from the
 * level above, collected on a local list and freed at the end.
 * SHMEM_TRUNCATE is set in info->flags for the duration.
 */
static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
{
	struct shmem_inode_info *info = SHMEM_I(inode);
	unsigned long idx;
	unsigned long size;
	unsigned long limit;
	unsigned long stage;
	unsigned long diroff;
	struct page **dir;
	struct page *topdir;
	struct page *middir;
	struct page *subdir;
	swp_entry_t *ptr;
	LIST_HEAD(pages_to_free);
	long nr_pages_to_free = 0;
	long nr_swaps_freed = 0;
	int offset;
	int freed;
	int punch_hole;
	spinlock_t *needs_lock;
	spinlock_t *punch_lock;
	unsigned long upper_limit;

	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
	idx = (start + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
	/* nothing recorded at or beyond the first page to free */
	if (idx >= info->next_index)
		return;

	spin_lock(&info->lock);
	info->flags |= SHMEM_TRUNCATE;
	if (likely(end == (loff_t) -1)) {
		/* truncation: everything from idx up to next_index goes */
		limit = info->next_index;
		upper_limit = SHMEM_MAX_INDEX;
		info->next_index = idx;
		needs_lock = NULL;
		punch_hole = 0;
	} else {
		if (end + 1 >= inode->i_size) {	/* hole reaches end of file */
			limit = (inode->i_size + PAGE_CACHE_SIZE - 1) >>
							PAGE_CACHE_SHIFT;
			upper_limit = SHMEM_MAX_INDEX;
		} else {
			limit = (end + 1) >> PAGE_CACHE_SHIFT;
			upper_limit = limit;
		}
		needs_lock = &info->lock;
		punch_hole = 1;
	}

	topdir = info->i_indirect;
	if (topdir && idx <= SHMEM_NR_DIRECT && !punch_hole) {
		/* truncating away every indirect entry: detach the top page */
		info->i_indirect = NULL;
		nr_pages_to_free++;
		list_add(&topdir->lru, &pages_to_free);
	}
	spin_unlock(&info->lock);

	/* free swap entries held in the direct array */
	if (info->swapped && idx < SHMEM_NR_DIRECT) {
		ptr = info->i_direct;
		size = limit;
		if (size > SHMEM_NR_DIRECT)
			size = SHMEM_NR_DIRECT;
		nr_swaps_freed = shmem_free_swp(ptr+idx, ptr+size, needs_lock);
	}

	/*
	 * If there are no indirect pages, or the range ends within the
	 * direct entries, there is nothing more to walk.
	 */
	if (!topdir || limit <= SHMEM_NR_DIRECT)
		goto done2;

	/*
	 * Locking below: needs_lock and punch_lock are each either NULL or
	 * &info->lock.  In the truncation case no lock is taken (next_index
	 * was already lowered above).  In the punch_hole case the lock is
	 * taken when a directory slot is cleared or entries are freed; but
	 * once a whole directory page has been unlinked from the level above,
	 * its contents are updated without the lock.  upper_limit bounds the
	 * index pages that may be removed whole.
	 */

	/* from here on idx, limit and upper_limit are relative to the indirect area */
	upper_limit -= SHMEM_NR_DIRECT;
	limit -= SHMEM_NR_DIRECT;
	idx = (idx > SHMEM_NR_DIRECT)? (idx - SHMEM_NR_DIRECT): 0;
	offset = idx % ENTRIES_PER_PAGE;
	idx -= offset;

	dir = shmem_dir_map(topdir);
	/* stage: the idx at which the walk must move on to the next middle directory */
	stage = ENTRIES_PER_PAGEPAGE/2;
	if (idx < ENTRIES_PER_PAGEPAGE/2) {
		/* start in the first half of the top page, which is its own middir */
		middir = topdir;
		diroff = idx/ENTRIES_PER_PAGE;
	} else {
		dir += ENTRIES_PER_PAGE/2;
		dir += (idx - ENTRIES_PER_PAGEPAGE/2)/ENTRIES_PER_PAGEPAGE;
		while (stage <= idx)
			stage += ENTRIES_PER_PAGEPAGE;
		middir = *dir;
		if (*dir) {
			diroff = ((idx - ENTRIES_PER_PAGEPAGE/2) %
				ENTRIES_PER_PAGEPAGE) / ENTRIES_PER_PAGE;
			if (!diroff && !offset && upper_limit >= stage) {
				/* the whole middle directory lies in the range: unlink it */
				if (needs_lock) {
					spin_lock(needs_lock);
					*dir = NULL;
					spin_unlock(needs_lock);
					needs_lock = NULL;
				} else
					*dir = NULL;
				nr_pages_to_free++;
				list_add(&middir->lru, &pages_to_free);
			}
			shmem_dir_unmap(dir);
			dir = shmem_dir_map(middir);
		} else {
			/* no middle directory here: resume at the next stage boundary */
			diroff = 0;
			offset = 0;
			idx = stage;
		}
	}

	for (; idx < limit; idx += ENTRIES_PER_PAGE, diroff++) {
		if (unlikely(idx == stage)) {
			/* move on to the next populated middle directory */
			shmem_dir_unmap(dir);
			dir = shmem_dir_map(topdir) +
			    ENTRIES_PER_PAGE/2 + idx/ENTRIES_PER_PAGEPAGE;
			while (!*dir) {
				dir++;
				idx += ENTRIES_PER_PAGEPAGE;
				if (idx >= limit)
					goto done1;
			}
			stage = idx + ENTRIES_PER_PAGEPAGE;
			middir = *dir;
			if (punch_hole)
				needs_lock = &info->lock;
			if (upper_limit >= stage) {
				if (needs_lock) {
					spin_lock(needs_lock);
					*dir = NULL;
					spin_unlock(needs_lock);
					needs_lock = NULL;
				} else
					*dir = NULL;
				nr_pages_to_free++;
				list_add(&middir->lru, &pages_to_free);
			}
			shmem_dir_unmap(dir);
			cond_resched();
			dir = shmem_dir_map(middir);
			diroff = 0;
		}
		punch_lock = needs_lock;
		subdir = dir[diroff];
		if (subdir && !offset && upper_limit-idx >= ENTRIES_PER_PAGE) {
			/* the whole swap entry page lies in the range: unlink it */
			if (needs_lock) {
				spin_lock(needs_lock);
				dir[diroff] = NULL;
				spin_unlock(needs_lock);
				punch_lock = NULL;
			} else
				dir[diroff] = NULL;
			nr_pages_to_free++;
			list_add(&subdir->lru, &pages_to_free);
		}
		if (subdir && page_private(subdir) /* has swap entries */) {
			size = limit - idx;
			if (size > ENTRIES_PER_PAGE)
				size = ENTRIES_PER_PAGE;
			freed = shmem_map_and_free_swp(subdir,
					offset, size, &dir, punch_lock);
			/* the helper unmaps dir if it had to reschedule */
			if (!dir)
				dir = shmem_dir_map(middir);
			nr_swaps_freed += freed;
			if (offset || punch_lock) {
				/* page stays linked: adjust its entry count under the lock */
				spin_lock(&info->lock);
				set_page_private(subdir,
					page_private(subdir) - freed);
				spin_unlock(&info->lock);
			} else
				BUG_ON(page_private(subdir) != freed);
		}
		offset = 0;
	}
done1:
	shmem_dir_unmap(dir);
done2:
	if (inode->i_mapping->nrpages && (info->flags & SHMEM_PAGEIN)) {
		/*
		 * SHMEM_PAGEIN is set whenever a page is added to the page
		 * cache (see shmem_getpage() and shmem_unuse_inode()), so
		 * pages may have appeared in the range since the caller
		 * truncated the cache: truncate it again here.
		 * NOTE(review): for punch_hole nothing visible clears
		 * SHMEM_PAGEIN first, so this probably runs every time -
		 * confirm against the hole-punching caller.
		 */
		truncate_inode_pages_range(inode->i_mapping, start, end);
		if (punch_hole)
			unmap_mapping_range(inode->i_mapping, start,
							end - start, 1);
	}

	spin_lock(&info->lock);
	info->flags &= ~SHMEM_TRUNCATE;
	info->swapped -= nr_swaps_freed;
	if (nr_pages_to_free)
		shmem_free_blocks(inode, nr_pages_to_free);
	shmem_recalc_inode(inode);
	spin_unlock(&info->lock);

	/*
	 * Free the unlinked index pages, if any: cut the circular list after
	 * its last page so that shmem_free_pages() sees a NULL terminator.
	 */
	if (!list_empty(&pages_to_free)) {
		pages_to_free.prev->next = NULL;
		shmem_free_pages(pages_to_free.next);
	}
}
762
/*
 * shmem_notify_change - apply attribute changes to a shmem inode
 * @dentry: dentry of the inode being changed
 * @attr:   requested attribute changes
 *
 * Validates the change with inode_change_ok(), performs a size change on
 * regular files (including freeing swap and index pages through
 * shmem_truncate_range()), then copies the attributes into the inode.
 * Returns 0 or a negative errno from inode_change_ok() or, with
 * CONFIG_TMPFS_POSIX_ACL and ATTR_MODE, from generic_acl_chmod().
 */
static int shmem_notify_change(struct dentry *dentry, struct iattr *attr)
{
	struct inode *inode = dentry->d_inode;
	loff_t newsize = attr->ia_size;
	int error;

	error = inode_change_ok(inode, attr);
	if (error)
		return error;

	if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)
					&& newsize != inode->i_size) {
		struct page *page = NULL;

		if (newsize < inode->i_size) {
			/*
			 * When shrinking to a size that is not page aligned,
			 * bring the partial last page into the page cache
			 * (if it exists) and keep a reference on it until
			 * truncate_setsize() below has run.
			 */
			if (newsize & (PAGE_CACHE_SIZE-1)) {
				(void) shmem_getpage(inode,
					newsize >> PAGE_CACHE_SHIFT,
						&page, SGP_READ, NULL);
				if (page)
					unlock_page(page);
			}
			/*
			 * Clear SHMEM_PAGEIN so that shmem_truncate_range()
			 * can tell whether any page was added to the cache
			 * afterwards.  Skipped when truncating to zero
			 * length, where the flag is left as it is.
			 */
			if (newsize) {
				struct shmem_inode_info *info = SHMEM_I(inode);
				spin_lock(&info->lock);
				info->flags &= ~SHMEM_PAGEIN;
				spin_unlock(&info->lock);
			}
		}

		/* set the new size, then free what lies beyond it */
		truncate_setsize(inode, newsize);
		if (page)
			page_cache_release(page);
		shmem_truncate_range(inode, newsize, (loff_t)-1);
	}

	setattr_copy(inode, attr);
#ifdef CONFIG_TMPFS_POSIX_ACL
	if (attr->ia_valid & ATTR_MODE)
		error = generic_acl_chmod(inode);
#endif
	return error;
}
821
822static void shmem_evict_inode(struct inode *inode)
823{
824 struct shmem_inode_info *info = SHMEM_I(inode);
825
826 if (inode->i_mapping->a_ops == &shmem_aops) {
827 truncate_inode_pages(inode->i_mapping, 0);
828 shmem_unacct_size(info->flags, inode->i_size);
829 inode->i_size = 0;
830 shmem_truncate_range(inode, 0, (loff_t)-1);
831 if (!list_empty(&info->swaplist)) {
832 mutex_lock(&shmem_swaplist_mutex);
833 list_del_init(&info->swaplist);
834 mutex_unlock(&shmem_swaplist_mutex);
835 }
836 }
837 BUG_ON(inode->i_blocks);
838 shmem_free_inode(inode->i_sb);
839 end_writeback(inode);
840}
841
842static inline int shmem_find_swp(swp_entry_t entry, swp_entry_t *dir, swp_entry_t *edir)
843{
844 swp_entry_t *ptr;
845
846 for (ptr = dir; ptr < edir; ptr++) {
847 if (ptr->val == entry.val)
848 return ptr - dir;
849 }
850 return -1;
851}
852
/*
 * shmem_unuse_inode - look for a swap entry in one inode and bring it back
 * @info:  shmem inode info to search
 * @entry: swap entry being searched for
 * @page:  locked swap cache page holding the entry's data
 *
 * Called from shmem_unuse() with shmem_swaplist_mutex held.
 *
 * Returns 0 if @entry does not belong to this inode: the mutex is still
 * held and @page is untouched.  Returns non-zero once the entry has been
 * found: the mutex has then been dropped, and @page has been unlocked and
 * released.  The non-zero value is 1, or a negative errno when charging,
 * preloading or adding to the page cache failed.
 */
static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, struct page *page)
{
	struct inode *inode;
	unsigned long idx;
	unsigned long size;
	unsigned long limit;
	unsigned long stage;
	struct page **dir;
	struct page *subdir;
	swp_entry_t *ptr;
	int offset;
	int error;

	idx = 0;
	ptr = info->i_direct;
	spin_lock(&info->lock);
	if (!info->swapped) {
		/* nothing on swap any more: no need to keep it on the list */
		list_del_init(&info->swaplist);
		goto lost2;
	}
	limit = info->next_index;
	size = limit;
	if (size > SHMEM_NR_DIRECT)
		size = SHMEM_NR_DIRECT;
	/* search the direct entries first */
	offset = shmem_find_swp(entry, ptr, ptr+size);
	if (offset >= 0)
		goto found;
	if (!info->i_indirect)
		goto lost2;

	dir = shmem_dir_map(info->i_indirect);
	/* stage: the idx at which the walk moves on to the next middle directory */
	stage = SHMEM_NR_DIRECT + ENTRIES_PER_PAGEPAGE/2;

	for (idx = SHMEM_NR_DIRECT; idx < limit; idx += ENTRIES_PER_PAGE, dir++) {
		if (unlikely(idx == stage)) {
			/* dir was already advanced by the loop step */
			shmem_dir_unmap(dir-1);
			if (cond_resched_lock(&info->lock)) {
				/* lock was dropped: check for truncation meanwhile */
				if (limit > info->next_index) {
					limit = info->next_index;
					if (idx >= limit)
						goto lost2;
				}
			}
			dir = shmem_dir_map(info->i_indirect) +
			    ENTRIES_PER_PAGE/2 + idx/ENTRIES_PER_PAGEPAGE;
			/* skip over missing middle directories */
			while (!*dir) {
				dir++;
				idx += ENTRIES_PER_PAGEPAGE;
				if (idx >= limit)
					goto lost1;
			}
			stage = idx + ENTRIES_PER_PAGEPAGE;
			subdir = *dir;
			shmem_dir_unmap(dir);
			dir = shmem_dir_map(subdir);
		}
		subdir = *dir;
		/* page_private() is the number of swap entries in the page */
		if (subdir && page_private(subdir)) {
			ptr = shmem_swp_map(subdir);
			size = limit - idx;
			if (size > ENTRIES_PER_PAGE)
				size = ENTRIES_PER_PAGE;
			offset = shmem_find_swp(entry, ptr, ptr+size);
			shmem_swp_unmap(ptr);
			if (offset >= 0) {
				shmem_dir_unmap(dir);
				goto found;
			}
		}
	}
lost1:
	shmem_dir_unmap(dir-1);
lost2:
	spin_unlock(&info->lock);
	return 0;
found:
	idx += offset;
	inode = igrab(&info->vfs_inode);
	spin_unlock(&info->lock);

	/*
	 * Move the list head to just before this inode, so that the next
	 * search starts here.  shmem_evict_inode() tests list_empty() on
	 * info->swaplist without the mutex, so the move is skipped when this
	 * inode is already first on the list.
	 */
	if (shmem_swaplist.next != &info->swaplist)
		list_move_tail(&shmem_swaplist, &info->swaplist);
	mutex_unlock(&shmem_swaplist_mutex);

	error = 1;
	if (!inode)
		goto out;

	/*
	 * Charge the page and preload the radix tree with GFP_KERNEL here,
	 * while sleeping is still allowed; the insertion below is done under
	 * info->lock with GFP_NOWAIT.
	 */
	error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
	if (error)
		goto out;
	error = radix_tree_preload(GFP_KERNEL);
	if (error) {
		mem_cgroup_uncharge_cache_page(page);
		goto out;
	}
	error = 1;

	spin_lock(&info->lock);
	/* the lock was dropped: check that the entry is still there */
	ptr = shmem_swp_entry(info, idx, NULL);
	if (ptr && ptr->val == entry.val) {
		error = add_to_page_cache_locked(page, inode->i_mapping,
						idx, GFP_NOWAIT);
		/* NOTE(review): presumably uncharges the page itself on error - confirm */
	} else	/* compensate for the charge taken above */
		mem_cgroup_uncharge_cache_page(page);

	if (error == -EEXIST) {
		struct page *filepage = find_get_page(inode->i_mapping, idx);
		error = 1;
		if (filepage) {
			/*
			 * A page is already cached at this index.  If it is
			 * uptodate, proceed as on success: the swap copy is
			 * dropped below.
			 */
			if (PageUptodate(filepage))
				error = 0;
			page_cache_release(filepage);
		}
	}
	if (!error) {
		delete_from_swap_cache(page);
		set_page_dirty(page);
		info->flags |= SHMEM_PAGEIN;
		shmem_swp_set(info, ptr, 0);
		swap_free(entry);
		error = 1;	/* not an error: tells the caller it was found */
	}
	if (ptr)
		shmem_swp_unmap(ptr);
	spin_unlock(&info->lock);
	radix_tree_preload_end();
out:
	unlock_page(page);
	page_cache_release(page);
	iput(inode);		/* inode may be NULL here */
	return error;
}
1003
1004
1005
1006
1007int shmem_unuse(swp_entry_t entry, struct page *page)
1008{
1009 struct list_head *p, *next;
1010 struct shmem_inode_info *info;
1011 int found = 0;
1012
1013 mutex_lock(&shmem_swaplist_mutex);
1014 list_for_each_safe(p, next, &shmem_swaplist) {
1015 info = list_entry(p, struct shmem_inode_info, swaplist);
1016 found = shmem_unuse_inode(info, entry, page);
1017 cond_resched();
1018 if (found)
1019 goto out;
1020 }
1021 mutex_unlock(&shmem_swaplist_mutex);
1022
1023
1024
1025
1026 unlock_page(page);
1027 page_cache_release(page);
1028out:
1029 return (found < 0) ? found : 0;
1030}
1031
1032
1033
1034
/*
 * shmem_writepage - move a page from the page cache to swap
 * @page: locked page cache page
 * @wbc:  writeback control; swap space is only taken when for_reclaim is set
 *
 * Returns 0 when the page was handed to swap_writepage(), or when it was
 * redirtied and unlocked.  Returns AOP_WRITEPAGE_ACTIVATE, with the page
 * redirtied and still locked, when a for_reclaim call could not swap it.
 */
static int shmem_writepage(struct page *page, struct writeback_control *wbc)
{
	struct shmem_inode_info *info;
	swp_entry_t *entry, swap;
	struct address_space *mapping;
	unsigned long index;
	struct inode *inode;

	BUG_ON(!PageLocked(page));
	mapping = page->mapping;
	index = page->index;
	inode = mapping->host;
	info = SHMEM_I(inode);
	/* VM_LOCKED inodes are never swapped; nor can we swap without swap space */
	if (info->flags & VM_LOCKED)
		goto redirty;
	if (!total_swap_pages)
		goto redirty;

	/*
	 * Only allocate swap when called for reclaim.  Other callers still
	 * go through the code below, which discards a stale swap entry for
	 * this index.
	 */
	if (wbc->for_reclaim)
		swap = get_swap_page();
	else
		swap.val = 0;

	spin_lock(&info->lock);
	if (index >= info->next_index) {
		/* only expected while a truncation is in progress */
		BUG_ON(!(info->flags & SHMEM_TRUNCATE));
		goto unlock;
	}
	entry = shmem_swp_entry(info, index, NULL);
	if (entry->val) {
		/*
		 * The page being written now replaces whatever was in swap
		 * for this index: drop the old swap entry.
		 */
		free_swap_and_cache(*entry);
		shmem_swp_set(info, entry, 0);
	}
	shmem_recalc_inode(inode);

	if (swap.val && add_to_swap_cache(page, swap, GFP_ATOMIC) == 0) {
		remove_from_page_cache(page);
		shmem_swp_set(info, entry, swap.val);
		shmem_swp_unmap(entry);
		/* take an inode reference only if it must be added to the swaplist */
		if (list_empty(&info->swaplist))
			inode = igrab(inode);
		else
			inode = NULL;
		spin_unlock(&info->lock);
		swap_shmem_alloc(swap);
		BUG_ON(page_mapped(page));
		page_cache_release(page);	/* the page cache's reference */
		swap_writepage(page, wbc);
		if (inode) {
			mutex_lock(&shmem_swaplist_mutex);
			/* list_move_tail() rather than add: it may already have been added */
			list_move_tail(&info->swaplist, &shmem_swaplist);
			mutex_unlock(&shmem_swaplist_mutex);
			iput(inode);
		}
		return 0;
	}

	shmem_swp_unmap(entry);
unlock:
	spin_unlock(&info->lock);
	/*
	 * The swap slot was not used: give it back.
	 * NOTE(review): also reached with swap.val == 0 when !for_reclaim;
	 * presumably swapcache_free() tolerates that - confirm.
	 */
	swapcache_free(swap, NULL);
redirty:
	set_page_dirty(page);
	if (wbc->for_reclaim)
		return AOP_WRITEPAGE_ACTIVATE;	/* returns with the page still locked */
	unlock_page(page);
	return 0;
}
1121
1122#ifdef CONFIG_NUMA
1123#ifdef CONFIG_TMPFS
1124static void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol)
1125{
1126 char buffer[64];
1127
1128 if (!mpol || mpol->mode == MPOL_DEFAULT)
1129 return;
1130
1131 mpol_to_str(buffer, sizeof(buffer), mpol, 1);
1132
1133 seq_printf(seq, ",mpol=%s", buffer);
1134}
1135
1136static struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
1137{
1138 struct mempolicy *mpol = NULL;
1139 if (sbinfo->mpol) {
1140 spin_lock(&sbinfo->stat_lock);
1141 mpol = sbinfo->mpol;
1142 mpol_get(mpol);
1143 spin_unlock(&sbinfo->stat_lock);
1144 }
1145 return mpol;
1146}
1147#endif
1148
1149static struct page *shmem_swapin(swp_entry_t entry, gfp_t gfp,
1150 struct shmem_inode_info *info, unsigned long idx)
1151{
1152 struct mempolicy mpol, *spol;
1153 struct vm_area_struct pvma;
1154 struct page *page;
1155
1156 spol = mpol_cond_copy(&mpol,
1157 mpol_shared_policy_lookup(&info->policy, idx));
1158
1159
1160 pvma.vm_start = 0;
1161 pvma.vm_pgoff = idx;
1162 pvma.vm_ops = NULL;
1163 pvma.vm_policy = spol;
1164 page = swapin_readahead(entry, gfp, &pvma, 0);
1165 return page;
1166}
1167
1168static struct page *shmem_alloc_page(gfp_t gfp,
1169 struct shmem_inode_info *info, unsigned long idx)
1170{
1171 struct vm_area_struct pvma;
1172
1173
1174 pvma.vm_start = 0;
1175 pvma.vm_pgoff = idx;
1176 pvma.vm_ops = NULL;
1177 pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, idx);
1178
1179
1180
1181
1182 return alloc_page_vma(gfp, &pvma, 0);
1183}
1184#else
1185#ifdef CONFIG_TMPFS
/* Without CONFIG_NUMA there is no memory policy to show: prints nothing. */
static inline void shmem_show_mpol(struct seq_file *seq, struct mempolicy *p)
{
}
1189#endif
1190
1191static inline struct page *shmem_swapin(swp_entry_t entry, gfp_t gfp,
1192 struct shmem_inode_info *info, unsigned long idx)
1193{
1194 return swapin_readahead(entry, gfp, NULL, 0);
1195}
1196
1197static inline struct page *shmem_alloc_page(gfp_t gfp,
1198 struct shmem_inode_info *info, unsigned long idx)
1199{
1200 return alloc_page(gfp);
1201}
1202#endif
1203
1204#if !defined(CONFIG_NUMA) || !defined(CONFIG_TMPFS)
/*
 * Stub used unless both CONFIG_NUMA and CONFIG_TMPFS are set: there is no
 * superblock memory policy, so always NULL.
 */
static inline struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
{
	return NULL;
}
1209#endif
1210
1211
1212
1213
1214
1215
1216
1217
1218static int shmem_getpage(struct inode *inode, unsigned long idx,
1219 struct page **pagep, enum sgp_type sgp, int *type)
1220{
1221 struct address_space *mapping = inode->i_mapping;
1222 struct shmem_inode_info *info = SHMEM_I(inode);
1223 struct shmem_sb_info *sbinfo;
1224 struct page *filepage = *pagep;
1225 struct page *swappage;
1226 struct page *prealloc_page = NULL;
1227 swp_entry_t *entry;
1228 swp_entry_t swap;
1229 gfp_t gfp;
1230 int error;
1231
1232 if (idx >= SHMEM_MAX_INDEX)
1233 return -EFBIG;
1234
1235 if (type)
1236 *type = 0;
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246repeat:
1247 if (!filepage)
1248 filepage = find_lock_page(mapping, idx);
1249 if (filepage && PageUptodate(filepage))
1250 goto done;
1251 gfp = mapping_gfp_mask(mapping);
1252 if (!filepage) {
1253
1254
1255
1256
1257 error = radix_tree_preload(gfp & ~__GFP_HIGHMEM);
1258 if (error)
1259 goto failed;
1260 radix_tree_preload_end();
1261 if (sgp != SGP_READ && !prealloc_page) {
1262
1263 prealloc_page = shmem_alloc_page(gfp, info, idx);
1264 if (prealloc_page) {
1265 if (mem_cgroup_cache_charge(prealloc_page,
1266 current->mm, GFP_KERNEL)) {
1267 page_cache_release(prealloc_page);
1268 prealloc_page = NULL;
1269 }
1270 }
1271 }
1272 }
1273 error = 0;
1274
1275 spin_lock(&info->lock);
1276 shmem_recalc_inode(inode);
1277 entry = shmem_swp_alloc(info, idx, sgp);
1278 if (IS_ERR(entry)) {
1279 spin_unlock(&info->lock);
1280 error = PTR_ERR(entry);
1281 goto failed;
1282 }
1283 swap = *entry;
1284
1285 if (swap.val) {
1286
1287 swappage = lookup_swap_cache(swap);
1288 if (!swappage) {
1289 shmem_swp_unmap(entry);
1290
1291 if (type && !(*type & VM_FAULT_MAJOR)) {
1292 __count_vm_event(PGMAJFAULT);
1293 *type |= VM_FAULT_MAJOR;
1294 }
1295 spin_unlock(&info->lock);
1296 swappage = shmem_swapin(swap, gfp, info, idx);
1297 if (!swappage) {
1298 spin_lock(&info->lock);
1299 entry = shmem_swp_alloc(info, idx, sgp);
1300 if (IS_ERR(entry))
1301 error = PTR_ERR(entry);
1302 else {
1303 if (entry->val == swap.val)
1304 error = -ENOMEM;
1305 shmem_swp_unmap(entry);
1306 }
1307 spin_unlock(&info->lock);
1308 if (error)
1309 goto failed;
1310 goto repeat;
1311 }
1312 wait_on_page_locked(swappage);
1313 page_cache_release(swappage);
1314 goto repeat;
1315 }
1316
1317
1318 if (!trylock_page(swappage)) {
1319 shmem_swp_unmap(entry);
1320 spin_unlock(&info->lock);
1321 wait_on_page_locked(swappage);
1322 page_cache_release(swappage);
1323 goto repeat;
1324 }
1325 if (PageWriteback(swappage)) {
1326 shmem_swp_unmap(entry);
1327 spin_unlock(&info->lock);
1328 wait_on_page_writeback(swappage);
1329 unlock_page(swappage);
1330 page_cache_release(swappage);
1331 goto repeat;
1332 }
1333 if (!PageUptodate(swappage)) {
1334 shmem_swp_unmap(entry);
1335 spin_unlock(&info->lock);
1336 unlock_page(swappage);
1337 page_cache_release(swappage);
1338 error = -EIO;
1339 goto failed;
1340 }
1341
1342 if (filepage) {
1343 shmem_swp_set(info, entry, 0);
1344 shmem_swp_unmap(entry);
1345 delete_from_swap_cache(swappage);
1346 spin_unlock(&info->lock);
1347 copy_highpage(filepage, swappage);
1348 unlock_page(swappage);
1349 page_cache_release(swappage);
1350 flush_dcache_page(filepage);
1351 SetPageUptodate(filepage);
1352 set_page_dirty(filepage);
1353 swap_free(swap);
1354 } else if (!(error = add_to_page_cache_locked(swappage, mapping,
1355 idx, GFP_NOWAIT))) {
1356 info->flags |= SHMEM_PAGEIN;
1357 shmem_swp_set(info, entry, 0);
1358 shmem_swp_unmap(entry);
1359 delete_from_swap_cache(swappage);
1360 spin_unlock(&info->lock);
1361 filepage = swappage;
1362 set_page_dirty(filepage);
1363 swap_free(swap);
1364 } else {
1365 shmem_swp_unmap(entry);
1366 spin_unlock(&info->lock);
1367 if (error == -ENOMEM) {
1368
1369
1370
1371
1372 error = mem_cgroup_shmem_charge_fallback(
1373 swappage,
1374 current->mm,
1375 gfp);
1376 if (error) {
1377 unlock_page(swappage);
1378 page_cache_release(swappage);
1379 goto failed;
1380 }
1381 }
1382 unlock_page(swappage);
1383 page_cache_release(swappage);
1384 goto repeat;
1385 }
1386 } else if (sgp == SGP_READ && !filepage) {
1387 shmem_swp_unmap(entry);
1388 filepage = find_get_page(mapping, idx);
1389 if (filepage &&
1390 (!PageUptodate(filepage) || !trylock_page(filepage))) {
1391 spin_unlock(&info->lock);
1392 wait_on_page_locked(filepage);
1393 page_cache_release(filepage);
1394 filepage = NULL;
1395 goto repeat;
1396 }
1397 spin_unlock(&info->lock);
1398 } else {
1399 shmem_swp_unmap(entry);
1400 sbinfo = SHMEM_SB(inode->i_sb);
1401 if (sbinfo->max_blocks) {
1402 if ((percpu_counter_compare(&sbinfo->used_blocks, sbinfo->max_blocks) > 0) ||
1403 shmem_acct_block(info->flags)) {
1404 spin_unlock(&info->lock);
1405 error = -ENOSPC;
1406 goto failed;
1407 }
1408 percpu_counter_inc(&sbinfo->used_blocks);
1409 spin_lock(&inode->i_lock);
1410 inode->i_blocks += BLOCKS_PER_PAGE;
1411 spin_unlock(&inode->i_lock);
1412 } else if (shmem_acct_block(info->flags)) {
1413 spin_unlock(&info->lock);
1414 error = -ENOSPC;
1415 goto failed;
1416 }
1417
1418 if (!filepage) {
1419 int ret;
1420
1421 if (!prealloc_page) {
1422 spin_unlock(&info->lock);
1423 filepage = shmem_alloc_page(gfp, info, idx);
1424 if (!filepage) {
1425 shmem_unacct_blocks(info->flags, 1);
1426 shmem_free_blocks(inode, 1);
1427 error = -ENOMEM;
1428 goto failed;
1429 }
1430 SetPageSwapBacked(filepage);
1431
1432
1433
1434
1435
1436 error = mem_cgroup_cache_charge(filepage,
1437 current->mm, GFP_KERNEL);
1438 if (error) {
1439 page_cache_release(filepage);
1440 shmem_unacct_blocks(info->flags, 1);
1441 shmem_free_blocks(inode, 1);
1442 filepage = NULL;
1443 goto failed;
1444 }
1445
1446 spin_lock(&info->lock);
1447 } else {
1448 filepage = prealloc_page;
1449 prealloc_page = NULL;
1450 SetPageSwapBacked(filepage);
1451 }
1452
1453 entry = shmem_swp_alloc(info, idx, sgp);
1454 if (IS_ERR(entry))
1455 error = PTR_ERR(entry);
1456 else {
1457 swap = *entry;
1458 shmem_swp_unmap(entry);
1459 }
1460 ret = error || swap.val;
1461 if (ret)
1462 mem_cgroup_uncharge_cache_page(filepage);
1463 else
1464 ret = add_to_page_cache_lru(filepage, mapping,
1465 idx, GFP_NOWAIT);
1466
1467
1468
1469
1470 if (ret) {
1471 spin_unlock(&info->lock);
1472 page_cache_release(filepage);
1473 shmem_unacct_blocks(info->flags, 1);
1474 shmem_free_blocks(inode, 1);
1475 filepage = NULL;
1476 if (error)
1477 goto failed;
1478 goto repeat;
1479 }
1480 info->flags |= SHMEM_PAGEIN;
1481 }
1482
1483 info->alloced++;
1484 spin_unlock(&info->lock);
1485 clear_highpage(filepage);
1486 flush_dcache_page(filepage);
1487 SetPageUptodate(filepage);
1488 if (sgp == SGP_DIRTY)
1489 set_page_dirty(filepage);
1490 }
1491done:
1492 *pagep = filepage;
1493 error = 0;
1494 goto out;
1495
1496failed:
1497 if (*pagep != filepage) {
1498 unlock_page(filepage);
1499 page_cache_release(filepage);
1500 }
1501out:
1502 if (prealloc_page) {
1503 mem_cgroup_uncharge_cache_page(prealloc_page);
1504 page_cache_release(prealloc_page);
1505 }
1506 return error;
1507}
1508
1509static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1510{
1511 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
1512 int error;
1513 int ret;
1514
1515 if (((loff_t)vmf->pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode))
1516 return VM_FAULT_SIGBUS;
1517
1518 error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_CACHE, &ret);
1519 if (error)
1520 return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS);
1521
1522 return ret | VM_FAULT_LOCKED;
1523}
1524
1525#ifdef CONFIG_NUMA
1526static int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *new)
1527{
1528 struct inode *i = vma->vm_file->f_path.dentry->d_inode;
1529 return mpol_set_shared_policy(&SHMEM_I(i)->policy, vma, new);
1530}
1531
1532static struct mempolicy *shmem_get_policy(struct vm_area_struct *vma,
1533 unsigned long addr)
1534{
1535 struct inode *i = vma->vm_file->f_path.dentry->d_inode;
1536 unsigned long idx;
1537
1538 idx = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
1539 return mpol_shared_policy_lookup(&SHMEM_I(i)->policy, idx);
1540}
1541#endif
1542
1543int shmem_lock(struct file *file, int lock, struct user_struct *user)
1544{
1545 struct inode *inode = file->f_path.dentry->d_inode;
1546 struct shmem_inode_info *info = SHMEM_I(inode);
1547 int retval = -ENOMEM;
1548
1549 spin_lock(&info->lock);
1550 if (lock && !(info->flags & VM_LOCKED)) {
1551 if (!user_shm_lock(inode->i_size, user))
1552 goto out_nomem;
1553 info->flags |= VM_LOCKED;
1554 mapping_set_unevictable(file->f_mapping);
1555 }
1556 if (!lock && (info->flags & VM_LOCKED) && user) {
1557 user_shm_unlock(inode->i_size, user);
1558 info->flags &= ~VM_LOCKED;
1559 mapping_clear_unevictable(file->f_mapping);
1560 scan_mapping_unevictable_pages(file->f_mapping);
1561 }
1562 retval = 0;
1563
1564out_nomem:
1565 spin_unlock(&info->lock);
1566 return retval;
1567}
1568
1569static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
1570{
1571 file_accessed(file);
1572 vma->vm_ops = &shmem_vm_ops;
1573 vma->vm_flags |= VM_CAN_NONLINEAR;
1574 return 0;
1575}
1576
1577static struct inode *shmem_get_inode(struct super_block *sb, const struct inode *dir,
1578 int mode, dev_t dev, unsigned long flags)
1579{
1580 struct inode *inode;
1581 struct shmem_inode_info *info;
1582 struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
1583
1584 if (shmem_reserve_inode(sb))
1585 return NULL;
1586
1587 inode = new_inode(sb);
1588 if (inode) {
1589 inode->i_ino = get_next_ino();
1590 inode_init_owner(inode, dir, mode);
1591 inode->i_blocks = 0;
1592 inode->i_mapping->backing_dev_info = &shmem_backing_dev_info;
1593 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
1594 inode->i_generation = get_seconds();
1595 info = SHMEM_I(inode);
1596 memset(info, 0, (char *)inode - (char *)info);
1597 spin_lock_init(&info->lock);
1598 info->flags = flags & VM_NORESERVE;
1599 INIT_LIST_HEAD(&info->swaplist);
1600 cache_no_acl(inode);
1601
1602 switch (mode & S_IFMT) {
1603 default:
1604 inode->i_op = &shmem_special_inode_operations;
1605 init_special_inode(inode, mode, dev);
1606 break;
1607 case S_IFREG:
1608 inode->i_mapping->a_ops = &shmem_aops;
1609 inode->i_op = &shmem_inode_operations;
1610 inode->i_fop = &shmem_file_operations;
1611 mpol_shared_policy_init(&info->policy,
1612 shmem_get_sbmpol(sbinfo));
1613 break;
1614 case S_IFDIR:
1615 inc_nlink(inode);
1616
1617 inode->i_size = 2 * BOGO_DIRENT_SIZE;
1618 inode->i_op = &shmem_dir_inode_operations;
1619 inode->i_fop = &simple_dir_operations;
1620 break;
1621 case S_IFLNK:
1622
1623
1624
1625
1626 mpol_shared_policy_init(&info->policy, NULL);
1627 break;
1628 }
1629 } else
1630 shmem_free_inode(sb);
1631 return inode;
1632}
1633
1634#ifdef CONFIG_TMPFS
1635static const struct inode_operations shmem_symlink_inode_operations;
1636static const struct inode_operations shmem_symlink_inline_operations;
1637
1638
1639
1640
1641
1642
1643static int shmem_readpage(struct file *file, struct page *page)
1644{
1645 struct inode *inode = page->mapping->host;
1646 int error = shmem_getpage(inode, page->index, &page, SGP_CACHE, NULL);
1647 unlock_page(page);
1648 return error;
1649}
1650
1651static int
1652shmem_write_begin(struct file *file, struct address_space *mapping,
1653 loff_t pos, unsigned len, unsigned flags,
1654 struct page **pagep, void **fsdata)
1655{
1656 struct inode *inode = mapping->host;
1657 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
1658 *pagep = NULL;
1659 return shmem_getpage(inode, index, pagep, SGP_WRITE, NULL);
1660}
1661
1662static int
1663shmem_write_end(struct file *file, struct address_space *mapping,
1664 loff_t pos, unsigned len, unsigned copied,
1665 struct page *page, void *fsdata)
1666{
1667 struct inode *inode = mapping->host;
1668
1669 if (pos + copied > inode->i_size)
1670 i_size_write(inode, pos + copied);
1671
1672 set_page_dirty(page);
1673 unlock_page(page);
1674 page_cache_release(page);
1675
1676 return copied;
1677}
1678
1679static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_t *desc, read_actor_t actor)
1680{
1681 struct inode *inode = filp->f_path.dentry->d_inode;
1682 struct address_space *mapping = inode->i_mapping;
1683 unsigned long index, offset;
1684 enum sgp_type sgp = SGP_READ;
1685
1686
1687
1688
1689
1690
1691 if (segment_eq(get_fs(), KERNEL_DS))
1692 sgp = SGP_DIRTY;
1693
1694 index = *ppos >> PAGE_CACHE_SHIFT;
1695 offset = *ppos & ~PAGE_CACHE_MASK;
1696
1697 for (;;) {
1698 struct page *page = NULL;
1699 unsigned long end_index, nr, ret;
1700 loff_t i_size = i_size_read(inode);
1701
1702 end_index = i_size >> PAGE_CACHE_SHIFT;
1703 if (index > end_index)
1704 break;
1705 if (index == end_index) {
1706 nr = i_size & ~PAGE_CACHE_MASK;
1707 if (nr <= offset)
1708 break;
1709 }
1710
1711 desc->error = shmem_getpage(inode, index, &page, sgp, NULL);
1712 if (desc->error) {
1713 if (desc->error == -EINVAL)
1714 desc->error = 0;
1715 break;
1716 }
1717 if (page)
1718 unlock_page(page);
1719
1720
1721
1722
1723
1724 nr = PAGE_CACHE_SIZE;
1725 i_size = i_size_read(inode);
1726 end_index = i_size >> PAGE_CACHE_SHIFT;
1727 if (index == end_index) {
1728 nr = i_size & ~PAGE_CACHE_MASK;
1729 if (nr <= offset) {
1730 if (page)
1731 page_cache_release(page);
1732 break;
1733 }
1734 }
1735 nr -= offset;
1736
1737 if (page) {
1738
1739
1740
1741
1742
1743 if (mapping_writably_mapped(mapping))
1744 flush_dcache_page(page);
1745
1746
1747
1748 if (!offset)
1749 mark_page_accessed(page);
1750 } else {
1751 page = ZERO_PAGE(0);
1752 page_cache_get(page);
1753 }
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765 ret = actor(desc, page, offset, nr);
1766 offset += ret;
1767 index += offset >> PAGE_CACHE_SHIFT;
1768 offset &= ~PAGE_CACHE_MASK;
1769
1770 page_cache_release(page);
1771 if (ret != nr || !desc->count)
1772 break;
1773
1774 cond_resched();
1775 }
1776
1777 *ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
1778 file_accessed(filp);
1779}
1780
1781static ssize_t shmem_file_aio_read(struct kiocb *iocb,
1782 const struct iovec *iov, unsigned long nr_segs, loff_t pos)
1783{
1784 struct file *filp = iocb->ki_filp;
1785 ssize_t retval;
1786 unsigned long seg;
1787 size_t count;
1788 loff_t *ppos = &iocb->ki_pos;
1789
1790 retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
1791 if (retval)
1792 return retval;
1793
1794 for (seg = 0; seg < nr_segs; seg++) {
1795 read_descriptor_t desc;
1796
1797 desc.written = 0;
1798 desc.arg.buf = iov[seg].iov_base;
1799 desc.count = iov[seg].iov_len;
1800 if (desc.count == 0)
1801 continue;
1802 desc.error = 0;
1803 do_shmem_file_read(filp, ppos, &desc, file_read_actor);
1804 retval += desc.written;
1805 if (desc.error) {
1806 retval = retval ?: desc.error;
1807 break;
1808 }
1809 if (desc.count > 0)
1810 break;
1811 }
1812 return retval;
1813}
1814
1815static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
1816{
1817 struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb);
1818
1819 buf->f_type = TMPFS_MAGIC;
1820 buf->f_bsize = PAGE_CACHE_SIZE;
1821 buf->f_namelen = NAME_MAX;
1822 if (sbinfo->max_blocks) {
1823 buf->f_blocks = sbinfo->max_blocks;
1824 buf->f_bavail = buf->f_bfree =
1825 sbinfo->max_blocks - percpu_counter_sum(&sbinfo->used_blocks);
1826 }
1827 if (sbinfo->max_inodes) {
1828 buf->f_files = sbinfo->max_inodes;
1829 buf->f_ffree = sbinfo->free_inodes;
1830 }
1831
1832 return 0;
1833}
1834
1835
1836
1837
1838static int
1839shmem_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
1840{
1841 struct inode *inode;
1842 int error = -ENOSPC;
1843
1844 inode = shmem_get_inode(dir->i_sb, dir, mode, dev, VM_NORESERVE);
1845 if (inode) {
1846 error = security_inode_init_security(inode, dir, NULL, NULL,
1847 NULL);
1848 if (error) {
1849 if (error != -EOPNOTSUPP) {
1850 iput(inode);
1851 return error;
1852 }
1853 }
1854#ifdef CONFIG_TMPFS_POSIX_ACL
1855 error = generic_acl_init(inode, dir);
1856 if (error) {
1857 iput(inode);
1858 return error;
1859 }
1860#else
1861 error = 0;
1862#endif
1863 dir->i_size += BOGO_DIRENT_SIZE;
1864 dir->i_ctime = dir->i_mtime = CURRENT_TIME;
1865 d_instantiate(dentry, inode);
1866 dget(dentry);
1867 }
1868 return error;
1869}
1870
1871static int shmem_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1872{
1873 int error;
1874
1875 if ((error = shmem_mknod(dir, dentry, mode | S_IFDIR, 0)))
1876 return error;
1877 inc_nlink(dir);
1878 return 0;
1879}
1880
1881static int shmem_create(struct inode *dir, struct dentry *dentry, int mode,
1882 struct nameidata *nd)
1883{
1884 return shmem_mknod(dir, dentry, mode | S_IFREG, 0);
1885}
1886
1887
1888
1889
1890static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
1891{
1892 struct inode *inode = old_dentry->d_inode;
1893 int ret;
1894
1895
1896
1897
1898
1899
1900 ret = shmem_reserve_inode(inode->i_sb);
1901 if (ret)
1902 goto out;
1903
1904 dir->i_size += BOGO_DIRENT_SIZE;
1905 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
1906 inc_nlink(inode);
1907 ihold(inode);
1908 dget(dentry);
1909 d_instantiate(dentry, inode);
1910out:
1911 return ret;
1912}
1913
1914static int shmem_unlink(struct inode *dir, struct dentry *dentry)
1915{
1916 struct inode *inode = dentry->d_inode;
1917
1918 if (inode->i_nlink > 1 && !S_ISDIR(inode->i_mode))
1919 shmem_free_inode(inode->i_sb);
1920
1921 dir->i_size -= BOGO_DIRENT_SIZE;
1922 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
1923 drop_nlink(inode);
1924 dput(dentry);
1925 return 0;
1926}
1927
1928static int shmem_rmdir(struct inode *dir, struct dentry *dentry)
1929{
1930 if (!simple_empty(dentry))
1931 return -ENOTEMPTY;
1932
1933 drop_nlink(dentry->d_inode);
1934 drop_nlink(dir);
1935 return shmem_unlink(dir, dentry);
1936}
1937
1938
1939
1940
1941
1942
1943
1944static int shmem_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry)
1945{
1946 struct inode *inode = old_dentry->d_inode;
1947 int they_are_dirs = S_ISDIR(inode->i_mode);
1948
1949 if (!simple_empty(new_dentry))
1950 return -ENOTEMPTY;
1951
1952 if (new_dentry->d_inode) {
1953 (void) shmem_unlink(new_dir, new_dentry);
1954 if (they_are_dirs)
1955 drop_nlink(old_dir);
1956 } else if (they_are_dirs) {
1957 drop_nlink(old_dir);
1958 inc_nlink(new_dir);
1959 }
1960
1961 old_dir->i_size -= BOGO_DIRENT_SIZE;
1962 new_dir->i_size += BOGO_DIRENT_SIZE;
1963 old_dir->i_ctime = old_dir->i_mtime =
1964 new_dir->i_ctime = new_dir->i_mtime =
1965 inode->i_ctime = CURRENT_TIME;
1966 return 0;
1967}
1968
1969static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
1970{
1971 int error;
1972 int len;
1973 struct inode *inode;
1974 struct page *page = NULL;
1975 char *kaddr;
1976 struct shmem_inode_info *info;
1977
1978 len = strlen(symname) + 1;
1979 if (len > PAGE_CACHE_SIZE)
1980 return -ENAMETOOLONG;
1981
1982 inode = shmem_get_inode(dir->i_sb, dir, S_IFLNK|S_IRWXUGO, 0, VM_NORESERVE);
1983 if (!inode)
1984 return -ENOSPC;
1985
1986 error = security_inode_init_security(inode, dir, NULL, NULL,
1987 NULL);
1988 if (error) {
1989 if (error != -EOPNOTSUPP) {
1990 iput(inode);
1991 return error;
1992 }
1993 error = 0;
1994 }
1995
1996 info = SHMEM_I(inode);
1997 inode->i_size = len-1;
1998 if (len <= (char *)inode - (char *)info) {
1999
2000 memcpy(info, symname, len);
2001 inode->i_op = &shmem_symlink_inline_operations;
2002 } else {
2003 error = shmem_getpage(inode, 0, &page, SGP_WRITE, NULL);
2004 if (error) {
2005 iput(inode);
2006 return error;
2007 }
2008 inode->i_mapping->a_ops = &shmem_aops;
2009 inode->i_op = &shmem_symlink_inode_operations;
2010 kaddr = kmap_atomic(page, KM_USER0);
2011 memcpy(kaddr, symname, len);
2012 kunmap_atomic(kaddr, KM_USER0);
2013 set_page_dirty(page);
2014 unlock_page(page);
2015 page_cache_release(page);
2016 }
2017 dir->i_size += BOGO_DIRENT_SIZE;
2018 dir->i_ctime = dir->i_mtime = CURRENT_TIME;
2019 d_instantiate(dentry, inode);
2020 dget(dentry);
2021 return 0;
2022}
2023
2024static void *shmem_follow_link_inline(struct dentry *dentry, struct nameidata *nd)
2025{
2026 nd_set_link(nd, (char *)SHMEM_I(dentry->d_inode));
2027 return NULL;
2028}
2029
2030static void *shmem_follow_link(struct dentry *dentry, struct nameidata *nd)
2031{
2032 struct page *page = NULL;
2033 int res = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ, NULL);
2034 nd_set_link(nd, res ? ERR_PTR(res) : kmap(page));
2035 if (page)
2036 unlock_page(page);
2037 return page;
2038}
2039
/*
 * put_link counterpart of shmem_follow_link(): unless the stored link is an
 * error pointer, undo the kmap, mark the page accessed and drop the page
 * reference carried in @cookie.
 */
static void shmem_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie)
{
	struct page *page;

	if (IS_ERR(nd_get_link(nd)))
		return;

	page = cookie;
	kunmap(page);
	mark_page_accessed(page);
	page_cache_release(page);
}
2049
2050static const struct inode_operations shmem_symlink_inline_operations = {
2051 .readlink = generic_readlink,
2052 .follow_link = shmem_follow_link_inline,
2053};
2054
2055static const struct inode_operations shmem_symlink_inode_operations = {
2056 .readlink = generic_readlink,
2057 .follow_link = shmem_follow_link,
2058 .put_link = shmem_put_link,
2059};
2060
2061#ifdef CONFIG_TMPFS_POSIX_ACL
2062
2063
2064
2065
2066
2067
2068
2069static size_t shmem_xattr_security_list(struct dentry *dentry, char *list,
2070 size_t list_len, const char *name,
2071 size_t name_len, int handler_flags)
2072{
2073 return security_inode_listsecurity(dentry->d_inode, list, list_len);
2074}
2075
2076static int shmem_xattr_security_get(struct dentry *dentry, const char *name,
2077 void *buffer, size_t size, int handler_flags)
2078{
2079 if (strcmp(name, "") == 0)
2080 return -EINVAL;
2081 return xattr_getsecurity(dentry->d_inode, name, buffer, size);
2082}
2083
2084static int shmem_xattr_security_set(struct dentry *dentry, const char *name,
2085 const void *value, size_t size, int flags, int handler_flags)
2086{
2087 if (strcmp(name, "") == 0)
2088 return -EINVAL;
2089 return security_inode_setsecurity(dentry->d_inode, name, value,
2090 size, flags);
2091}
2092
2093static const struct xattr_handler shmem_xattr_security_handler = {
2094 .prefix = XATTR_SECURITY_PREFIX,
2095 .list = shmem_xattr_security_list,
2096 .get = shmem_xattr_security_get,
2097 .set = shmem_xattr_security_set,
2098};
2099
2100static const struct xattr_handler *shmem_xattr_handlers[] = {
2101 &generic_acl_access_handler,
2102 &generic_acl_default_handler,
2103 &shmem_xattr_security_handler,
2104 NULL
2105};
2106#endif
2107
2108static struct dentry *shmem_get_parent(struct dentry *child)
2109{
2110 return ERR_PTR(-ESTALE);
2111}
2112
2113static int shmem_match(struct inode *ino, void *vfh)
2114{
2115 __u32 *fh = vfh;
2116 __u64 inum = fh[2];
2117 inum = (inum << 32) | fh[1];
2118 return ino->i_ino == inum && fh[0] == ino->i_generation;
2119}
2120
2121static struct dentry *shmem_fh_to_dentry(struct super_block *sb,
2122 struct fid *fid, int fh_len, int fh_type)
2123{
2124 struct inode *inode;
2125 struct dentry *dentry = NULL;
2126 u64 inum = fid->raw[2];
2127 inum = (inum << 32) | fid->raw[1];
2128
2129 if (fh_len < 3)
2130 return NULL;
2131
2132 inode = ilookup5(sb, (unsigned long)(inum + fid->raw[0]),
2133 shmem_match, fid->raw);
2134 if (inode) {
2135 dentry = d_find_alias(inode);
2136 iput(inode);
2137 }
2138
2139 return dentry;
2140}
2141
2142static int shmem_encode_fh(struct dentry *dentry, __u32 *fh, int *len,
2143 int connectable)
2144{
2145 struct inode *inode = dentry->d_inode;
2146
2147 if (*len < 3)
2148 return 255;
2149
2150 if (inode_unhashed(inode)) {
2151
2152
2153
2154
2155
2156 static DEFINE_SPINLOCK(lock);
2157 spin_lock(&lock);
2158 if (inode_unhashed(inode))
2159 __insert_inode_hash(inode,
2160 inode->i_ino + inode->i_generation);
2161 spin_unlock(&lock);
2162 }
2163
2164 fh[0] = inode->i_generation;
2165 fh[1] = inode->i_ino;
2166 fh[2] = ((__u64)inode->i_ino) >> 32;
2167
2168 *len = 3;
2169 return 1;
2170}
2171
2172static const struct export_operations shmem_export_ops = {
2173 .get_parent = shmem_get_parent,
2174 .encode_fh = shmem_encode_fh,
2175 .fh_to_dentry = shmem_fh_to_dentry,
2176};
2177
2178static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo,
2179 bool remount)
2180{
2181 char *this_char, *value, *rest;
2182
2183 while (options != NULL) {
2184 this_char = options;
2185 for (;;) {
2186
2187
2188
2189
2190
2191 options = strchr(options, ',');
2192 if (options == NULL)
2193 break;
2194 options++;
2195 if (!isdigit(*options)) {
2196 options[-1] = '\0';
2197 break;
2198 }
2199 }
2200 if (!*this_char)
2201 continue;
2202 if ((value = strchr(this_char,'=')) != NULL) {
2203 *value++ = 0;
2204 } else {
2205 printk(KERN_ERR
2206 "tmpfs: No value for mount option '%s'\n",
2207 this_char);
2208 return 1;
2209 }
2210
2211 if (!strcmp(this_char,"size")) {
2212 unsigned long long size;
2213 size = memparse(value,&rest);
2214 if (*rest == '%') {
2215 size <<= PAGE_SHIFT;
2216 size *= totalram_pages;
2217 do_div(size, 100);
2218 rest++;
2219 }
2220 if (*rest)
2221 goto bad_val;
2222 sbinfo->max_blocks =
2223 DIV_ROUND_UP(size, PAGE_CACHE_SIZE);
2224 } else if (!strcmp(this_char,"nr_blocks")) {
2225 sbinfo->max_blocks = memparse(value, &rest);
2226 if (*rest)
2227 goto bad_val;
2228 } else if (!strcmp(this_char,"nr_inodes")) {
2229 sbinfo->max_inodes = memparse(value, &rest);
2230 if (*rest)
2231 goto bad_val;
2232 } else if (!strcmp(this_char,"mode")) {
2233 if (remount)
2234 continue;
2235 sbinfo->mode = simple_strtoul(value, &rest, 8) & 07777;
2236 if (*rest)
2237 goto bad_val;
2238 } else if (!strcmp(this_char,"uid")) {
2239 if (remount)
2240 continue;
2241 sbinfo->uid = simple_strtoul(value, &rest, 0);
2242 if (*rest)
2243 goto bad_val;
2244 } else if (!strcmp(this_char,"gid")) {
2245 if (remount)
2246 continue;
2247 sbinfo->gid = simple_strtoul(value, &rest, 0);
2248 if (*rest)
2249 goto bad_val;
2250 } else if (!strcmp(this_char,"mpol")) {
2251 if (mpol_parse_str(value, &sbinfo->mpol, 1))
2252 goto bad_val;
2253 } else {
2254 printk(KERN_ERR "tmpfs: Bad mount option %s\n",
2255 this_char);
2256 return 1;
2257 }
2258 }
2259 return 0;
2260
2261bad_val:
2262 printk(KERN_ERR "tmpfs: Bad value '%s' for mount option '%s'\n",
2263 value, this_char);
2264 return 1;
2265
2266}
2267
2268static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
2269{
2270 struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
2271 struct shmem_sb_info config = *sbinfo;
2272 unsigned long inodes;
2273 int error = -EINVAL;
2274
2275 if (shmem_parse_options(data, &config, true))
2276 return error;
2277
2278 spin_lock(&sbinfo->stat_lock);
2279 inodes = sbinfo->max_inodes - sbinfo->free_inodes;
2280 if (percpu_counter_compare(&sbinfo->used_blocks, config.max_blocks) > 0)
2281 goto out;
2282 if (config.max_inodes < inodes)
2283 goto out;
2284
2285
2286
2287
2288
2289
2290 if (config.max_blocks && !sbinfo->max_blocks)
2291 goto out;
2292 if (config.max_inodes && !sbinfo->max_inodes)
2293 goto out;
2294
2295 error = 0;
2296 sbinfo->max_blocks = config.max_blocks;
2297 sbinfo->max_inodes = config.max_inodes;
2298 sbinfo->free_inodes = config.max_inodes - inodes;
2299
2300 mpol_put(sbinfo->mpol);
2301 sbinfo->mpol = config.mpol;
2302out:
2303 spin_unlock(&sbinfo->stat_lock);
2304 return error;
2305}
2306
2307static int shmem_show_options(struct seq_file *seq, struct vfsmount *vfs)
2308{
2309 struct shmem_sb_info *sbinfo = SHMEM_SB(vfs->mnt_sb);
2310
2311 if (sbinfo->max_blocks != shmem_default_max_blocks())
2312 seq_printf(seq, ",size=%luk",
2313 sbinfo->max_blocks << (PAGE_CACHE_SHIFT - 10));
2314 if (sbinfo->max_inodes != shmem_default_max_inodes())
2315 seq_printf(seq, ",nr_inodes=%lu", sbinfo->max_inodes);
2316 if (sbinfo->mode != (S_IRWXUGO | S_ISVTX))
2317 seq_printf(seq, ",mode=%03o", sbinfo->mode);
2318 if (sbinfo->uid != 0)
2319 seq_printf(seq, ",uid=%u", sbinfo->uid);
2320 if (sbinfo->gid != 0)
2321 seq_printf(seq, ",gid=%u", sbinfo->gid);
2322 shmem_show_mpol(seq, sbinfo->mpol);
2323 return 0;
2324}
2325#endif
2326
2327static void shmem_put_super(struct super_block *sb)
2328{
2329 struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
2330
2331 percpu_counter_destroy(&sbinfo->used_blocks);
2332 kfree(sbinfo);
2333 sb->s_fs_info = NULL;
2334}
2335
2336int shmem_fill_super(struct super_block *sb, void *data, int silent)
2337{
2338 struct inode *inode;
2339 struct dentry *root;
2340 struct shmem_sb_info *sbinfo;
2341 int err = -ENOMEM;
2342
2343
2344 sbinfo = kzalloc(max((int)sizeof(struct shmem_sb_info),
2345 L1_CACHE_BYTES), GFP_KERNEL);
2346 if (!sbinfo)
2347 return -ENOMEM;
2348
2349 sbinfo->mode = S_IRWXUGO | S_ISVTX;
2350 sbinfo->uid = current_fsuid();
2351 sbinfo->gid = current_fsgid();
2352 sb->s_fs_info = sbinfo;
2353
2354#ifdef CONFIG_TMPFS
2355
2356
2357
2358
2359
2360 if (!(sb->s_flags & MS_NOUSER)) {
2361 sbinfo->max_blocks = shmem_default_max_blocks();
2362 sbinfo->max_inodes = shmem_default_max_inodes();
2363 if (shmem_parse_options(data, sbinfo, false)) {
2364 err = -EINVAL;
2365 goto failed;
2366 }
2367 }
2368 sb->s_export_op = &shmem_export_ops;
2369#else
2370 sb->s_flags |= MS_NOUSER;
2371#endif
2372
2373 spin_lock_init(&sbinfo->stat_lock);
2374 if (percpu_counter_init(&sbinfo->used_blocks, 0))
2375 goto failed;
2376 sbinfo->free_inodes = sbinfo->max_inodes;
2377
2378 sb->s_maxbytes = SHMEM_MAX_BYTES;
2379 sb->s_blocksize = PAGE_CACHE_SIZE;
2380 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
2381 sb->s_magic = TMPFS_MAGIC;
2382 sb->s_op = &shmem_ops;
2383 sb->s_time_gran = 1;
2384#ifdef CONFIG_TMPFS_POSIX_ACL
2385 sb->s_xattr = shmem_xattr_handlers;
2386 sb->s_flags |= MS_POSIXACL;
2387#endif
2388
2389 inode = shmem_get_inode(sb, NULL, S_IFDIR | sbinfo->mode, 0, VM_NORESERVE);
2390 if (!inode)
2391 goto failed;
2392 inode->i_uid = sbinfo->uid;
2393 inode->i_gid = sbinfo->gid;
2394 root = d_alloc_root(inode);
2395 if (!root)
2396 goto failed_iput;
2397 sb->s_root = root;
2398 return 0;
2399
2400failed_iput:
2401 iput(inode);
2402failed:
2403 shmem_put_super(sb);
2404 return err;
2405}
2406
2407static struct kmem_cache *shmem_inode_cachep;
2408
2409static struct inode *shmem_alloc_inode(struct super_block *sb)
2410{
2411 struct shmem_inode_info *p;
2412 p = (struct shmem_inode_info *)kmem_cache_alloc(shmem_inode_cachep, GFP_KERNEL);
2413 if (!p)
2414 return NULL;
2415 return &p->vfs_inode;
2416}
2417
2418static void shmem_i_callback(struct rcu_head *head)
2419{
2420 struct inode *inode = container_of(head, struct inode, i_rcu);
2421 INIT_LIST_HEAD(&inode->i_dentry);
2422 kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode));
2423}
2424
2425static void shmem_destroy_inode(struct inode *inode)
2426{
2427 if ((inode->i_mode & S_IFMT) == S_IFREG) {
2428
2429 mpol_free_shared_policy(&SHMEM_I(inode)->policy);
2430 }
2431 call_rcu(&inode->i_rcu, shmem_i_callback);
2432}
2433
2434static void init_once(void *foo)
2435{
2436 struct shmem_inode_info *p = (struct shmem_inode_info *) foo;
2437
2438 inode_init_once(&p->vfs_inode);
2439}
2440
2441static int init_inodecache(void)
2442{
2443 shmem_inode_cachep = kmem_cache_create("shmem_inode_cache",
2444 sizeof(struct shmem_inode_info),
2445 0, SLAB_PANIC, init_once);
2446 return 0;
2447}
2448
2449static void destroy_inodecache(void)
2450{
2451 kmem_cache_destroy(shmem_inode_cachep);
2452}
2453
2454static const struct address_space_operations shmem_aops = {
2455 .writepage = shmem_writepage,
2456 .set_page_dirty = __set_page_dirty_no_writeback,
2457#ifdef CONFIG_TMPFS
2458 .readpage = shmem_readpage,
2459 .write_begin = shmem_write_begin,
2460 .write_end = shmem_write_end,
2461#endif
2462 .migratepage = migrate_page,
2463 .error_remove_page = generic_error_remove_page,
2464};
2465
2466static const struct file_operations shmem_file_operations = {
2467 .mmap = shmem_mmap,
2468#ifdef CONFIG_TMPFS
2469 .llseek = generic_file_llseek,
2470 .read = do_sync_read,
2471 .write = do_sync_write,
2472 .aio_read = shmem_file_aio_read,
2473 .aio_write = generic_file_aio_write,
2474 .fsync = noop_fsync,
2475 .splice_read = generic_file_splice_read,
2476 .splice_write = generic_file_splice_write,
2477#endif
2478};
2479
2480static const struct inode_operations shmem_inode_operations = {
2481 .setattr = shmem_notify_change,
2482 .truncate_range = shmem_truncate_range,
2483#ifdef CONFIG_TMPFS_POSIX_ACL
2484 .setxattr = generic_setxattr,
2485 .getxattr = generic_getxattr,
2486 .listxattr = generic_listxattr,
2487 .removexattr = generic_removexattr,
2488 .check_acl = generic_check_acl,
2489#endif
2490
2491};
2492
2493static const struct inode_operations shmem_dir_inode_operations = {
2494#ifdef CONFIG_TMPFS
2495 .create = shmem_create,
2496 .lookup = simple_lookup,
2497 .link = shmem_link,
2498 .unlink = shmem_unlink,
2499 .symlink = shmem_symlink,
2500 .mkdir = shmem_mkdir,
2501 .rmdir = shmem_rmdir,
2502 .mknod = shmem_mknod,
2503 .rename = shmem_rename,
2504#endif
2505#ifdef CONFIG_TMPFS_POSIX_ACL
2506 .setattr = shmem_notify_change,
2507 .setxattr = generic_setxattr,
2508 .getxattr = generic_getxattr,
2509 .listxattr = generic_listxattr,
2510 .removexattr = generic_removexattr,
2511 .check_acl = generic_check_acl,
2512#endif
2513};
2514
2515static const struct inode_operations shmem_special_inode_operations = {
2516#ifdef CONFIG_TMPFS_POSIX_ACL
2517 .setattr = shmem_notify_change,
2518 .setxattr = generic_setxattr,
2519 .getxattr = generic_getxattr,
2520 .listxattr = generic_listxattr,
2521 .removexattr = generic_removexattr,
2522 .check_acl = generic_check_acl,
2523#endif
2524};
2525
2526static const struct super_operations shmem_ops = {
2527 .alloc_inode = shmem_alloc_inode,
2528 .destroy_inode = shmem_destroy_inode,
2529#ifdef CONFIG_TMPFS
2530 .statfs = shmem_statfs,
2531 .remount_fs = shmem_remount_fs,
2532 .show_options = shmem_show_options,
2533#endif
2534 .evict_inode = shmem_evict_inode,
2535 .drop_inode = generic_delete_inode,
2536 .put_super = shmem_put_super,
2537};
2538
2539static const struct vm_operations_struct shmem_vm_ops = {
2540 .fault = shmem_fault,
2541#ifdef CONFIG_NUMA
2542 .set_policy = shmem_set_policy,
2543 .get_policy = shmem_get_policy,
2544#endif
2545};
2546
2547
2548static struct dentry *shmem_mount(struct file_system_type *fs_type,
2549 int flags, const char *dev_name, void *data)
2550{
2551 return mount_nodev(fs_type, flags, data, shmem_fill_super);
2552}
2553
2554static struct file_system_type tmpfs_fs_type = {
2555 .owner = THIS_MODULE,
2556 .name = "tmpfs",
2557 .mount = shmem_mount,
2558 .kill_sb = kill_litter_super,
2559};
2560
2561int __init init_tmpfs(void)
2562{
2563 int error;
2564
2565 error = bdi_init(&shmem_backing_dev_info);
2566 if (error)
2567 goto out4;
2568
2569 error = init_inodecache();
2570 if (error)
2571 goto out3;
2572
2573 error = register_filesystem(&tmpfs_fs_type);
2574 if (error) {
2575 printk(KERN_ERR "Could not register tmpfs\n");
2576 goto out2;
2577 }
2578
2579 shm_mnt = vfs_kern_mount(&tmpfs_fs_type, MS_NOUSER,
2580 tmpfs_fs_type.name, NULL);
2581 if (IS_ERR(shm_mnt)) {
2582 error = PTR_ERR(shm_mnt);
2583 printk(KERN_ERR "Could not kern_mount tmpfs\n");
2584 goto out1;
2585 }
2586 return 0;
2587
2588out1:
2589 unregister_filesystem(&tmpfs_fs_type);
2590out2:
2591 destroy_inodecache();
2592out3:
2593 bdi_destroy(&shmem_backing_dev_info);
2594out4:
2595 shm_mnt = ERR_PTR(error);
2596 return error;
2597}
2598
2599#ifdef CONFIG_CGROUP_MEM_RES_CTLR
2600
2601
2602
2603
2604
2605
2606
2607
2608
2609
2610void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff,
2611 struct page **pagep, swp_entry_t *ent)
2612{
2613 swp_entry_t entry = { .val = 0 }, *ptr;
2614 struct page *page = NULL;
2615 struct shmem_inode_info *info = SHMEM_I(inode);
2616
2617 if ((pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode))
2618 goto out;
2619
2620 spin_lock(&info->lock);
2621 ptr = shmem_swp_entry(info, pgoff, NULL);
2622#ifdef CONFIG_SWAP
2623 if (ptr && ptr->val) {
2624 entry.val = ptr->val;
2625 page = find_get_page(&swapper_space, entry.val);
2626 } else
2627#endif
2628 page = find_get_page(inode->i_mapping, pgoff);
2629 if (ptr)
2630 shmem_swp_unmap(ptr);
2631 spin_unlock(&info->lock);
2632out:
2633 *pagep = page;
2634 *ent = entry;
2635}
2636#endif
2637
2638#else
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649#include <linux/ramfs.h>
2650
2651static struct file_system_type tmpfs_fs_type = {
2652 .name = "tmpfs",
2653 .mount = ramfs_mount,
2654 .kill_sb = kill_litter_super,
2655};
2656
2657int __init init_tmpfs(void)
2658{
2659 BUG_ON(register_filesystem(&tmpfs_fs_type) != 0);
2660
2661 shm_mnt = kern_mount(&tmpfs_fs_type);
2662 BUG_ON(IS_ERR(shm_mnt));
2663
2664 return 0;
2665}
2666
2667int shmem_unuse(swp_entry_t entry, struct page *page)
2668{
2669 return 0;
2670}
2671
/* !CONFIG_SHMEM stub: locking is a no-op that always succeeds. */
int shmem_lock(struct file *file, int lock, struct user_struct *user)
{
	/* Nothing to do in this configuration. */
	return 0;
}
2676
2677#ifdef CONFIG_CGROUP_MEM_RES_CTLR
2678
2679
2680
2681
2682
2683
2684
2685
2686
2687
2688void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff,
2689 struct page **pagep, swp_entry_t *ent)
2690{
2691 struct page *page = NULL;
2692
2693 if ((pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode))
2694 goto out;
2695 page = find_get_page(inode->i_mapping, pgoff);
2696out:
2697 *pagep = page;
2698 *ent = (swp_entry_t){ .val = 0 };
2699}
2700#endif
2701
/*
 * !CONFIG_SHMEM: map the shmem names used by the common code below onto
 * their ramfs/generic equivalents.  Size accounting becomes a no-op and
 * shmem_get_inode() drops its flags argument.
 */
#define shmem_vm_ops				generic_file_vm_ops
#define shmem_file_operations			ramfs_file_operations
#define shmem_get_inode(sb, dir, mode, dev, flags)	ramfs_get_inode(sb, dir, mode, dev)
#define shmem_acct_size(flags, size)		0
#define shmem_unacct_size(flags, size)		do {} while (0)
#define SHMEM_MAX_BYTES				MAX_LFS_FILESIZE
2708
2709#endif
2710
2711
2712
2713
2714
2715
2716
2717
2718
2719struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags)
2720{
2721 int error;
2722 struct file *file;
2723 struct inode *inode;
2724 struct path path;
2725 struct dentry *root;
2726 struct qstr this;
2727
2728 if (IS_ERR(shm_mnt))
2729 return (void *)shm_mnt;
2730
2731 if (size < 0 || size > SHMEM_MAX_BYTES)
2732 return ERR_PTR(-EINVAL);
2733
2734 if (shmem_acct_size(flags, size))
2735 return ERR_PTR(-ENOMEM);
2736
2737 error = -ENOMEM;
2738 this.name = name;
2739 this.len = strlen(name);
2740 this.hash = 0;
2741 root = shm_mnt->mnt_root;
2742 path.dentry = d_alloc(root, &this);
2743 if (!path.dentry)
2744 goto put_memory;
2745 path.mnt = mntget(shm_mnt);
2746
2747 error = -ENOSPC;
2748 inode = shmem_get_inode(root->d_sb, NULL, S_IFREG | S_IRWXUGO, 0, flags);
2749 if (!inode)
2750 goto put_dentry;
2751
2752 d_instantiate(path.dentry, inode);
2753 inode->i_size = size;
2754 inode->i_nlink = 0;
2755#ifndef CONFIG_MMU
2756 error = ramfs_nommu_expand_for_mapping(inode, size);
2757 if (error)
2758 goto put_dentry;
2759#endif
2760
2761 error = -ENFILE;
2762 file = alloc_file(&path, FMODE_WRITE | FMODE_READ,
2763 &shmem_file_operations);
2764 if (!file)
2765 goto put_dentry;
2766
2767 return file;
2768
2769put_dentry:
2770 path_put(&path);
2771put_memory:
2772 shmem_unacct_size(flags, size);
2773 return ERR_PTR(error);
2774}
2775EXPORT_SYMBOL_GPL(shmem_file_setup);
2776
2777
2778
2779
2780
2781int shmem_zero_setup(struct vm_area_struct *vma)
2782{
2783 struct file *file;
2784 loff_t size = vma->vm_end - vma->vm_start;
2785
2786 file = shmem_file_setup("dev/zero", size, vma->vm_flags);
2787 if (IS_ERR(file))
2788 return PTR_ERR(file);
2789
2790 if (vma->vm_file)
2791 fput(vma->vm_file);
2792 vma->vm_file = file;
2793 vma->vm_ops = &shmem_vm_ops;
2794 return 0;
2795}
2796