#include <linux/export.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/backing-dev.h>
#include <linux/hash.h>
#include <linux/swap.h>
#include <linux/security.h>
#include <linux/cdev.h>
#include <linux/bootmem.h>
#include <linux/fsnotify.h>
#include <linux/mount.h>
#include <linux/posix_acl.h>
#include <linux/prefetch.h>
#include <linux/buffer_head.h>
#include <linux/ratelimit.h>
#include "internal.h"
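
/*
 * Inode locking rules (as implemented in this file):
 *
 * inode->i_lock protects:
 *   inode->i_state, inode->i_hash, __iget()
 * inode->i_sb->s_inode_lru_lock protects:
 *   inode->i_sb->s_inode_lru, inode->i_lru
 * inode_sb_list_lock protects:
 *   sb->s_inodes, inode->i_sb_list
 * inode_hash_lock protects:
 *   inode_hashtable, inode->i_hash
 *
 * Lock ordering:
 *
 * inode_sb_list_lock
 *   inode->i_lock
 *     inode->i_sb->s_inode_lru_lock
 *
 * inode_hash_lock
 *   inode_sb_list_lock
 *   inode->i_lock
 *
 * iunique_lock
 *   inode_hash_lock
 */
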
static unsigned int i_hash_mask __read_mostly;
static unsigned int i_hash_shift __read_mostly;
static struct hlist_head *inode_hashtable __read_mostly;
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock);

__cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_sb_list_lock);
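
/*
 * Empty aops.  Can be used for the cases where the user does not
 * define any of the address_space operations.
 */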
const struct address_space_operations empty_aops = {
};
EXPORT_SYMBOL(empty_aops);
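
/*
 * Statistics gathering.
 */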
struct inodes_stat_t inodes_stat;

static DEFINE_PER_CPU(unsigned int, nr_inodes);
static DEFINE_PER_CPU(unsigned int, nr_unused);

static struct kmem_cache *inode_cachep __read_mostly;

static int get_nr_inodes(void)
{
	int i;
	int sum = 0;
	for_each_possible_cpu(i)
		sum += per_cpu(nr_inodes, i);
	return sum < 0 ? 0 : sum;
}

static inline int get_nr_inodes_unused(void)
{
	int i;
	int sum = 0;
	for_each_possible_cpu(i)
		sum += per_cpu(nr_unused, i);
	return sum < 0 ? 0 : sum;
}

int get_nr_dirty_inodes(void)
{
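	/* not actually dirty inodes, but a wild approximation */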
	int nr_dirty = get_nr_inodes() - get_nr_inodes_unused();
	return nr_dirty > 0 ? nr_dirty : 0;
}
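
/*
 * Handle the nr_inodes sysctl: refresh the counters before reporting.
 */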
#ifdef CONFIG_SYSCTL
int proc_nr_inodes(ctl_table *table, int write,
		   void __user *buffer, size_t *lenp, loff_t *ppos)
{
	inodes_stat.nr_inodes = get_nr_inodes();
	inodes_stat.nr_unused = get_nr_inodes_unused();
	return proc_dointvec(table, write, buffer, lenp, ppos);
}
#endif
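
/**
 * inode_init_always - perform inode structure initialisation
 * @sb: superblock inode belongs to
 * @inode: inode to initialise
 *
 * These are initializations that need to be done on every inode
 * allocation as the fields are not initialised by slab allocation.
 */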
int inode_init_always(struct super_block *sb, struct inode *inode)
{
	static const struct inode_operations empty_iops;
	static const struct file_operations empty_fops;
	struct address_space *const mapping = &inode->i_data;

	inode->i_sb = sb;
	inode->i_blkbits = sb->s_blocksize_bits;
	inode->i_flags = 0;
	atomic_set(&inode->i_count, 1);
	inode->i_op = &empty_iops;
	inode->i_fop = &empty_fops;
	inode->__i_nlink = 1;
	inode->i_opflags = 0;
	i_uid_write(inode, 0);
	i_gid_write(inode, 0);
	atomic_set(&inode->i_writecount, 0);
	inode->i_size = 0;
	inode->i_blocks = 0;
	inode->i_bytes = 0;
	inode->i_generation = 0;
#ifdef CONFIG_QUOTA
	memset(&inode->i_dquot, 0, sizeof(inode->i_dquot));
#endif
	inode->i_pipe = NULL;
	inode->i_bdev = NULL;
	inode->i_cdev = NULL;
	inode->i_rdev = 0;
	inode->dirtied_when = 0;

	if (security_inode_alloc(inode))
		goto out;
	spin_lock_init(&inode->i_lock);
	lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key);

	mutex_init(&inode->i_mutex);
	lockdep_set_class(&inode->i_mutex, &sb->s_type->i_mutex_key);

	atomic_set(&inode->i_dio_count, 0);

	mapping->a_ops = &empty_aops;
	mapping->host = inode;
	mapping->flags = 0;
	mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE);
	mapping->assoc_mapping = NULL;
	mapping->backing_dev_info = &default_backing_dev_info;
	mapping->writeback_index = 0;
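
	/*
	 * If the block_device provides a backing_dev_info for client
	 * inodes then use that.  Otherwise the inode shares the default
	 * backing_dev_info of the block layer.
	 */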
	if (sb->s_bdev) {
		struct backing_dev_info *bdi;

		bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info;
		mapping->backing_dev_info = bdi;
	}
	inode->i_private = NULL;
	inode->i_mapping = mapping;
	INIT_HLIST_HEAD(&inode->i_dentry);
#ifdef CONFIG_FS_POSIX_ACL
	inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED;
#endif

#ifdef CONFIG_FSNOTIFY
	inode->i_fsnotify_mask = 0;
#endif

	this_cpu_inc(nr_inodes);

	return 0;
out:
	return -ENOMEM;
}
EXPORT_SYMBOL(inode_init_always);

static struct inode *alloc_inode(struct super_block *sb)
{
	struct inode *inode;

	if (sb->s_op->alloc_inode)
		inode = sb->s_op->alloc_inode(sb);
	else
		inode = kmem_cache_alloc(inode_cachep, GFP_KERNEL);

	if (!inode)
		return NULL;

	if (unlikely(inode_init_always(sb, inode))) {
		if (inode->i_sb->s_op->destroy_inode)
			inode->i_sb->s_op->destroy_inode(inode);
		else
			kmem_cache_free(inode_cachep, inode);
		return NULL;
	}

	return inode;
}

void free_inode_nonrcu(struct inode *inode)
{
	kmem_cache_free(inode_cachep, inode);
}
EXPORT_SYMBOL(free_inode_nonrcu);

void __destroy_inode(struct inode *inode)
{
	BUG_ON(inode_has_buffers(inode));
	security_inode_free(inode);
	fsnotify_inode_delete(inode);
	if (!inode->i_nlink) {
		WARN_ON(atomic_long_read(&inode->i_sb->s_remove_count) == 0);
		atomic_long_dec(&inode->i_sb->s_remove_count);
	}

#ifdef CONFIG_FS_POSIX_ACL
	if (inode->i_acl && inode->i_acl != ACL_NOT_CACHED)
		posix_acl_release(inode->i_acl);
	if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED)
		posix_acl_release(inode->i_default_acl);
#endif
	this_cpu_dec(nr_inodes);
}
EXPORT_SYMBOL(__destroy_inode);

static void i_callback(struct rcu_head *head)
{
	struct inode *inode = container_of(head, struct inode, i_rcu);
	kmem_cache_free(inode_cachep, inode);
}

static void destroy_inode(struct inode *inode)
{
	BUG_ON(!list_empty(&inode->i_lru));
	__destroy_inode(inode);
	if (inode->i_sb->s_op->destroy_inode)
		inode->i_sb->s_op->destroy_inode(inode);
	else
		call_rcu(&inode->i_rcu, i_callback);
}
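
/**
 * drop_nlink - directly drop an inode's link count
 * @inode: inode
 *
 * This is a low-level filesystem helper to replace any direct filesystem
 * manipulation of i_nlink.  In cases where we are attempting to track
 * writes to the filesystem, a decrement to zero means an imminent write
 * when the file is truncated and actually unlinked on the filesystem.
 */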
void drop_nlink(struct inode *inode)
{
	WARN_ON(inode->i_nlink == 0);
	inode->__i_nlink--;
	if (!inode->i_nlink)
		atomic_long_inc(&inode->i_sb->s_remove_count);
}
EXPORT_SYMBOL(drop_nlink);
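
/**
 * clear_nlink - directly zero an inode's link count
 * @inode: inode
 *
 * This is a low-level filesystem helper to replace any direct filesystem
 * manipulation of i_nlink.  See drop_nlink() for why we care about i_nlink
 * hitting zero.
 */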
void clear_nlink(struct inode *inode)
{
	if (inode->i_nlink) {
		inode->__i_nlink = 0;
		atomic_long_inc(&inode->i_sb->s_remove_count);
	}
}
EXPORT_SYMBOL(clear_nlink);
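
/**
 * set_nlink - directly set an inode's link count
 * @inode: inode
 * @nlink: new nlink (should be non-zero)
 *
 * This is a low-level filesystem helper to replace any direct filesystem
 * manipulation of i_nlink.
 */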
void set_nlink(struct inode *inode, unsigned int nlink)
{
	if (!nlink) {
		clear_nlink(inode);
	} else {
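		/* Yes, some filesystems do change nlink from zero to one */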
		if (inode->i_nlink == 0)
			atomic_long_dec(&inode->i_sb->s_remove_count);

		inode->__i_nlink = nlink;
	}
}
EXPORT_SYMBOL(set_nlink);
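
/**
 * inc_nlink - directly increment an inode's link count
 * @inode: inode
 *
 * This is a low-level filesystem helper to replace any direct filesystem
 * manipulation of i_nlink.  If the link count was previously zero, the
 * matching s_remove_count bookkeeping from drop_nlink()/clear_nlink() is
 * undone.
 */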
void inc_nlink(struct inode *inode)
{
	if (WARN_ON(inode->i_nlink == 0))
		atomic_long_dec(&inode->i_sb->s_remove_count);

	inode->__i_nlink++;
}
EXPORT_SYMBOL(inc_nlink);

void address_space_init_once(struct address_space *mapping)
{
	memset(mapping, 0, sizeof(*mapping));
	INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
	spin_lock_init(&mapping->tree_lock);
	mutex_init(&mapping->i_mmap_mutex);
	INIT_LIST_HEAD(&mapping->private_list);
	spin_lock_init(&mapping->private_lock);
	INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
	INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
}
EXPORT_SYMBOL(address_space_init_once);
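
/*
 * These are initializations that only need to be done once, because
 * the fields are idempotent across use of the inode, so the slab
 * constructor can do them ahead of time.
 */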
void inode_init_once(struct inode *inode)
{
	memset(inode, 0, sizeof(*inode));
	INIT_HLIST_NODE(&inode->i_hash);
	INIT_LIST_HEAD(&inode->i_devices);
	INIT_LIST_HEAD(&inode->i_wb_list);
	INIT_LIST_HEAD(&inode->i_lru);
	address_space_init_once(&inode->i_data);
	i_size_ordered_init(inode);
#ifdef CONFIG_FSNOTIFY
	INIT_HLIST_HEAD(&inode->i_fsnotify_marks);
#endif
}
EXPORT_SYMBOL(inode_init_once);

static void init_once(void *foo)
{
	struct inode *inode = (struct inode *) foo;

	inode_init_once(inode);
}
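
/*
 * inode->i_lock must be held
 */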
void __iget(struct inode *inode)
{
	atomic_inc(&inode->i_count);
}
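
/*
 * get additional reference to inode; caller must already hold one.
 */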
void ihold(struct inode *inode)
{
	WARN_ON(atomic_inc_return(&inode->i_count) < 2);
}
EXPORT_SYMBOL(ihold);

static void inode_lru_list_add(struct inode *inode)
{
	spin_lock(&inode->i_sb->s_inode_lru_lock);
	if (list_empty(&inode->i_lru)) {
		list_add(&inode->i_lru, &inode->i_sb->s_inode_lru);
		inode->i_sb->s_nr_inodes_unused++;
		this_cpu_inc(nr_unused);
	}
	spin_unlock(&inode->i_sb->s_inode_lru_lock);
}

static void inode_lru_list_del(struct inode *inode)
{
	spin_lock(&inode->i_sb->s_inode_lru_lock);
	if (!list_empty(&inode->i_lru)) {
		list_del_init(&inode->i_lru);
		inode->i_sb->s_nr_inodes_unused--;
		this_cpu_dec(nr_unused);
	}
	spin_unlock(&inode->i_sb->s_inode_lru_lock);
}
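
/**
 * inode_sb_list_add - add inode to the superblock list of inodes
 * @inode: inode to add
 */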
void inode_sb_list_add(struct inode *inode)
{
	spin_lock(&inode_sb_list_lock);
	list_add(&inode->i_sb_list, &inode->i_sb->s_inodes);
	spin_unlock(&inode_sb_list_lock);
}
EXPORT_SYMBOL_GPL(inode_sb_list_add);

static inline void inode_sb_list_del(struct inode *inode)
{
	if (!list_empty(&inode->i_sb_list)) {
		spin_lock(&inode_sb_list_lock);
		list_del_init(&inode->i_sb_list);
		spin_unlock(&inode_sb_list_lock);
	}
}

static unsigned long hash(struct super_block *sb, unsigned long hashval)
{
	unsigned long tmp;

	tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) /
			L1_CACHE_BYTES;
	tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> i_hash_shift);
	return tmp & i_hash_mask;
}
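
/**
 * __insert_inode_hash - hash an inode
 * @inode: unhashed inode
 * @hashval: unsigned long value used to locate this object in the
 *	inode_hashtable.
 *
 * Add an inode to the inode hash for this superblock.
 */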
void __insert_inode_hash(struct inode *inode, unsigned long hashval)
{
	struct hlist_head *b = inode_hashtable + hash(inode->i_sb, hashval);

	spin_lock(&inode_hash_lock);
	spin_lock(&inode->i_lock);
	hlist_add_head(&inode->i_hash, b);
	spin_unlock(&inode->i_lock);
	spin_unlock(&inode_hash_lock);
}
EXPORT_SYMBOL(__insert_inode_hash);
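
/**
 * __remove_inode_hash - remove an inode from the hash
 * @inode: inode to unhash
 *
 * Remove an inode from the superblock's inode hash.
 */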
void __remove_inode_hash(struct inode *inode)
{
	spin_lock(&inode_hash_lock);
	spin_lock(&inode->i_lock);
	hlist_del_init(&inode->i_hash);
	spin_unlock(&inode->i_lock);
	spin_unlock(&inode_hash_lock);
}
EXPORT_SYMBOL(__remove_inode_hash);

void clear_inode(struct inode *inode)
{
	might_sleep();
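	/*
	 * We have to cycle the tree_lock here because reclaim can be still
	 * in the process of removing the last page (in
	 * __delete_from_page_cache()) and we must not free the mapping
	 * under it.
	 */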
	spin_lock_irq(&inode->i_data.tree_lock);
	BUG_ON(inode->i_data.nrpages);
	spin_unlock_irq(&inode->i_data.tree_lock);
	BUG_ON(!list_empty(&inode->i_data.private_list));
	BUG_ON(!(inode->i_state & I_FREEING));
	BUG_ON(inode->i_state & I_CLEAR);
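	/* don't need i_lock here, no concurrent mods to i_state */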
	inode->i_state = I_FREEING | I_CLEAR;
}
EXPORT_SYMBOL(clear_inode);
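
/*
 * Free the inode passed in, removing it from the lists it is still connected
 * to.  We remove any pages still attached to the inode and wait for any IO
 * that is still in progress before finally destroying the inode.
 *
 * An inode must already be marked I_FREEING so that we avoid the inode being
 * moved back onto lists if we race with other code that manipulates the lists
 * (e.g. writeback_single_inode).  The caller is responsible for setting this.
 *
 * An inode must already be removed from the LRU list before being evicted
 * from the cache.  This should occur atomically with setting the I_FREEING
 * state flag, so no inodes here should ever be on the LRU when being evicted.
 */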
static void evict(struct inode *inode)
{
	const struct super_operations *op = inode->i_sb->s_op;

	BUG_ON(!(inode->i_state & I_FREEING));
	BUG_ON(!list_empty(&inode->i_lru));

	if (!list_empty(&inode->i_wb_list))
		inode_wb_list_del(inode);

	inode_sb_list_del(inode);
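
	/*
	 * Wait for the flusher thread to be done with the inode so that the
	 * filesystem does not start destroying it while writeback is still
	 * running.  Since the inode has I_FREEING set, the flusher thread
	 * won't start new work on it; we just have to wait for running
	 * writeback to finish.
	 */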
	inode_wait_for_writeback(inode);

	if (op->evict_inode) {
		op->evict_inode(inode);
	} else {
		if (inode->i_data.nrpages)
			truncate_inode_pages(&inode->i_data, 0);
		clear_inode(inode);
	}
	if (S_ISBLK(inode->i_mode) && inode->i_bdev)
		bd_forget(inode);
	if (S_ISCHR(inode->i_mode) && inode->i_cdev)
		cd_forget(inode);

	remove_inode_hash(inode);

	spin_lock(&inode->i_lock);
	wake_up_bit(&inode->i_state, __I_NEW);
	BUG_ON(inode->i_state != (I_FREEING | I_CLEAR));
	spin_unlock(&inode->i_lock);

	destroy_inode(inode);
}
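
/*
 * dispose_list - dispose of the contents of a local list
 * @head: the head of the list to free
 *
 * Dispose-list gets a local list with local inodes in it, so it doesn't
 * need to worry about list corruption and SMP locks.
 */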
static void dispose_list(struct list_head *head)
{
	while (!list_empty(head)) {
		struct inode *inode;

		inode = list_first_entry(head, struct inode, i_lru);
		list_del_init(&inode->i_lru);

		evict(inode);
	}
}
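
/**
 * evict_inodes	- evict all evictable inodes for a superblock
 * @sb:		superblock to operate on
 *
 * Make sure that no inodes with zero refcount are retained.  This is
 * called by superblock shutdown after having MS_ACTIVE flag removed,
 * so any inode reaching zero refcount during or after that call will
 * be immediately evicted.
 */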
void evict_inodes(struct super_block *sb)
{
	struct inode *inode, *next;
	LIST_HEAD(dispose);

	spin_lock(&inode_sb_list_lock);
	list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
		if (atomic_read(&inode->i_count))
			continue;

		spin_lock(&inode->i_lock);
		if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
			spin_unlock(&inode->i_lock);
			continue;
		}

		inode->i_state |= I_FREEING;
		inode_lru_list_del(inode);
		spin_unlock(&inode->i_lock);
		list_add(&inode->i_lru, &dispose);
	}
	spin_unlock(&inode_sb_list_lock);

	dispose_list(&dispose);
}
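
/**
 * invalidate_inodes	- attempt to free all inodes on a superblock
 * @sb:		superblock to operate on
 * @kill_dirty: flag to guide handling of dirty inodes
 *
 * Attempts to free all inodes for a given superblock.  If there were any
 * busy inodes return a non-zero value, else zero.
 * If @kill_dirty is set, discard dirty inodes too, otherwise treat
 * them as busy.
 */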
int invalidate_inodes(struct super_block *sb, bool kill_dirty)
{
	int busy = 0;
	struct inode *inode, *next;
	LIST_HEAD(dispose);

	spin_lock(&inode_sb_list_lock);
	list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
		spin_lock(&inode->i_lock);
		if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
			spin_unlock(&inode->i_lock);
			continue;
		}
		if (inode->i_state & I_DIRTY && !kill_dirty) {
			spin_unlock(&inode->i_lock);
			busy = 1;
			continue;
		}
		if (atomic_read(&inode->i_count)) {
			spin_unlock(&inode->i_lock);
			busy = 1;
			continue;
		}

		inode->i_state |= I_FREEING;
		inode_lru_list_del(inode);
		spin_unlock(&inode->i_lock);
		list_add(&inode->i_lru, &dispose);
	}
	spin_unlock(&inode_sb_list_lock);

	dispose_list(&dispose);

	return busy;
}

static int can_unuse(struct inode *inode)
{
	if (inode->i_state & ~I_REFERENCED)
		return 0;
	if (inode_has_buffers(inode))
		return 0;
	if (atomic_read(&inode->i_count))
		return 0;
	if (inode->i_data.nrpages)
		return 0;
	return 1;
}
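
/*
 * Walk the superblock inode LRU for freeable inodes and attempt to free
 * them.  This is called from the superblock shrinker with a number of
 * inodes to trim from the LRU.  Inodes to be freed are moved to a
 * temporary list and then freed outside the lock by dispose_list().
 *
 * Any inodes which are pinned purely because of attached pagecache have
 * their pagecache removed.  If the inode has metadata buffers attached to
 * mapping->private_list then try to remove them.
 *
 * If the inode has the I_REFERENCED flag set, then it means that it has
 * been used recently - the flag is set in iput_final().  When we encounter
 * such an inode, clear the flag and move it to the back of the LRU so it
 * gets another pass through the LRU before it gets reclaimed.  This is
 * necessary because we do lazy LRU updates to minimise lock contention,
 * so the LRU does not have strict ordering.
 */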
void prune_icache_sb(struct super_block *sb, int nr_to_scan)
{
	LIST_HEAD(freeable);
	int nr_scanned;
	unsigned long reap = 0;

	spin_lock(&sb->s_inode_lru_lock);
	for (nr_scanned = nr_to_scan; nr_scanned >= 0; nr_scanned--) {
		struct inode *inode;

		if (list_empty(&sb->s_inode_lru))
			break;

		inode = list_entry(sb->s_inode_lru.prev, struct inode, i_lru);
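
		/*
		 * We are inverting the sb->s_inode_lru_lock/inode->i_lock
		 * order here, so use a trylock.  If we fail to get the lock,
		 * just skip the inode and move on.
		 */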
		if (!spin_trylock(&inode->i_lock)) {
			list_move_tail(&inode->i_lru, &sb->s_inode_lru);
			continue;
		}
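
		/*
		 * Referenced or dirty inodes are still in use.  Give them
		 * another pass through the LRU as we cannot reclaim them now.
		 */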
		if (atomic_read(&inode->i_count) ||
		    (inode->i_state & ~I_REFERENCED)) {
			list_del_init(&inode->i_lru);
			spin_unlock(&inode->i_lock);
			sb->s_nr_inodes_unused--;
			this_cpu_dec(nr_unused);
			continue;
		}
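
		/* recently referenced inodes get one more pass */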
		if (inode->i_state & I_REFERENCED) {
			inode->i_state &= ~I_REFERENCED;
			list_move(&inode->i_lru, &sb->s_inode_lru);
			spin_unlock(&inode->i_lock);
			continue;
		}
		if (inode_has_buffers(inode) || inode->i_data.nrpages) {
			__iget(inode);
			spin_unlock(&inode->i_lock);
			spin_unlock(&sb->s_inode_lru_lock);
			if (remove_inode_buffers(inode))
				reap += invalidate_mapping_pages(&inode->i_data,
								 0, -1);
			iput(inode);
			spin_lock(&sb->s_inode_lru_lock);

			if (inode != list_entry(sb->s_inode_lru.next,
						struct inode, i_lru))
				continue;
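			/* avoid lock inversions with trylock */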
			if (!spin_trylock(&inode->i_lock))
				continue;
			if (!can_unuse(inode)) {
				spin_unlock(&inode->i_lock);
				continue;
			}
		}
		WARN_ON(inode->i_state & I_NEW);
		inode->i_state |= I_FREEING;
		spin_unlock(&inode->i_lock);

		list_move(&inode->i_lru, &freeable);
		sb->s_nr_inodes_unused--;
		this_cpu_dec(nr_unused);
	}
	if (current_is_kswapd())
		__count_vm_events(KSWAPD_INODESTEAL, reap);
	else
		__count_vm_events(PGINODESTEAL, reap);
	spin_unlock(&sb->s_inode_lru_lock);
	if (current->reclaim_state)
		current->reclaim_state->reclaimed_slab += reap;

	dispose_list(&freeable);
}

static void __wait_on_freeing_inode(struct inode *inode);
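
/*
 * Called with the inode hash lock held.
 */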
static struct inode *find_inode(struct super_block *sb,
				struct hlist_head *head,
				int (*test)(struct inode *, void *),
				void *data)
{
	struct hlist_node *node;
	struct inode *inode = NULL;

repeat:
	hlist_for_each_entry(inode, node, head, i_hash) {
		spin_lock(&inode->i_lock);
		if (inode->i_sb != sb) {
			spin_unlock(&inode->i_lock);
			continue;
		}
		if (!test(inode, data)) {
			spin_unlock(&inode->i_lock);
			continue;
		}
		if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
			__wait_on_freeing_inode(inode);
			goto repeat;
		}
		__iget(inode);
		spin_unlock(&inode->i_lock);
		return inode;
	}
	return NULL;
}
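
/*
 * find_inode_fast is the fast path version of find_inode, see the comment at
 * iget_locked for details.
 */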
static struct inode *find_inode_fast(struct super_block *sb,
				struct hlist_head *head, unsigned long ino)
{
	struct hlist_node *node;
	struct inode *inode = NULL;

repeat:
	hlist_for_each_entry(inode, node, head, i_hash) {
		spin_lock(&inode->i_lock);
		if (inode->i_ino != ino) {
			spin_unlock(&inode->i_lock);
			continue;
		}
		if (inode->i_sb != sb) {
			spin_unlock(&inode->i_lock);
			continue;
		}
		if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
			__wait_on_freeing_inode(inode);
			goto repeat;
		}
		__iget(inode);
		spin_unlock(&inode->i_lock);
		return inode;
	}
	return NULL;
}
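
/*
 * Each cpu owns a range of LAST_INO_BATCH numbers.
 * 'shared_last_ino' is dirtied only once out of LAST_INO_BATCH allocations,
 * to renew the exhausted range, so most allocations stay cpu-local and
 * rarely touch the shared counter.
 */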
#define LAST_INO_BATCH 1024
static DEFINE_PER_CPU(unsigned int, last_ino);

unsigned int get_next_ino(void)
{
	unsigned int *p = &get_cpu_var(last_ino);
	unsigned int res = *p;

#ifdef CONFIG_SMP
	if (unlikely((res & (LAST_INO_BATCH-1)) == 0)) {
		static atomic_t shared_last_ino;
		int next = atomic_add_return(LAST_INO_BATCH, &shared_last_ino);

		res = next - LAST_INO_BATCH;
	}
#endif

	*p = ++res;
	put_cpu_var(last_ino);
	return res;
}
EXPORT_SYMBOL(get_next_ino);
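
/**
 * new_inode_pseudo - obtain an inode
 * @sb: superblock
 *
 * Allocates a new inode for the given superblock.  The inode is
 * not chained into the superblock's s_inodes list, which means:
 * - the fs can't be unmounted while it lives
 * - quotas, fsnotify and writeback can't work on it
 */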
struct inode *new_inode_pseudo(struct super_block *sb)
{
	struct inode *inode = alloc_inode(sb);

	if (inode) {
		spin_lock(&inode->i_lock);
		inode->i_state = 0;
		spin_unlock(&inode->i_lock);
		INIT_LIST_HEAD(&inode->i_sb_list);
	}
	return inode;
}
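
/**
 * new_inode - obtain an inode
 * @sb: superblock
 *
 * Allocates a new inode for the given superblock.  The default gfp_mask
 * for allocations related to inode->i_mapping is GFP_HIGHUSER_MOVABLE.
 * If HIGHMEM pages are unsuitable or it is known that pages allocated
 * for the page cache are not reclaimable or migratable,
 * mapping_set_gfp_mask() must be called with suitable flags on the
 * newly created inode's mapping.
 */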
struct inode *new_inode(struct super_block *sb)
{
	struct inode *inode;

	spin_lock_prefetch(&inode_sb_list_lock);

	inode = new_inode_pseudo(sb);
	if (inode)
		inode_sb_list_add(inode);
	return inode;
}
EXPORT_SYMBOL(new_inode);

#ifdef CONFIG_DEBUG_LOCK_ALLOC
void lockdep_annotate_inode_mutex_key(struct inode *inode)
{
	if (S_ISDIR(inode->i_mode)) {
		struct file_system_type *type = inode->i_sb->s_type;
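
		/* Set new key only if filesystem hasn't already changed it */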
		if (lockdep_match_class(&inode->i_mutex, &type->i_mutex_key)) {
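			/*
			 * ensure nobody is actually holding i_mutex
			 */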
			mutex_destroy(&inode->i_mutex);
			mutex_init(&inode->i_mutex);
			lockdep_set_class(&inode->i_mutex,
					  &type->i_mutex_dir_key);
		}
	}
}
EXPORT_SYMBOL(lockdep_annotate_inode_mutex_key);
#endif
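
/**
 * unlock_new_inode - clear the I_NEW state and wake up any waiters
 * @inode: new inode to unlock
 *
 * Called when the inode is fully initialised to clear the new state of the
 * inode and wake up anyone waiting for the inode to finish initialisation.
 */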
void unlock_new_inode(struct inode *inode)
{
	lockdep_annotate_inode_mutex_key(inode);
	spin_lock(&inode->i_lock);
	WARN_ON(!(inode->i_state & I_NEW));
	inode->i_state &= ~I_NEW;
	smp_mb();
	wake_up_bit(&inode->i_state, __I_NEW);
	spin_unlock(&inode->i_lock);
}
EXPORT_SYMBOL(unlock_new_inode);
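
/**
 * iget5_locked - obtain an inode from a mounted file system
 * @sb:		super block of file system
 * @hashval:	hash value (usually inode number) to get
 * @test:	callback used for comparisons between inodes
 * @set:	callback used to initialize a new struct inode
 * @data:	opaque data pointer to pass to @test and @set
 *
 * Search for the inode specified by @hashval and @data in the inode cache,
 * and if present it is returned with an increased reference count.  This is
 * a generalized version of iget_locked() for file systems where the inode
 * number is not sufficient for unique identification of an inode.
 *
 * If the inode is not in cache, allocate a new inode and return it locked,
 * hashed, and with the I_NEW flag set.  The file system gets to fill it in
 * before unlocking it via unlock_new_inode().
 *
 * Note both @test and @set are called with the inode_hash_lock held, so
 * can't sleep.
 */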
struct inode *iget5_locked(struct super_block *sb, unsigned long hashval,
		int (*test)(struct inode *, void *),
		int (*set)(struct inode *, void *), void *data)
{
	struct hlist_head *head = inode_hashtable + hash(sb, hashval);
	struct inode *inode;

	spin_lock(&inode_hash_lock);
	inode = find_inode(sb, head, test, data);
	spin_unlock(&inode_hash_lock);

	if (inode) {
		wait_on_inode(inode);
		return inode;
	}

	inode = alloc_inode(sb);
	if (inode) {
		struct inode *old;

		spin_lock(&inode_hash_lock);
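		/* We released the lock, so.. */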
		old = find_inode(sb, head, test, data);
		if (!old) {
			if (set(inode, data))
				goto set_failed;

			spin_lock(&inode->i_lock);
			inode->i_state = I_NEW;
			hlist_add_head(&inode->i_hash, head);
			spin_unlock(&inode->i_lock);
			inode_sb_list_add(inode);
			spin_unlock(&inode_hash_lock);
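
			/*
			 * Return the locked inode with I_NEW set, the
			 * caller is responsible for filling in the contents
			 */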
			return inode;
		}
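
		/*
		 * Uhhuh, somebody else created the same inode under
		 * us.  Use the old inode instead of the one we just
		 * allocated.
		 */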
		spin_unlock(&inode_hash_lock);
		destroy_inode(inode);
		inode = old;
		wait_on_inode(inode);
	}
	return inode;

set_failed:
	spin_unlock(&inode_hash_lock);
	destroy_inode(inode);
	return NULL;
}
EXPORT_SYMBOL(iget5_locked);
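
/**
 * iget_locked - obtain an inode from a mounted file system
 * @sb:		super block of file system
 * @ino:	inode number to get
 *
 * Search for the inode specified by @ino in the inode cache and if present
 * return it with an increased reference count.  This is for file systems
 * where the inode number is sufficient for unique identification of an
 * inode.
 *
 * If the inode is not in cache, allocate a new inode and return it locked,
 * hashed, and with the I_NEW flag set.  The file system gets to fill it in
 * before unlocking it via unlock_new_inode().
 */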
struct inode *iget_locked(struct super_block *sb, unsigned long ino)
{
	struct hlist_head *head = inode_hashtable + hash(sb, ino);
	struct inode *inode;

	spin_lock(&inode_hash_lock);
	inode = find_inode_fast(sb, head, ino);
	spin_unlock(&inode_hash_lock);
	if (inode) {
		wait_on_inode(inode);
		return inode;
	}

	inode = alloc_inode(sb);
	if (inode) {
		struct inode *old;

		spin_lock(&inode_hash_lock);
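		/* We released the lock, so.. */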
		old = find_inode_fast(sb, head, ino);
		if (!old) {
			inode->i_ino = ino;
			spin_lock(&inode->i_lock);
			inode->i_state = I_NEW;
			hlist_add_head(&inode->i_hash, head);
			spin_unlock(&inode->i_lock);
			inode_sb_list_add(inode);
			spin_unlock(&inode_hash_lock);
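
			/*
			 * Return the locked inode with I_NEW set, the
			 * caller is responsible for filling in the contents
			 */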
			return inode;
		}
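
		/*
		 * Uhhuh, somebody else created the same inode under
		 * us.  Use the old inode instead of the one we just
		 * allocated.
		 */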
		spin_unlock(&inode_hash_lock);
		destroy_inode(inode);
		inode = old;
		wait_on_inode(inode);
	}
	return inode;
}
EXPORT_SYMBOL(iget_locked);
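
/*
 * search the inode cache for a matching inode number.
 * If we find one, then the inode number we are trying to
 * allocate is not unique and so we should not use it.
 *
 * Returns 1 if the inode number is unique, 0 if it is not.
 */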
static int test_inode_iunique(struct super_block *sb, unsigned long ino)
{
	struct hlist_head *b = inode_hashtable + hash(sb, ino);
	struct hlist_node *node;
	struct inode *inode;

	spin_lock(&inode_hash_lock);
	hlist_for_each_entry(inode, node, b, i_hash) {
		if (inode->i_ino == ino && inode->i_sb == sb) {
			spin_unlock(&inode_hash_lock);
			return 0;
		}
	}
	spin_unlock(&inode_hash_lock);

	return 1;
}
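
/**
 * iunique - get a unique inode number
 * @sb: superblock
 * @max_reserved: highest reserved inode number
 *
 * Obtain an inode number that is unique on the system for a given
 * superblock.  This is used by file systems that have no natural
 * permanent inode numbering system.  An inode number is returned that
 * is higher than the reserved limit but unique.
 *
 * BUGS:
 * With a large number of inodes live on the file system this function
 * currently becomes quite slow.
 */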
ino_t iunique(struct super_block *sb, ino_t max_reserved)
{
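	/*
	 * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW
	 * error if st_ino won't fit in target struct field.  Use a 32bit
	 * counter here to attempt to avoid that.
	 */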
	static DEFINE_SPINLOCK(iunique_lock);
	static unsigned int counter;
	ino_t res;

	spin_lock(&iunique_lock);
	do {
		if (counter <= max_reserved)
			counter = max_reserved + 1;
		res = counter++;
	} while (!test_inode_iunique(sb, res));
	spin_unlock(&iunique_lock);

	return res;
}
EXPORT_SYMBOL(iunique);

struct inode *igrab(struct inode *inode)
{
	spin_lock(&inode->i_lock);
	if (!(inode->i_state & (I_FREEING|I_WILL_FREE))) {
		__iget(inode);
		spin_unlock(&inode->i_lock);
	} else {
		spin_unlock(&inode->i_lock);
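		/*
		 * Handle the case where s_op->clear_inode has not been
		 * called yet, and somebody is calling igrab
		 * while the inode is getting freed.
		 */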
		inode = NULL;
	}
	return inode;
}
EXPORT_SYMBOL(igrab);
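
/**
 * ilookup5_nowait - search for an inode in the inode cache
 * @sb:		super block of file system to search
 * @hashval:	hash value (usually inode number) to search for
 * @test:	callback used for comparisons between inodes
 * @data:	opaque data pointer to pass to @test
 *
 * Search for the inode specified by @hashval and @data in the inode cache.
 * If the inode is in the cache, the inode is returned with an incremented
 * reference count.
 *
 * Note: I_NEW is not waited upon so you have to be very careful what you do
 * with the returned inode.  You probably should be using ilookup5() instead.
 *
 * Note2: @test is called with the inode_hash_lock held, so can't sleep.
 */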
struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval,
		int (*test)(struct inode *, void *), void *data)
{
	struct hlist_head *head = inode_hashtable + hash(sb, hashval);
	struct inode *inode;

	spin_lock(&inode_hash_lock);
	inode = find_inode(sb, head, test, data);
	spin_unlock(&inode_hash_lock);

	return inode;
}
EXPORT_SYMBOL(ilookup5_nowait);
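
/**
 * ilookup5 - search for an inode in the inode cache
 * @sb:		super block of file system to search
 * @hashval:	hash value (usually inode number) to search for
 * @test:	callback used for comparisons between inodes
 * @data:	opaque data pointer to pass to @test
 *
 * Search for the inode specified by @hashval and @data in the inode cache,
 * and if the inode is in the cache, return the inode with an incremented
 * reference count.  Waits on I_NEW before returning the inode.
 *
 * This is a generalized version of ilookup() for file systems where the
 * inode number is not sufficient for unique identification of an inode.
 *
 * Note: @test is called with the inode_hash_lock held, so can't sleep.
 */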
struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
		int (*test)(struct inode *, void *), void *data)
{
	struct inode *inode = ilookup5_nowait(sb, hashval, test, data);

	if (inode)
		wait_on_inode(inode);
	return inode;
}
EXPORT_SYMBOL(ilookup5);
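
/**
 * ilookup - search for an inode in the inode cache
 * @sb:		super block of file system to search
 * @ino:	inode number to search for
 *
 * Search for the inode @ino in the inode cache, and if the inode is in the
 * cache, the inode is returned with an incremented reference count.
 */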
struct inode *ilookup(struct super_block *sb, unsigned long ino)
{
	struct hlist_head *head = inode_hashtable + hash(sb, ino);
	struct inode *inode;

	spin_lock(&inode_hash_lock);
	inode = find_inode_fast(sb, head, ino);
	spin_unlock(&inode_hash_lock);

	if (inode)
		wait_on_inode(inode);
	return inode;
}
EXPORT_SYMBOL(ilookup);

int insert_inode_locked(struct inode *inode)
{
	struct super_block *sb = inode->i_sb;
	ino_t ino = inode->i_ino;
	struct hlist_head *head = inode_hashtable + hash(sb, ino);

	while (1) {
		struct hlist_node *node;
		struct inode *old = NULL;
		spin_lock(&inode_hash_lock);
		hlist_for_each_entry(old, node, head, i_hash) {
			if (old->i_ino != ino)
				continue;
			if (old->i_sb != sb)
				continue;
			spin_lock(&old->i_lock);
			if (old->i_state & (I_FREEING|I_WILL_FREE)) {
				spin_unlock(&old->i_lock);
				continue;
			}
			break;
		}
		if (likely(!node)) {
			spin_lock(&inode->i_lock);
			inode->i_state |= I_NEW;
			hlist_add_head(&inode->i_hash, head);
			spin_unlock(&inode->i_lock);
			spin_unlock(&inode_hash_lock);
			return 0;
		}
		__iget(old);
		spin_unlock(&old->i_lock);
		spin_unlock(&inode_hash_lock);
		wait_on_inode(old);
		if (unlikely(!inode_unhashed(old))) {
			iput(old);
			return -EBUSY;
		}
		iput(old);
	}
}
EXPORT_SYMBOL(insert_inode_locked);

int insert_inode_locked4(struct inode *inode, unsigned long hashval,
		int (*test)(struct inode *, void *), void *data)
{
	struct super_block *sb = inode->i_sb;
	struct hlist_head *head = inode_hashtable + hash(sb, hashval);

	while (1) {
		struct hlist_node *node;
		struct inode *old = NULL;

		spin_lock(&inode_hash_lock);
		hlist_for_each_entry(old, node, head, i_hash) {
			if (old->i_sb != sb)
				continue;
			if (!test(old, data))
				continue;
			spin_lock(&old->i_lock);
			if (old->i_state & (I_FREEING|I_WILL_FREE)) {
				spin_unlock(&old->i_lock);
				continue;
			}
			break;
		}
		if (likely(!node)) {
			spin_lock(&inode->i_lock);
			inode->i_state |= I_NEW;
			hlist_add_head(&inode->i_hash, head);
			spin_unlock(&inode->i_lock);
			spin_unlock(&inode_hash_lock);
			return 0;
		}
		__iget(old);
		spin_unlock(&old->i_lock);
		spin_unlock(&inode_hash_lock);
		wait_on_inode(old);
		if (unlikely(!inode_unhashed(old))) {
			iput(old);
			return -EBUSY;
		}
		iput(old);
	}
}
EXPORT_SYMBOL(insert_inode_locked4);
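
/*
 * A ->drop_inode callback that tells iput_final() to always evict the
 * inode rather than caching it on the LRU.
 */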
int generic_delete_inode(struct inode *inode)
{
	return 1;
}
EXPORT_SYMBOL(generic_delete_inode);
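
/*
 * Called when we're dropping the last reference
 * to an inode.
 *
 * Call the FS "drop_inode()" function, defaulting to
 * the legacy UNIX filesystem behaviour.  If it tells
 * us to evict the inode, do so.  Otherwise, retain the
 * inode in cache if the fs is alive, sync and evict it
 * if the fs is shutting down.
 */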
static void iput_final(struct inode *inode)
{
	struct super_block *sb = inode->i_sb;
	const struct super_operations *op = inode->i_sb->s_op;
	int drop;

	WARN_ON(inode->i_state & I_NEW);

	if (op->drop_inode)
		drop = op->drop_inode(inode);
	else
		drop = generic_drop_inode(inode);

	if (!drop && (sb->s_flags & MS_ACTIVE)) {
		inode->i_state |= I_REFERENCED;
		if (!(inode->i_state & (I_DIRTY|I_SYNC)))
			inode_lru_list_add(inode);
		spin_unlock(&inode->i_lock);
		return;
	}

	if (!drop) {
		inode->i_state |= I_WILL_FREE;
		spin_unlock(&inode->i_lock);
		write_inode_now(inode, 1);
		spin_lock(&inode->i_lock);
		WARN_ON(inode->i_state & I_NEW);
		inode->i_state &= ~I_WILL_FREE;
	}

	inode->i_state |= I_FREEING;
	if (!list_empty(&inode->i_lru))
		inode_lru_list_del(inode);
	spin_unlock(&inode->i_lock);

	evict(inode);
}
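
/**
 * iput - put an inode
 * @inode: inode to put
 *
 * Puts an inode, dropping its usage count.  If the inode use count hits
 * zero, the inode is then freed and may also be destroyed.
 *
 * Consequently, iput() can sleep.
 */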
void iput(struct inode *inode)
{
	if (inode) {
		BUG_ON(inode->i_state & I_CLEAR);

		if (atomic_dec_and_lock(&inode->i_count, &inode->i_lock))
			iput_final(inode);
	}
}
EXPORT_SYMBOL(iput);
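
/**
 * bmap - find a block number in a file
 * @inode: inode of file
 * @block: block to find
 *
 * Returns the block number on the device holding the inode that
 * is the disk block number for the block of the file requested.
 * That is, asked for block 4 of inode 1 the function will return the
 * disk block relative to the disk start that holds that block of the
 * file.
 */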
sector_t bmap(struct inode *inode, sector_t block)
{
	sector_t res = 0;
	if (inode->i_mapping->a_ops->bmap)
		res = inode->i_mapping->a_ops->bmap(inode->i_mapping, block);
	return res;
}
EXPORT_SYMBOL(bmap);
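
/*
 * With relative atime, only update atime if the previous atime is
 * earlier than either the ctime or mtime or if at least a day has
 * passed since the last atime update.
 */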
static int relatime_need_update(struct vfsmount *mnt, struct inode *inode,
				struct timespec now)
{

	if (!(mnt->mnt_flags & MNT_RELATIME))
		return 1;
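	/*
	 * Is mtime younger than atime? If yes, update atime:
	 */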
	if (timespec_compare(&inode->i_mtime, &inode->i_atime) >= 0)
		return 1;
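	/*
	 * Is ctime younger than atime? If yes, update atime:
	 */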
	if (timespec_compare(&inode->i_ctime, &inode->i_atime) >= 0)
		return 1;
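
	/*
	 * Is the previous atime value older than a day? If yes,
	 * update atime:
	 */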
	if ((long)(now.tv_sec - inode->i_atime.tv_sec) >= 24*60*60)
		return 1;
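	/*
	 * Good, we can skip the atime update:
	 */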
	return 0;
}
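
/*
 * This does the actual work of updating an inode's time or version.  The
 * caller must have called mnt_want_write() (or equivalent) beforehand.
 */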
static int update_time(struct inode *inode, struct timespec *time, int flags)
{
	if (inode->i_op->update_time)
		return inode->i_op->update_time(inode, time, flags);

	if (flags & S_ATIME)
		inode->i_atime = *time;
	if (flags & S_VERSION)
		inode_inc_iversion(inode);
	if (flags & S_CTIME)
		inode->i_ctime = *time;
	if (flags & S_MTIME)
		inode->i_mtime = *time;
	mark_inode_dirty_sync(inode);
	return 0;
}
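
/**
 * touch_atime - update the access time
 * @path: the &struct path to update
 *
 * Update the accessed time on an inode and mark it for writeback.
 * This function automatically handles read only file systems and media,
 * as well as the "noatime" flag and inode specific "noatime" markers.
 */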
void touch_atime(struct path *path)
{
	struct vfsmount *mnt = path->mnt;
	struct inode *inode = path->dentry->d_inode;
	struct timespec now;

	if (inode->i_flags & S_NOATIME)
		return;
	if (IS_NOATIME(inode))
		return;
	if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
		return;

	if (mnt->mnt_flags & MNT_NOATIME)
		return;
	if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
		return;

	now = current_fs_time(inode->i_sb);

	if (!relatime_need_update(mnt, inode, now))
		return;

	if (timespec_equal(&inode->i_atime, &now))
		return;

	if (!sb_start_write_trylock(inode->i_sb))
		return;

	if (__mnt_want_write(mnt))
		goto skip_update;
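
	/*
	 * File systems can error out when updating inodes if they need to
	 * allocate new space to modify an inode (such is the case for
	 * Btrfs), but since we touch atime while walking down the path we
	 * really don't care if we failed to update the atime of the file,
	 * so just ignore the return value.
	 * We may also fail on filesystems that have the ability to make parts
	 * of the fs read only, e.g. subvolumes in Btrfs.
	 */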
	update_time(inode, &now, S_ATIME);
	__mnt_drop_write(mnt);
skip_update:
	sb_end_write(inode->i_sb);
}
EXPORT_SYMBOL(touch_atime);
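
/*
 * The logic we want is
 *
 *	if suid or (sgid and xgrp)
 *		remove privs
 */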
int should_remove_suid(struct dentry *dentry)
{
	umode_t mode = dentry->d_inode->i_mode;
	int kill = 0;
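
	/* suid always must be killed */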
	if (unlikely(mode & S_ISUID))
		kill = ATTR_KILL_SUID;
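
	/*
	 * sgid without any exec bits is just a mandatory locking mark; leave
	 * it alone.  If some exec bits are set, it's a real sgid; kill it.
	 */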
	if (unlikely((mode & S_ISGID) && (mode & S_IXGRP)))
		kill |= ATTR_KILL_SGID;

	if (unlikely(kill && !capable(CAP_FSETID) && S_ISREG(mode)))
		return kill;

	return 0;
}
EXPORT_SYMBOL(should_remove_suid);

static int __remove_suid(struct dentry *dentry, int kill)
{
	struct iattr newattrs;

	newattrs.ia_valid = ATTR_FORCE | kill;
	return notify_change(dentry, &newattrs);
}

int file_remove_suid(struct file *file)
{
	struct dentry *dentry = file->f_path.dentry;
	struct inode *inode = dentry->d_inode;
	int killsuid;
	int killpriv;
	int error = 0;
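
	/* Fast path for nothing security related */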
	if (IS_NOSEC(inode))
		return 0;

	killsuid = should_remove_suid(dentry);
	killpriv = security_inode_need_killpriv(dentry);

	if (killpriv < 0)
		return killpriv;
	if (killpriv)
		error = security_inode_killpriv(dentry);
	if (!error && killsuid)
		error = __remove_suid(dentry, killsuid);
	if (!error && (inode->i_sb->s_flags & MS_NOSEC))
		inode->i_flags |= S_NOSEC;

	return error;
}
EXPORT_SYMBOL(file_remove_suid);
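
/**
 * file_update_time - update mtime and ctime time
 * @file: file accessed
 *
 * Update the mtime and ctime members of an inode and mark the inode for
 * writeback.  Note that this function is meant exclusively for usage in
 * the file write path of filesystems, and filesystems may choose to
 * explicitly ignore updates via this function with the S_NOCMTIME inode
 * flag, e.g. for network filesystems where these timestamps are handled
 * by the server.  This can return an error for file systems that need to
 * allocate space in order to update an inode.
 */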
int file_update_time(struct file *file)
{
	struct inode *inode = file->f_path.dentry->d_inode;
	struct timespec now;
	int sync_it = 0;
	int ret;
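
	/* First try to exhaust all avenues to not sync */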
	if (IS_NOCMTIME(inode))
		return 0;

	now = current_fs_time(inode->i_sb);
	if (!timespec_equal(&inode->i_mtime, &now))
		sync_it = S_MTIME;

	if (!timespec_equal(&inode->i_ctime, &now))
		sync_it |= S_CTIME;

	if (IS_I_VERSION(inode))
		sync_it |= S_VERSION;

	if (!sync_it)
		return 0;
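
	/* Finally allowed to write? Takes lock. */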
	if (__mnt_want_write_file(file))
		return 0;

	ret = update_time(inode, &now, sync_it);
	__mnt_drop_write_file(file);

	return ret;
}
EXPORT_SYMBOL(file_update_time);

int inode_needs_sync(struct inode *inode)
{
	if (IS_SYNC(inode))
		return 1;
	if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode))
		return 1;
	return 0;
}
EXPORT_SYMBOL(inode_needs_sync);

int inode_wait(void *word)
{
	schedule();
	return 0;
}
EXPORT_SYMBOL(inode_wait);
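
/*
 * If we try to find an inode in the inode hash while it is being
 * deleted, we have to wait until the filesystem completes its
 * deletion before reporting that it isn't found.  This function waits
 * until the deletion _might_ have completed.  Callers are responsible
 * to recheck inode state.
 *
 * It doesn't matter if I_NEW is not set initially, a call to
 * wake_up_bit(&inode->i_state, __I_NEW) after removing from the hash list
 * will DTRT.
 */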
static void __wait_on_freeing_inode(struct inode *inode)
{
	wait_queue_head_t *wq;
	DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);
	wq = bit_waitqueue(&inode->i_state, __I_NEW);
	prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
	spin_unlock(&inode->i_lock);
	spin_unlock(&inode_hash_lock);
	schedule();
	finish_wait(wq, &wait.wait);
	spin_lock(&inode_hash_lock);
}

static __initdata unsigned long ihash_entries;
static int __init set_ihash_entries(char *str)
{
	if (!str)
		return 0;
	ihash_entries = simple_strtoul(str, &str, 0);
	return 1;
}
__setup("ihash_entries=", set_ihash_entries);
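
/*
 * Initialize the waitqueues and inode hash table.
 */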
void __init inode_init_early(void)
{
	unsigned int loop;
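
	/*
	 * If hashes are distributed across NUMA nodes, defer
	 * hash allocation until vmalloc space is available.
	 */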
	if (hashdist)
		return;

	inode_hashtable =
		alloc_large_system_hash("Inode-cache",
					sizeof(struct hlist_head),
					ihash_entries,
					14,
					HASH_EARLY,
					&i_hash_shift,
					&i_hash_mask,
					0,
					0);

	for (loop = 0; loop < (1U << i_hash_shift); loop++)
		INIT_HLIST_HEAD(&inode_hashtable[loop]);
}

void __init inode_init(void)
{
	unsigned int loop;
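
	/* inode slab cache */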
	inode_cachep = kmem_cache_create("inode_cache",
					 sizeof(struct inode),
					 0,
					 (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
					 SLAB_MEM_SPREAD),
					 init_once);
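
	/* Hash may have been set up in inode_init_early */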
	if (!hashdist)
		return;

	inode_hashtable =
		alloc_large_system_hash("Inode-cache",
					sizeof(struct hlist_head),
					ihash_entries,
					14,
					0,
					&i_hash_shift,
					&i_hash_mask,
					0,
					0);

	for (loop = 0; loop < (1U << i_hash_shift); loop++)
		INIT_HLIST_HEAD(&inode_hashtable[loop]);
}

void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)
{
	inode->i_mode = mode;
	if (S_ISCHR(mode)) {
		inode->i_fop = &def_chr_fops;
		inode->i_rdev = rdev;
	} else if (S_ISBLK(mode)) {
		inode->i_fop = &def_blk_fops;
		inode->i_rdev = rdev;
	} else if (S_ISFIFO(mode))
		inode->i_fop = &def_fifo_fops;
	else if (S_ISSOCK(mode))
		inode->i_fop = &bad_sock_fops;
	else
		printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o) for"
				  " inode %s:%lu\n", mode, inode->i_sb->s_id,
				  inode->i_ino);
}
EXPORT_SYMBOL(init_special_inode);
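
/**
 * inode_init_owner - Init uid,gid,mode for new inode according to posix standards
 * @inode: New inode
 * @dir: Directory inode
 * @mode: mode of the new inode
 */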
void inode_init_owner(struct inode *inode, const struct inode *dir,
			umode_t mode)
{
	inode->i_uid = current_fsuid();
	if (dir && dir->i_mode & S_ISGID) {
		inode->i_gid = dir->i_gid;
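
		/* Directories are special, and always inherit S_ISGID */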
		if (S_ISDIR(mode))
			mode |= S_ISGID;
	} else
		inode->i_gid = current_fsgid();
	inode->i_mode = mode;
}
EXPORT_SYMBOL(inode_init_owner);
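
/**
 * inode_owner_or_capable - check current task permissions to inode
 * @inode: inode being checked
 *
 * Return true if current either has CAP_FOWNER to the inode, or
 * owns the file.
 */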
bool inode_owner_or_capable(const struct inode *inode)
{
	if (uid_eq(current_fsuid(), inode->i_uid))
		return true;
	if (inode_capable(inode, CAP_FOWNER))
		return true;
	return false;
}
EXPORT_SYMBOL(inode_owner_or_capable);
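
/*
 * Direct i/o helper functions
 */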
static void __inode_dio_wait(struct inode *inode)
{
	wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_DIO_WAKEUP);
	DEFINE_WAIT_BIT(q, &inode->i_state, __I_DIO_WAKEUP);

	do {
		prepare_to_wait(wq, &q.wait, TASK_UNINTERRUPTIBLE);
		if (atomic_read(&inode->i_dio_count))
			schedule();
	} while (atomic_read(&inode->i_dio_count));
	finish_wait(wq, &q.wait);
}
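
/**
 * inode_dio_wait - wait for outstanding DIO requests to finish
 * @inode: inode to wait for
 *
 * Waits for all pending direct I/O requests to finish so that we can
 * proceed with a truncate or equivalent operation.
 *
 * Must be called under a lock that serializes taking new references
 * to i_dio_count, usually by inode->i_mutex.
 */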
void inode_dio_wait(struct inode *inode)
{
	if (atomic_read(&inode->i_dio_count))
		__inode_dio_wait(inode);
}
EXPORT_SYMBOL(inode_dio_wait);
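
/**
 * inode_dio_done - signal finish of a direct I/O request
 * @inode: inode the direct I/O happens on
 *
 * This is called once we've finished processing a direct I/O request,
 * and is used to wake up callers waiting on their final iodone.
 */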
void inode_dio_done(struct inode *inode)
{
	if (atomic_dec_and_test(&inode->i_dio_count))
		wake_up_bit(&inode->i_state, __I_DIO_WAKEUP);
}
EXPORT_SYMBOL(inode_dio_done);