/*
 * fs/inode.c - inode allocation, initialisation, hashing and eviction
 * for the VFS inode cache.
 */
#include <linux/export.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/backing-dev.h>
#include <linux/hash.h>
#include <linux/swap.h>
#include <linux/security.h>
#include <linux/cdev.h>
#include <linux/bootmem.h>
#include <linux/fsnotify.h>
#include <linux/mount.h>
#include <linux/posix_acl.h>
#include <linux/prefetch.h>
#include <linux/buffer_head.h>
#include <linux/ratelimit.h>
#include "internal.h"

/*
 * Inode locking rules:
 *
 * inode->i_lock protects:
 *   inode->i_state, inode->i_hash, __iget()
 * inode->i_sb->s_inode_lru_lock protects:
 *   inode->i_sb->s_inode_lru, inode->i_lru
 * inode_sb_list_lock protects:
 *   sb->s_inodes, inode->i_sb_list
 * bdi->wb.list_lock protects:
 *   bdi->wb.b_{dirty,io,more_io}, inode->i_wb_list
 * inode_hash_lock protects:
 *   inode_hashtable, inode->i_hash
 *
 * Lock ordering:
 *
 * inode_sb_list_lock
 *   inode->i_lock
 *     inode->i_sb->s_inode_lru_lock
 *
 * bdi->wb.list_lock
 *   inode->i_lock
 *
 * inode_hash_lock
 *   inode_sb_list_lock
 *   inode->i_lock
 *
 * iunique_lock
 *   inode_hash_lock
 */

static unsigned int i_hash_mask __read_mostly;
static unsigned int i_hash_shift __read_mostly;
static struct hlist_head *inode_hashtable __read_mostly;
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock);

__cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_sb_list_lock);

/*
 * Empty aops. Can be used for the cases where the user does not
 * define any of the address_space operations.
 */
const struct address_space_operations empty_aops = {
};
EXPORT_SYMBOL(empty_aops);

/*
 * Statistics gathering..
 */
struct inodes_stat_t inodes_stat;

static DEFINE_PER_CPU(unsigned int, nr_inodes);
static DEFINE_PER_CPU(unsigned int, nr_unused);

static struct kmem_cache *inode_cachep __read_mostly;
77
78static int get_nr_inodes(void)
79{
80 int i;
81 int sum = 0;
82 for_each_possible_cpu(i)
83 sum += per_cpu(nr_inodes, i);
84 return sum < 0 ? 0 : sum;
85}
86
87static inline int get_nr_inodes_unused(void)
88{
89 int i;
90 int sum = 0;
91 for_each_possible_cpu(i)
92 sum += per_cpu(nr_unused, i);
93 return sum < 0 ? 0 : sum;
94}
95
96int get_nr_dirty_inodes(void)
97{
98
99 int nr_dirty = get_nr_inodes() - get_nr_inodes_unused();
100 return nr_dirty > 0 ? nr_dirty : 0;
101}

/*
 * Handle nr_inode sysctl
 */
106#ifdef CONFIG_SYSCTL
107int proc_nr_inodes(ctl_table *table, int write,
108 void __user *buffer, size_t *lenp, loff_t *ppos)
109{
110 inodes_stat.nr_inodes = get_nr_inodes();
111 inodes_stat.nr_unused = get_nr_inodes_unused();
112 return proc_dointvec(table, write, buffer, lenp, ppos);
113}
114#endif

/**
 * inode_init_always - perform inode structure initialisation
 * @sb: superblock inode belongs to
 * @inode: inode to initialise
 *
 * These are initializations that need to be done on every inode
 * allocation as the fields are not initialised by slab allocation.
 */
124int inode_init_always(struct super_block *sb, struct inode *inode)
125{
126 static const struct inode_operations empty_iops;
127 static const struct file_operations empty_fops;
128 struct address_space *const mapping = &inode->i_data;
129
130 inode->i_sb = sb;
131 inode->i_blkbits = sb->s_blocksize_bits;
132 inode->i_flags = 0;
133 atomic_set(&inode->i_count, 1);
134 inode->i_op = &empty_iops;
135 inode->i_fop = &empty_fops;
136 inode->__i_nlink = 1;
137 inode->i_opflags = 0;
138 i_uid_write(inode, 0);
139 i_gid_write(inode, 0);
140 atomic_set(&inode->i_writecount, 0);
141 inode->i_size = 0;
142 inode->i_blocks = 0;
143 inode->i_bytes = 0;
144 inode->i_generation = 0;
145#ifdef CONFIG_QUOTA
146 memset(&inode->i_dquot, 0, sizeof(inode->i_dquot));
147#endif
148 inode->i_pipe = NULL;
149 inode->i_bdev = NULL;
150 inode->i_cdev = NULL;
151 inode->i_rdev = 0;
152 inode->dirtied_when = 0;
153
154 if (security_inode_alloc(inode))
155 goto out;
156 spin_lock_init(&inode->i_lock);
157 lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key);
158
159 mutex_init(&inode->i_mutex);
160 lockdep_set_class(&inode->i_mutex, &sb->s_type->i_mutex_key);
161
162 atomic_set(&inode->i_dio_count, 0);
163
164 mapping->a_ops = &empty_aops;
165 mapping->host = inode;
166 mapping->flags = 0;
167 mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE);
168 mapping->private_data = NULL;
169 mapping->backing_dev_info = &default_backing_dev_info;
170 mapping->writeback_index = 0;

	/*
	 * If the block_device provides a backing_dev_info for client
	 * inodes then use that.  Otherwise the inode shares the bdev's
	 * backing_dev_info.
	 */
177 if (sb->s_bdev) {
178 struct backing_dev_info *bdi;
179
180 bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info;
181 mapping->backing_dev_info = bdi;
182 }
183 inode->i_private = NULL;
184 inode->i_mapping = mapping;
185 INIT_HLIST_HEAD(&inode->i_dentry);
186#ifdef CONFIG_FS_POSIX_ACL
187 inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED;
188#endif
189
190#ifdef CONFIG_FSNOTIFY
191 inode->i_fsnotify_mask = 0;
192#endif
193
194 this_cpu_inc(nr_inodes);
195
196 return 0;
197out:
198 return -ENOMEM;
199}
200EXPORT_SYMBOL(inode_init_always);
201
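/*
 * Allocate an inode, preferring the filesystem's own allocator when the
 * superblock provides one and falling back to the generic inode slab cache.
 */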
202static struct inode *alloc_inode(struct super_block *sb)
203{
204 struct inode *inode;
205
206 if (sb->s_op->alloc_inode)
207 inode = sb->s_op->alloc_inode(sb);
208 else
209 inode = kmem_cache_alloc(inode_cachep, GFP_KERNEL);
210
211 if (!inode)
212 return NULL;
213
214 if (unlikely(inode_init_always(sb, inode))) {
215 if (inode->i_sb->s_op->destroy_inode)
216 inode->i_sb->s_op->destroy_inode(inode);
217 else
218 kmem_cache_free(inode_cachep, inode);
219 return NULL;
220 }
221
222 return inode;
223}
224
225void free_inode_nonrcu(struct inode *inode)
226{
227 kmem_cache_free(inode_cachep, inode);
228}
229EXPORT_SYMBOL(free_inode_nonrcu);
230
231void __destroy_inode(struct inode *inode)
232{
233 BUG_ON(inode_has_buffers(inode));
234 security_inode_free(inode);
235 fsnotify_inode_delete(inode);
236 if (!inode->i_nlink) {
237 WARN_ON(atomic_long_read(&inode->i_sb->s_remove_count) == 0);
238 atomic_long_dec(&inode->i_sb->s_remove_count);
239 }
240
241#ifdef CONFIG_FS_POSIX_ACL
242 if (inode->i_acl && inode->i_acl != ACL_NOT_CACHED)
243 posix_acl_release(inode->i_acl);
244 if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED)
245 posix_acl_release(inode->i_default_acl);
246#endif
247 this_cpu_dec(nr_inodes);
248}
249EXPORT_SYMBOL(__destroy_inode);
250
251static void i_callback(struct rcu_head *head)
252{
253 struct inode *inode = container_of(head, struct inode, i_rcu);
254 kmem_cache_free(inode_cachep, inode);
255}
256
257static void destroy_inode(struct inode *inode)
258{
259 BUG_ON(!list_empty(&inode->i_lru));
260 __destroy_inode(inode);
261 if (inode->i_sb->s_op->destroy_inode)
262 inode->i_sb->s_op->destroy_inode(inode);
263 else
264 call_rcu(&inode->i_rcu, i_callback);
265}

/**
 * drop_nlink - directly drop an inode's link count
 * @inode: inode
 *
 * This is a low-level filesystem helper to replace any
 * direct filesystem manipulation of i_nlink.  In cases
 * where we are attempting to track writes to the
 * filesystem, a decrement to zero means an imminent
 * write when the file is truncated and actually unlinked
 * on the filesystem.
 */
278void drop_nlink(struct inode *inode)
279{
280 WARN_ON(inode->i_nlink == 0);
281 inode->__i_nlink--;
282 if (!inode->i_nlink)
283 atomic_long_inc(&inode->i_sb->s_remove_count);
284}
285EXPORT_SYMBOL(drop_nlink);

/**
 * clear_nlink - directly zero an inode's link count
 * @inode: inode
 *
 * This is a low-level filesystem helper to replace any
 * direct filesystem manipulation of i_nlink.  See
 * drop_nlink() for why we care about i_nlink hitting zero.
 */
295void clear_nlink(struct inode *inode)
296{
297 if (inode->i_nlink) {
298 inode->__i_nlink = 0;
299 atomic_long_inc(&inode->i_sb->s_remove_count);
300 }
301}
302EXPORT_SYMBOL(clear_nlink);

/**
 * set_nlink - directly set an inode's link count
 * @inode: inode
 * @nlink: new nlink (should be non-zero)
 *
 * This is a low-level filesystem helper to replace any
 * direct filesystem manipulation of i_nlink.
 */
312void set_nlink(struct inode *inode, unsigned int nlink)
313{
314 if (!nlink) {
315 clear_nlink(inode);
316 } else {
		/* Yes, some filesystems do change nlink from zero to one */
318 if (inode->i_nlink == 0)
319 atomic_long_dec(&inode->i_sb->s_remove_count);
320
321 inode->__i_nlink = nlink;
322 }
323}
324EXPORT_SYMBOL(set_nlink);

/**
 * inc_nlink - directly increment an inode's link count
 * @inode: inode
 *
 * This is a low-level filesystem helper to replace any
 * direct filesystem manipulation of i_nlink.  Currently,
 * it is only here for parity with dec_nlink().
 */
334void inc_nlink(struct inode *inode)
335{
336 if (unlikely(inode->i_nlink == 0)) {
337 WARN_ON(!(inode->i_state & I_LINKABLE));
338 atomic_long_dec(&inode->i_sb->s_remove_count);
339 }
340
341 inode->__i_nlink++;
342}
343EXPORT_SYMBOL(inc_nlink);
344
345void address_space_init_once(struct address_space *mapping)
346{
347 memset(mapping, 0, sizeof(*mapping));
348 INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
349 spin_lock_init(&mapping->tree_lock);
350 mutex_init(&mapping->i_mmap_mutex);
351 INIT_LIST_HEAD(&mapping->private_list);
352 spin_lock_init(&mapping->private_lock);
353 mapping->i_mmap = RB_ROOT;
354 INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
355}
356EXPORT_SYMBOL(address_space_init_once);

/*
 * These are initializations that only need to be done
 * once, because the fields are idempotent across use
 * of the inode, so let the slab cache be aware of that.
 */
363void inode_init_once(struct inode *inode)
364{
365 memset(inode, 0, sizeof(*inode));
366 INIT_HLIST_NODE(&inode->i_hash);
367 INIT_LIST_HEAD(&inode->i_devices);
368 INIT_LIST_HEAD(&inode->i_wb_list);
369 INIT_LIST_HEAD(&inode->i_lru);
370 address_space_init_once(&inode->i_data);
371 i_size_ordered_init(inode);
372#ifdef CONFIG_FSNOTIFY
373 INIT_HLIST_HEAD(&inode->i_fsnotify_marks);
374#endif
375}
376EXPORT_SYMBOL(inode_init_once);
377
378static void init_once(void *foo)
379{
380 struct inode *inode = (struct inode *) foo;
381
382 inode_init_once(inode);
383}

/*
 * inode->i_lock must be held
 */
388void __iget(struct inode *inode)
389{
390 atomic_inc(&inode->i_count);
391}

/*
 * get additional reference to inode; caller must already hold one.
 */
396void ihold(struct inode *inode)
397{
398 WARN_ON(atomic_inc_return(&inode->i_count) < 2);
399}
400EXPORT_SYMBOL(ihold);
401
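/*
 * Add the inode to its superblock's LRU of unused inodes, unless it is
 * already there, and account it in the per-cpu nr_unused counter.
 */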
402static void inode_lru_list_add(struct inode *inode)
403{
404 spin_lock(&inode->i_sb->s_inode_lru_lock);
405 if (list_empty(&inode->i_lru)) {
406 list_add(&inode->i_lru, &inode->i_sb->s_inode_lru);
407 inode->i_sb->s_nr_inodes_unused++;
408 this_cpu_inc(nr_unused);
409 }
410 spin_unlock(&inode->i_sb->s_inode_lru_lock);
411}

/*
 * Add inode to LRU if needed (inode is unused and clean).
 *
 * Needs inode->i_lock held.
 */
418void inode_add_lru(struct inode *inode)
419{
420 if (!(inode->i_state & (I_DIRTY | I_SYNC | I_FREEING | I_WILL_FREE)) &&
421 !atomic_read(&inode->i_count) && inode->i_sb->s_flags & MS_ACTIVE)
422 inode_lru_list_add(inode);
423}
424
425
426static void inode_lru_list_del(struct inode *inode)
427{
428 spin_lock(&inode->i_sb->s_inode_lru_lock);
429 if (!list_empty(&inode->i_lru)) {
430 list_del_init(&inode->i_lru);
431 inode->i_sb->s_nr_inodes_unused--;
432 this_cpu_dec(nr_unused);
433 }
434 spin_unlock(&inode->i_sb->s_inode_lru_lock);
435}

/**
 * inode_sb_list_add - add inode to the superblock list of inodes
 * @inode: inode to add
 */
441void inode_sb_list_add(struct inode *inode)
442{
443 spin_lock(&inode_sb_list_lock);
444 list_add(&inode->i_sb_list, &inode->i_sb->s_inodes);
445 spin_unlock(&inode_sb_list_lock);
446}
447EXPORT_SYMBOL_GPL(inode_sb_list_add);
448
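/*
 * Remove the inode from its superblock's s_inodes list, if it has been
 * added to one.
 */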
449static inline void inode_sb_list_del(struct inode *inode)
450{
451 if (!list_empty(&inode->i_sb_list)) {
452 spin_lock(&inode_sb_list_lock);
453 list_del_init(&inode->i_sb_list);
454 spin_unlock(&inode_sb_list_lock);
455 }
456}
457
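/*
 * Pick the hash bucket for an inode: mix the superblock pointer into the
 * per-filesystem hash value so that equal inode numbers from different
 * filesystems land in different chains.
 */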
458static unsigned long hash(struct super_block *sb, unsigned long hashval)
459{
460 unsigned long tmp;
461
462 tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) /
463 L1_CACHE_BYTES;
464 tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> i_hash_shift);
465 return tmp & i_hash_mask;
466}

/**
 * __insert_inode_hash - hash an inode
 * @inode: unhashed inode
 * @hashval: unsigned long value used to locate this object in the
 *	inode_hashtable.
 *
 * Add an inode to the inode hash for this superblock.
 */
476void __insert_inode_hash(struct inode *inode, unsigned long hashval)
477{
478 struct hlist_head *b = inode_hashtable + hash(inode->i_sb, hashval);
479
480 spin_lock(&inode_hash_lock);
481 spin_lock(&inode->i_lock);
482 hlist_add_head(&inode->i_hash, b);
483 spin_unlock(&inode->i_lock);
484 spin_unlock(&inode_hash_lock);
485}
486EXPORT_SYMBOL(__insert_inode_hash);

/**
 * __remove_inode_hash - remove an inode from the hash
 * @inode: inode to unhash
 *
 * Remove an inode from the superblock.
 */
494void __remove_inode_hash(struct inode *inode)
495{
496 spin_lock(&inode_hash_lock);
497 spin_lock(&inode->i_lock);
498 hlist_del_init(&inode->i_hash);
499 spin_unlock(&inode->i_lock);
500 spin_unlock(&inode_hash_lock);
501}
502EXPORT_SYMBOL(__remove_inode_hash);
503
504void clear_inode(struct inode *inode)
505{
506 might_sleep();
	/*
	 * We have to cycle the tree_lock here because reclaim can be still in
	 * the process of removing the last page (in __delete_from_page_cache())
	 * and we must not free the mapping under it.
	 */
512 spin_lock_irq(&inode->i_data.tree_lock);
513 BUG_ON(inode->i_data.nrpages);
514 spin_unlock_irq(&inode->i_data.tree_lock);
515 BUG_ON(!list_empty(&inode->i_data.private_list));
516 BUG_ON(!(inode->i_state & I_FREEING));
517 BUG_ON(inode->i_state & I_CLEAR);
518
519 inode->i_state = I_FREEING | I_CLEAR;
520}
521EXPORT_SYMBOL(clear_inode);

/*
 * Free the inode passed in, removing it from the lists it is still connected
 * to. We remove any pages still attached to the inode and wait for any IO that
 * is still in progress before finally destroying the inode.
 *
 * An inode must already be marked I_FREEING so that we avoid the inode being
 * moved back onto lists if we race with other code that manipulates the lists
 * (e.g. writeback_single_inode). The caller is responsible for setting this.
 *
 * An inode must already be removed from the LRU list before being evicted from
 * the cache. This should occur atomically with setting the I_FREEING state
 * flag, so no inodes here should ever be on the LRU when being evicted.
 */
536static void evict(struct inode *inode)
537{
538 const struct super_operations *op = inode->i_sb->s_op;
539
540 BUG_ON(!(inode->i_state & I_FREEING));
541 BUG_ON(!list_empty(&inode->i_lru));
542
543 if (!list_empty(&inode->i_wb_list))
544 inode_wb_list_del(inode);
545
546 inode_sb_list_del(inode);

	/*
	 * Wait for flusher thread to be done with the inode so that filesystem
	 * does not start destroying it while writeback is still running. Since
	 * the inode has I_FREEING set, flusher thread won't start new work on
	 * the inode.  We just have to wait for running writeback to finish.
	 */
554 inode_wait_for_writeback(inode);
555
556 if (op->evict_inode) {
557 op->evict_inode(inode);
558 } else {
559 if (inode->i_data.nrpages)
560 truncate_inode_pages(&inode->i_data, 0);
561 clear_inode(inode);
562 }
563 if (S_ISBLK(inode->i_mode) && inode->i_bdev)
564 bd_forget(inode);
565 if (S_ISCHR(inode->i_mode) && inode->i_cdev)
566 cd_forget(inode);
567
568 remove_inode_hash(inode);
569
570 spin_lock(&inode->i_lock);
571 wake_up_bit(&inode->i_state, __I_NEW);
572 BUG_ON(inode->i_state != (I_FREEING | I_CLEAR));
573 spin_unlock(&inode->i_lock);
574
575 destroy_inode(inode);
576}

/*
 * dispose_list - dispose of the contents of a local list
 * @head: the head of the list to free
 *
 * Dispose-list gets a local list with local inodes in it, so it doesn't
 * need to worry about list corruption and SMP locks.
 */
585static void dispose_list(struct list_head *head)
586{
587 while (!list_empty(head)) {
588 struct inode *inode;
589
590 inode = list_first_entry(head, struct inode, i_lru);
591 list_del_init(&inode->i_lru);
592
593 evict(inode);
594 }
595}

/**
 * evict_inodes	- evict all evictable inodes for a superblock
 * @sb:		superblock to operate on
 *
 * Make sure that no inodes with zero refcount are retained.  This is
 * called by superblock shutdown after having MS_ACTIVE flag removed,
 * so any inode reaching zero refcount during or after that call will
 * be immediately evicted.
 */
606void evict_inodes(struct super_block *sb)
607{
608 struct inode *inode, *next;
609 LIST_HEAD(dispose);
610
611 spin_lock(&inode_sb_list_lock);
612 list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
613 if (atomic_read(&inode->i_count))
614 continue;
615
616 spin_lock(&inode->i_lock);
617 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
618 spin_unlock(&inode->i_lock);
619 continue;
620 }
621
622 inode->i_state |= I_FREEING;
623 inode_lru_list_del(inode);
624 spin_unlock(&inode->i_lock);
625 list_add(&inode->i_lru, &dispose);
626 }
627 spin_unlock(&inode_sb_list_lock);
628
629 dispose_list(&dispose);
630}

/**
 * invalidate_inodes	- attempt to free all inodes on a superblock
 * @sb:		superblock to operate on
 * @kill_dirty: flag to guide handling of dirty inodes
 *
 * Attempts to free all inodes for a given superblock.  If there were any
 * busy inodes return a non-zero value, else zero.
 * If @kill_dirty is set, discard dirty inodes instead of treating
 * them as busy.
 */
642int invalidate_inodes(struct super_block *sb, bool kill_dirty)
643{
644 int busy = 0;
645 struct inode *inode, *next;
646 LIST_HEAD(dispose);
647
648 spin_lock(&inode_sb_list_lock);
649 list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
650 spin_lock(&inode->i_lock);
651 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
652 spin_unlock(&inode->i_lock);
653 continue;
654 }
655 if (inode->i_state & I_DIRTY && !kill_dirty) {
656 spin_unlock(&inode->i_lock);
657 busy = 1;
658 continue;
659 }
660 if (atomic_read(&inode->i_count)) {
661 spin_unlock(&inode->i_lock);
662 busy = 1;
663 continue;
664 }
665
666 inode->i_state |= I_FREEING;
667 inode_lru_list_del(inode);
668 spin_unlock(&inode->i_lock);
669 list_add(&inode->i_lru, &dispose);
670 }
671 spin_unlock(&inode_sb_list_lock);
672
673 dispose_list(&dispose);
674
675 return busy;
676}
677
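/*
 * An inode on the LRU can be reclaimed only if it carries no state bits
 * other than I_REFERENCED, has no attached buffers, no remaining
 * references and no pagecache pages.
 */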
678static int can_unuse(struct inode *inode)
679{
680 if (inode->i_state & ~I_REFERENCED)
681 return 0;
682 if (inode_has_buffers(inode))
683 return 0;
684 if (atomic_read(&inode->i_count))
685 return 0;
686 if (inode->i_data.nrpages)
687 return 0;
688 return 1;
689}

/*
 * Walk the superblock inode LRU for freeable inodes and attempt to free them.
 * This is called from the superblock shrinker function with a number of inodes
 * to trim from the LRU. Inodes to be freed are moved to a temporary list and
 * then are freed outside inode_lock by dispose_list().
 *
 * Any inodes which are pinned purely because of attached pagecache have their
 * pagecache removed.  If the inode has metadata buffers attached to
 * mapping->private_list then try to remove them.
 *
 * If the inode has the I_REFERENCED flag set, then it means that it has been
 * used recently - the flag is set in iput_final(). When we encounter such an
 * inode, clear the flag and move it to the back of the LRU so it gets another
 * pass through the LRU before it gets reclaimed. This is necessary because of
 * the fact we are doing lazy LRU updates to minimise lock contention so the
 * LRU does not have strict ordering. Hence we don't want to reclaim inodes
 * with this flag set because they are the inodes that are out of order.
 */
709void prune_icache_sb(struct super_block *sb, int nr_to_scan)
710{
711 LIST_HEAD(freeable);
712 int nr_scanned;
713 unsigned long reap = 0;
714
715 spin_lock(&sb->s_inode_lru_lock);
716 for (nr_scanned = nr_to_scan; nr_scanned >= 0; nr_scanned--) {
717 struct inode *inode;
718
719 if (list_empty(&sb->s_inode_lru))
720 break;
721
722 inode = list_entry(sb->s_inode_lru.prev, struct inode, i_lru);

		/*
		 * we are inverting the sb->s_inode_lru_lock/inode->i_lock here,
		 * so use a trylock. If we fail to get the lock, just move the
		 * inode to the back of the list so we don't spin on it.
		 */
729 if (!spin_trylock(&inode->i_lock)) {
730 list_move(&inode->i_lru, &sb->s_inode_lru);
731 continue;
732 }

		/*
		 * Referenced or dirty inodes are still in use. Give them
		 * another pass through the LRU as we cannot reclaim them now.
		 */
738 if (atomic_read(&inode->i_count) ||
739 (inode->i_state & ~I_REFERENCED)) {
740 list_del_init(&inode->i_lru);
741 spin_unlock(&inode->i_lock);
742 sb->s_nr_inodes_unused--;
743 this_cpu_dec(nr_unused);
744 continue;
745 }

		/* recently referenced inodes get one more pass */
748 if (inode->i_state & I_REFERENCED) {
749 inode->i_state &= ~I_REFERENCED;
750 list_move(&inode->i_lru, &sb->s_inode_lru);
751 spin_unlock(&inode->i_lock);
752 continue;
753 }
754 if (inode_has_buffers(inode) || inode->i_data.nrpages) {
755 __iget(inode);
756 spin_unlock(&inode->i_lock);
757 spin_unlock(&sb->s_inode_lru_lock);
758 if (remove_inode_buffers(inode))
759 reap += invalidate_mapping_pages(&inode->i_data,
760 0, -1);
761 iput(inode);
762 spin_lock(&sb->s_inode_lru_lock);
763
764 if (inode != list_entry(sb->s_inode_lru.next,
765 struct inode, i_lru))
766 continue;
767
768 if (!spin_trylock(&inode->i_lock))
769 continue;
770 if (!can_unuse(inode)) {
771 spin_unlock(&inode->i_lock);
772 continue;
773 }
774 }
775 WARN_ON(inode->i_state & I_NEW);
776 inode->i_state |= I_FREEING;
777 spin_unlock(&inode->i_lock);
778
779 list_move(&inode->i_lru, &freeable);
780 sb->s_nr_inodes_unused--;
781 this_cpu_dec(nr_unused);
782 }
783 if (current_is_kswapd())
784 __count_vm_events(KSWAPD_INODESTEAL, reap);
785 else
786 __count_vm_events(PGINODESTEAL, reap);
787 spin_unlock(&sb->s_inode_lru_lock);
788 if (current->reclaim_state)
789 current->reclaim_state->reclaimed_slab += reap;
790
791 dispose_list(&freeable);
792}
793
794static void __wait_on_freeing_inode(struct inode *inode);

/*
 * Called with the inode_hash_lock held.
 */
798static struct inode *find_inode(struct super_block *sb,
799 struct hlist_head *head,
800 int (*test)(struct inode *, void *),
801 void *data)
802{
803 struct inode *inode = NULL;
804
805repeat:
806 hlist_for_each_entry(inode, head, i_hash) {
807 spin_lock(&inode->i_lock);
808 if (inode->i_sb != sb) {
809 spin_unlock(&inode->i_lock);
810 continue;
811 }
812 if (!test(inode, data)) {
813 spin_unlock(&inode->i_lock);
814 continue;
815 }
816 if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
817 __wait_on_freeing_inode(inode);
818 goto repeat;
819 }
820 __iget(inode);
821 spin_unlock(&inode->i_lock);
822 return inode;
823 }
824 return NULL;
825}

/*
 * find_inode_fast is the fast path version of find_inode, see the comment at
 * iget_locked for details.
 */
831static struct inode *find_inode_fast(struct super_block *sb,
832 struct hlist_head *head, unsigned long ino)
833{
834 struct inode *inode = NULL;
835
836repeat:
837 hlist_for_each_entry(inode, head, i_hash) {
838 spin_lock(&inode->i_lock);
839 if (inode->i_ino != ino) {
840 spin_unlock(&inode->i_lock);
841 continue;
842 }
843 if (inode->i_sb != sb) {
844 spin_unlock(&inode->i_lock);
845 continue;
846 }
847 if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
848 __wait_on_freeing_inode(inode);
849 goto repeat;
850 }
851 __iget(inode);
852 spin_unlock(&inode->i_lock);
853 return inode;
854 }
855 return NULL;
856}

/*
 * Each cpu owns a range of LAST_INO_BATCH numbers.
 * 'shared_last_ino' is dirtied only once out of LAST_INO_BATCH allocations,
 * to renew the exhausted range.
 *
 * This does not significantly increase overflow rate because every CPU can
 * consume at most LAST_INO_BATCH-1 unreturned inode numbers. So there is
 * NR_CPUS*(LAST_INO_BATCH-1) wastage. The missing numbers could be reused
 * 2^32 times - not more often than once per jiffy. But even this is far
 * from strict, when in theory a process can use up all 2^32 inode numbers
 * in a few milliseconds (rough estimate, depends on CPU speed, syscall
 * rate, FS size etc.)
 *
 * This is a best effort so we are prepared for failure (unlikely but
 * possible), do not use it for something critical.
 */
873#define LAST_INO_BATCH 1024
874static DEFINE_PER_CPU(unsigned int, last_ino);
875
876unsigned int get_next_ino(void)
877{
878 unsigned int *p = &get_cpu_var(last_ino);
879 unsigned int res = *p;
880
881#ifdef CONFIG_SMP
882 if (unlikely((res & (LAST_INO_BATCH-1)) == 0)) {
883 static atomic_t shared_last_ino;
884 int next = atomic_add_return(LAST_INO_BATCH, &shared_last_ino);
885
886 res = next - LAST_INO_BATCH;
887 }
888#endif
889
890 *p = ++res;
891 put_cpu_var(last_ino);
892 return res;
893}
894EXPORT_SYMBOL(get_next_ino);

/**
 * new_inode_pseudo - obtain an inode
 * @sb: superblock
 *
 * Allocates a new inode for given superblock.
 * The inode will not be chained onto the superblock's s_inodes list,
 * which means:
 * - the fs can't be unmounted while it is live
 * - quotas, fsnotify and writeback can't see it
 */
906struct inode *new_inode_pseudo(struct super_block *sb)
907{
908 struct inode *inode = alloc_inode(sb);
909
910 if (inode) {
911 spin_lock(&inode->i_lock);
912 inode->i_state = 0;
913 spin_unlock(&inode->i_lock);
914 INIT_LIST_HEAD(&inode->i_sb_list);
915 }
916 return inode;
917}

/**
 * new_inode - obtain an inode
 * @sb: superblock
 *
 * Allocates a new inode for given superblock. The default gfp_mask
 * for allocations related to inode->i_mapping is GFP_HIGHUSER_MOVABLE.
 * If HIGHMEM pages are unsuitable or it is known that pages allocated
 * for the page cache are not reclaimable or migratable,
 * mapping_set_gfp_mask() must be called with suitable flags on the
 * newly created inode's mapping.
 */
931struct inode *new_inode(struct super_block *sb)
932{
933 struct inode *inode;
934
935 spin_lock_prefetch(&inode_sb_list_lock);
936
937 inode = new_inode_pseudo(sb);
938 if (inode)
939 inode_sb_list_add(inode);
940 return inode;
941}
942EXPORT_SYMBOL(new_inode);
943
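/*
 * When lockdep is enabled, give directory inodes their own i_mutex lock
 * class, since directory and non-directory i_mutex are taken under
 * different locking rules.
 */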
944#ifdef CONFIG_DEBUG_LOCK_ALLOC
945void lockdep_annotate_inode_mutex_key(struct inode *inode)
946{
947 if (S_ISDIR(inode->i_mode)) {
948 struct file_system_type *type = inode->i_sb->s_type;
949
		/* Set new key only if filesystem hasn't already changed it */
951 if (lockdep_match_class(&inode->i_mutex, &type->i_mutex_key)) {
			/*
			 * ensure nobody is actually holding i_mutex
			 */
955 mutex_destroy(&inode->i_mutex);
956 mutex_init(&inode->i_mutex);
957 lockdep_set_class(&inode->i_mutex,
958 &type->i_mutex_dir_key);
959 }
960 }
961}
962EXPORT_SYMBOL(lockdep_annotate_inode_mutex_key);
963#endif

/**
 * unlock_new_inode - clear the I_NEW state and wake up any waiters
 * @inode:	new inode to unlock
 *
 * Called when the inode is fully initialised to clear the new state of the
 * inode and wake up anyone waiting for the inode to finish initialisation.
 */
972void unlock_new_inode(struct inode *inode)
973{
974 lockdep_annotate_inode_mutex_key(inode);
975 spin_lock(&inode->i_lock);
976 WARN_ON(!(inode->i_state & I_NEW));
977 inode->i_state &= ~I_NEW;
978 smp_mb();
979 wake_up_bit(&inode->i_state, __I_NEW);
980 spin_unlock(&inode->i_lock);
981}
982EXPORT_SYMBOL(unlock_new_inode);

/**
 * iget5_locked - obtain an inode from a mounted file system
 * @sb:		super block of file system
 * @hashval:	hash value (usually inode number) to get
 * @test:	callback used for comparisons between inodes
 * @set:	callback used to initialize a new struct inode
 * @data:	opaque data pointer to pass to @test and @set
 *
 * Search for the inode specified by @hashval and @data in the inode cache,
 * and if present it is returned with an increased reference count. This is
 * a generalized version of iget_locked() for file systems where the inode
 * number is not sufficient for unique identification of an inode.
 *
 * If the inode is not in cache, allocate a new inode and return it locked,
 * hashed, and with the I_NEW flag set. The file system gets to fill it in
 * before unlocking it via unlock_new_inode().
 *
 * Note both @test and @set are called with the inode_hash_lock held, so
 * they can't sleep.
 */
1004struct inode *iget5_locked(struct super_block *sb, unsigned long hashval,
1005 int (*test)(struct inode *, void *),
1006 int (*set)(struct inode *, void *), void *data)
1007{
1008 struct hlist_head *head = inode_hashtable + hash(sb, hashval);
1009 struct inode *inode;
1010
1011 spin_lock(&inode_hash_lock);
1012 inode = find_inode(sb, head, test, data);
1013 spin_unlock(&inode_hash_lock);
1014
1015 if (inode) {
1016 wait_on_inode(inode);
1017 return inode;
1018 }
1019
1020 inode = alloc_inode(sb);
1021 if (inode) {
1022 struct inode *old;
1023
1024 spin_lock(&inode_hash_lock);
1025
1026 old = find_inode(sb, head, test, data);
1027 if (!old) {
1028 if (set(inode, data))
1029 goto set_failed;
1030
1031 spin_lock(&inode->i_lock);
1032 inode->i_state = I_NEW;
1033 hlist_add_head(&inode->i_hash, head);
1034 spin_unlock(&inode->i_lock);
1035 inode_sb_list_add(inode);
1036 spin_unlock(&inode_hash_lock);

			/* Return the locked inode with I_NEW set, the
			 * caller is responsible for filling in the contents
			 */
1041 return inode;
1042 }

		/*
		 * Uhhuh, somebody else created the same inode under
		 * us. Use the old inode instead of the one we just
		 * allocated.
		 */
1049 spin_unlock(&inode_hash_lock);
1050 destroy_inode(inode);
1051 inode = old;
1052 wait_on_inode(inode);
1053 }
1054 return inode;
1055
1056set_failed:
1057 spin_unlock(&inode_hash_lock);
1058 destroy_inode(inode);
1059 return NULL;
1060}
1061EXPORT_SYMBOL(iget5_locked);

/**
 * iget_locked - obtain an inode from a mounted file system
 * @sb:		super block of file system
 * @ino:	inode number to get
 *
 * Search for the inode specified by @ino in the inode cache and if present
 * return it with an increased reference count. This is for file systems
 * where the inode number is sufficient for unique identification of an inode.
 *
 * If the inode is not in cache, allocate a new inode and return it locked,
 * hashed, and with the I_NEW flag set.  The file system gets to fill it in
 * before unlocking it via unlock_new_inode().
 */
1076struct inode *iget_locked(struct super_block *sb, unsigned long ino)
1077{
1078 struct hlist_head *head = inode_hashtable + hash(sb, ino);
1079 struct inode *inode;
1080
1081 spin_lock(&inode_hash_lock);
1082 inode = find_inode_fast(sb, head, ino);
1083 spin_unlock(&inode_hash_lock);
1084 if (inode) {
1085 wait_on_inode(inode);
1086 return inode;
1087 }
1088
1089 inode = alloc_inode(sb);
1090 if (inode) {
1091 struct inode *old;
1092
1093 spin_lock(&inode_hash_lock);
1094
1095 old = find_inode_fast(sb, head, ino);
1096 if (!old) {
1097 inode->i_ino = ino;
1098 spin_lock(&inode->i_lock);
1099 inode->i_state = I_NEW;
1100 hlist_add_head(&inode->i_hash, head);
1101 spin_unlock(&inode->i_lock);
1102 inode_sb_list_add(inode);
1103 spin_unlock(&inode_hash_lock);

			/* Return the locked inode with I_NEW set, the
			 * caller is responsible for filling in the contents
			 */
1108 return inode;
1109 }

		/*
		 * Uhhuh, somebody else created the same inode under
		 * us. Use the old inode instead of the one we just
		 * allocated.
		 */
1116 spin_unlock(&inode_hash_lock);
1117 destroy_inode(inode);
1118 inode = old;
1119 wait_on_inode(inode);
1120 }
1121 return inode;
1122}
1123EXPORT_SYMBOL(iget_locked);

/*
 * search the inode cache for a matching inode number.
 * If we find one, then the inode number we are trying to
 * allocate is not unique and so we should not use it.
 *
 * Returns 1 if the inode number is unique, 0 if it is not.
 */
1132static int test_inode_iunique(struct super_block *sb, unsigned long ino)
1133{
1134 struct hlist_head *b = inode_hashtable + hash(sb, ino);
1135 struct inode *inode;
1136
1137 spin_lock(&inode_hash_lock);
1138 hlist_for_each_entry(inode, b, i_hash) {
1139 if (inode->i_ino == ino && inode->i_sb == sb) {
1140 spin_unlock(&inode_hash_lock);
1141 return 0;
1142 }
1143 }
1144 spin_unlock(&inode_hash_lock);
1145
1146 return 1;
1147}

/**
 *	iunique - get a unique inode number
 *	@sb: superblock
 *	@max_reserved: highest reserved inode number
 *
 *	Obtain an inode number that is unique on the system for a given
 *	superblock. This is used by file systems that have no natural
 *	permanent inode numbering system. An inode number is returned that
 *	is higher than the reserved limit but unique.
 *
 *	BUGS:
 *	With a large number of inodes live on the file system this function
 *	currently becomes quite slow.
 */
1163ino_t iunique(struct super_block *sb, ino_t max_reserved)
1164{
	/*
	 * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW
	 * error if st_ino won't fit in target struct field. Use 32bit counter
	 * here to attempt to avoid that.
	 */
1170 static DEFINE_SPINLOCK(iunique_lock);
1171 static unsigned int counter;
1172 ino_t res;
1173
1174 spin_lock(&iunique_lock);
1175 do {
1176 if (counter <= max_reserved)
1177 counter = max_reserved + 1;
1178 res = counter++;
1179 } while (!test_inode_iunique(sb, res));
1180 spin_unlock(&iunique_lock);
1181
1182 return res;
1183}
1184EXPORT_SYMBOL(iunique);
1185
1186struct inode *igrab(struct inode *inode)
1187{
1188 spin_lock(&inode->i_lock);
1189 if (!(inode->i_state & (I_FREEING|I_WILL_FREE))) {
1190 __iget(inode);
1191 spin_unlock(&inode->i_lock);
1192 } else {
1193 spin_unlock(&inode->i_lock);
		/*
		 * Handle the case where s_op->clear_inode has not been
		 * called yet, and somebody is calling igrab
		 * while the inode is getting freed.
		 */
1199 inode = NULL;
1200 }
1201 return inode;
1202}
1203EXPORT_SYMBOL(igrab);

/**
 * ilookup5_nowait - search for an inode in the inode cache
 * @sb:		super block of file system to search
 * @hashval:	hash value (usually inode number) to search for
 * @test:	callback used for comparisons between inodes
 * @data:	opaque data pointer to pass to @test
 *
 * Search for the inode specified by @hashval and @data in the inode cache.
 * If the inode is in the cache, the inode is returned with an incremented
 * reference count.
 *
 * Note: I_NEW is not waited upon so you have to be very careful what you do
 * with the returned inode.  You probably should be using ilookup5() instead.
 *
 * Note2: @test is called with the inode_hash_lock held, so it can't sleep.
 */
1221struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval,
1222 int (*test)(struct inode *, void *), void *data)
1223{
1224 struct hlist_head *head = inode_hashtable + hash(sb, hashval);
1225 struct inode *inode;
1226
1227 spin_lock(&inode_hash_lock);
1228 inode = find_inode(sb, head, test, data);
1229 spin_unlock(&inode_hash_lock);
1230
1231 return inode;
1232}
1233EXPORT_SYMBOL(ilookup5_nowait);

/**
 * ilookup5 - search for an inode in the inode cache
 * @sb:		super block of file system to search
 * @hashval:	hash value (usually inode number) to search for
 * @test:	callback used for comparisons between inodes
 * @data:	opaque data pointer to pass to @test
 *
 * Search for the inode specified by @hashval and @data in the inode cache,
 * and if the inode is in the cache, return the inode with an incremented
 * reference count.  Waits on I_NEW before returning the inode.
 *
 * This is a generalized version of ilookup() for file systems where the
 * inode number is not sufficient for unique identification of an inode.
 *
 * Note: @test is called with the inode_hash_lock held, so it can't sleep.
 */
1252struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
1253 int (*test)(struct inode *, void *), void *data)
1254{
1255 struct inode *inode = ilookup5_nowait(sb, hashval, test, data);
1256
1257 if (inode)
1258 wait_on_inode(inode);
1259 return inode;
1260}
1261EXPORT_SYMBOL(ilookup5);

/**
 * ilookup - search for an inode in the inode cache
 * @sb:		super block of file system to search
 * @ino:	inode number to search for
 *
 * Search for the inode @ino in the inode cache, and if the inode is in the
 * cache, the inode is returned with an incremented reference count.
 */
1271struct inode *ilookup(struct super_block *sb, unsigned long ino)
1272{
1273 struct hlist_head *head = inode_hashtable + hash(sb, ino);
1274 struct inode *inode;
1275
1276 spin_lock(&inode_hash_lock);
1277 inode = find_inode_fast(sb, head, ino);
1278 spin_unlock(&inode_hash_lock);
1279
1280 if (inode)
1281 wait_on_inode(inode);
1282 return inode;
1283}
1284EXPORT_SYMBOL(ilookup);
1285
1286int insert_inode_locked(struct inode *inode)
1287{
1288 struct super_block *sb = inode->i_sb;
1289 ino_t ino = inode->i_ino;
1290 struct hlist_head *head = inode_hashtable + hash(sb, ino);
1291
1292 while (1) {
1293 struct inode *old = NULL;
1294 spin_lock(&inode_hash_lock);
1295 hlist_for_each_entry(old, head, i_hash) {
1296 if (old->i_ino != ino)
1297 continue;
1298 if (old->i_sb != sb)
1299 continue;
1300 spin_lock(&old->i_lock);
1301 if (old->i_state & (I_FREEING|I_WILL_FREE)) {
1302 spin_unlock(&old->i_lock);
1303 continue;
1304 }
1305 break;
1306 }
1307 if (likely(!old)) {
1308 spin_lock(&inode->i_lock);
1309 inode->i_state |= I_NEW;
1310 hlist_add_head(&inode->i_hash, head);
1311 spin_unlock(&inode->i_lock);
1312 spin_unlock(&inode_hash_lock);
1313 return 0;
1314 }
1315 __iget(old);
1316 spin_unlock(&old->i_lock);
1317 spin_unlock(&inode_hash_lock);
1318 wait_on_inode(old);
1319 if (unlikely(!inode_unhashed(old))) {
1320 iput(old);
1321 return -EBUSY;
1322 }
1323 iput(old);
1324 }
1325}
1326EXPORT_SYMBOL(insert_inode_locked);
1327
1328int insert_inode_locked4(struct inode *inode, unsigned long hashval,
1329 int (*test)(struct inode *, void *), void *data)
1330{
1331 struct super_block *sb = inode->i_sb;
1332 struct hlist_head *head = inode_hashtable + hash(sb, hashval);
1333
1334 while (1) {
1335 struct inode *old = NULL;
1336
1337 spin_lock(&inode_hash_lock);
1338 hlist_for_each_entry(old, head, i_hash) {
1339 if (old->i_sb != sb)
1340 continue;
1341 if (!test(old, data))
1342 continue;
1343 spin_lock(&old->i_lock);
1344 if (old->i_state & (I_FREEING|I_WILL_FREE)) {
1345 spin_unlock(&old->i_lock);
1346 continue;
1347 }
1348 break;
1349 }
1350 if (likely(!old)) {
1351 spin_lock(&inode->i_lock);
1352 inode->i_state |= I_NEW;
1353 hlist_add_head(&inode->i_hash, head);
1354 spin_unlock(&inode->i_lock);
1355 spin_unlock(&inode_hash_lock);
1356 return 0;
1357 }
1358 __iget(old);
1359 spin_unlock(&old->i_lock);
1360 spin_unlock(&inode_hash_lock);
1361 wait_on_inode(old);
1362 if (unlikely(!inode_unhashed(old))) {
1363 iput(old);
1364 return -EBUSY;
1365 }
1366 iput(old);
1367 }
1368}
1369EXPORT_SYMBOL(insert_inode_locked4);
1370
1371
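/*
 * Always tell iput_final() to evict the inode: filesystems use this as
 * their ->drop_inode() op when unused inodes should never be cached.
 */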
1372int generic_delete_inode(struct inode *inode)
1373{
1374 return 1;
1375}
1376EXPORT_SYMBOL(generic_delete_inode);

/*
 * Called when we're dropping the last reference
 * to an inode.
 *
 * Call the FS "drop_inode()" op (defaults to generic_drop_inode()) to
 * decide whether the inode should be retained.  Unretained inodes are
 * written back if necessary and then evicted; retained inodes are left
 * on the superblock LRU with I_REFERENCED set.
 */
1388static void iput_final(struct inode *inode)
1389{
1390 struct super_block *sb = inode->i_sb;
1391 const struct super_operations *op = inode->i_sb->s_op;
1392 int drop;
1393
1394 WARN_ON(inode->i_state & I_NEW);
1395
1396 if (op->drop_inode)
1397 drop = op->drop_inode(inode);
1398 else
1399 drop = generic_drop_inode(inode);
1400
1401 if (!drop && (sb->s_flags & MS_ACTIVE)) {
1402 inode->i_state |= I_REFERENCED;
1403 inode_add_lru(inode);
1404 spin_unlock(&inode->i_lock);
1405 return;
1406 }
1407
1408 if (!drop) {
1409 inode->i_state |= I_WILL_FREE;
1410 spin_unlock(&inode->i_lock);
1411 write_inode_now(inode, 1);
1412 spin_lock(&inode->i_lock);
1413 WARN_ON(inode->i_state & I_NEW);
1414 inode->i_state &= ~I_WILL_FREE;
1415 }
1416
1417 inode->i_state |= I_FREEING;
1418 if (!list_empty(&inode->i_lru))
1419 inode_lru_list_del(inode);
1420 spin_unlock(&inode->i_lock);
1421
1422 evict(inode);
1423}

/**
 *	iput	- put an inode
 *	@inode: inode to put
 *
 *	Puts an inode, dropping its usage count. If the inode use count hits
 *	zero, the inode is then freed and may also be destroyed.
 *
 *	Consequently, iput() can sleep.
 */
1434void iput(struct inode *inode)
1435{
1436 if (inode) {
1437 BUG_ON(inode->i_state & I_CLEAR);
1438
1439 if (atomic_dec_and_lock(&inode->i_count, &inode->i_lock))
1440 iput_final(inode);
1441 }
1442}
1443EXPORT_SYMBOL(iput);

/**
 *	bmap	- find a block number in a file
 *	@inode: inode of file
 *	@block: block to find
 *
 *	Returns the block number on the device holding the inode that
 *	is the disk block number for the block of the file requested.
 *	That is, asked for block 4 of inode 1 the function will return the
 *	disk block relative to the disk start that holds that block of the
 *	file.
 */
1456sector_t bmap(struct inode *inode, sector_t block)
1457{
1458 sector_t res = 0;
1459 if (inode->i_mapping->a_ops->bmap)
1460 res = inode->i_mapping->a_ops->bmap(inode->i_mapping, block);
1461 return res;
1462}
1463EXPORT_SYMBOL(bmap);

/*
 * With relative atime, only update atime if the previous atime is
 * earlier than either the ctime or mtime or if at least a day has
 * passed since the last atime update.
 */
1470static int relatime_need_update(struct vfsmount *mnt, struct inode *inode,
1471 struct timespec now)
1472{
1473
1474 if (!(mnt->mnt_flags & MNT_RELATIME))
1475 return 1;
	/*
	 * Is mtime younger than atime? If yes, update atime:
	 */
1479 if (timespec_compare(&inode->i_mtime, &inode->i_atime) >= 0)
1480 return 1;
	/*
	 * Is ctime younger than atime? If yes, update atime:
	 */
1484 if (timespec_compare(&inode->i_ctime, &inode->i_atime) >= 0)
1485 return 1;

	/*
	 * Is the previous atime value older than a day? If yes,
	 * update atime:
	 */
1491 if ((long)(now.tv_sec - inode->i_atime.tv_sec) >= 24*60*60)
1492 return 1;
	/*
	 * Good, we can skip the atime update:
	 */
1496 return 0;
1497}
1498

/*
 * This does the actual work of updating an inode's time or version.  The
 * caller must already hold the appropriate mnt_want_write() reference.
 */
1503static int update_time(struct inode *inode, struct timespec *time, int flags)
1504{
1505 if (inode->i_op->update_time)
1506 return inode->i_op->update_time(inode, time, flags);
1507
1508 if (flags & S_ATIME)
1509 inode->i_atime = *time;
1510 if (flags & S_VERSION)
1511 inode_inc_iversion(inode);
1512 if (flags & S_CTIME)
1513 inode->i_ctime = *time;
1514 if (flags & S_MTIME)
1515 inode->i_mtime = *time;
1516 mark_inode_dirty_sync(inode);
1517 return 0;
1518}

/**
 *	touch_atime	-	update the access time
 *	@path: the &struct path to update
 *
 *	Update the accessed time on an inode and mark it for writeback.
 *	This function automatically handles read only file systems and media,
 *	as well as the "noatime" flag and inode specific "noatime" markers.
 */
1528void touch_atime(struct path *path)
1529{
1530 struct vfsmount *mnt = path->mnt;
1531 struct inode *inode = path->dentry->d_inode;
1532 struct timespec now;
1533
1534 if (inode->i_flags & S_NOATIME)
1535 return;
1536 if (IS_NOATIME(inode))
1537 return;
1538 if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
1539 return;
1540
1541 if (mnt->mnt_flags & MNT_NOATIME)
1542 return;
1543 if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
1544 return;
1545
1546 now = current_fs_time(inode->i_sb);
1547
1548 if (!relatime_need_update(mnt, inode, now))
1549 return;
1550
1551 if (timespec_equal(&inode->i_atime, &now))
1552 return;
1553
1554 if (!sb_start_write_trylock(inode->i_sb))
1555 return;
1556
1557 if (__mnt_want_write(mnt))
1558 goto skip_update;

	/*
	 * File systems can error out when updating inodes if they need to
	 * allocate new space to modify an inode (such is the case for
	 * Btrfs), but since we touch atime while walking down the path we
	 * really don't care if we failed to update the atime of the file,
	 * so just ignore the return value.
	 * We may also fail on filesystems that have the ability to make parts
	 * of the fs read only, e.g. subvolumes in Btrfs.
	 */
1568 update_time(inode, &now, S_ATIME);
1569 __mnt_drop_write(mnt);
1570skip_update:
1571 sb_end_write(inode->i_sb);
1572}
1573EXPORT_SYMBOL(touch_atime);

/*
 * Return the ATTR_KILL_* mask of set-user-ID and set-group-ID bits that
 * should be cleared when the file referenced by @dentry is written to,
 * or 0 if nothing needs to change.
 */
1581int should_remove_suid(struct dentry *dentry)
1582{
1583 umode_t mode = dentry->d_inode->i_mode;
1584 int kill = 0;
1585
	/* suid always must be killed */
1587 if (unlikely(mode & S_ISUID))
1588 kill = ATTR_KILL_SUID;

	/*
	 * sgid without any exec bits is just a mandatory locking candidate,
	 * not a real sgid, so do not kill it.
	 */
1594 if (unlikely((mode & S_ISGID) && (mode & S_IXGRP)))
1595 kill |= ATTR_KILL_SGID;
1596
1597 if (unlikely(kill && !capable(CAP_FSETID) && S_ISREG(mode)))
1598 return kill;
1599
1600 return 0;
1601}
1602EXPORT_SYMBOL(should_remove_suid);
1603
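/*
 * Ask the filesystem to clear the bits reported by should_remove_suid()
 * via a notify_change() call with ATTR_FORCE set.
 */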
1604static int __remove_suid(struct dentry *dentry, int kill)
1605{
1606 struct iattr newattrs;
1607
1608 newattrs.ia_valid = ATTR_FORCE | kill;
1609 return notify_change(dentry, &newattrs);
1610}
1611
1612int file_remove_suid(struct file *file)
1613{
1614 struct dentry *dentry = file->f_path.dentry;
1615 struct inode *inode = dentry->d_inode;
1616 int killsuid;
1617 int killpriv;
1618 int error = 0;

	/* Fast path for nothing security related */
1621 if (IS_NOSEC(inode))
1622 return 0;
1623
1624 killsuid = should_remove_suid(dentry);
1625 killpriv = security_inode_need_killpriv(dentry);
1626
1627 if (killpriv < 0)
1628 return killpriv;
1629 if (killpriv)
1630 error = security_inode_killpriv(dentry);
1631 if (!error && killsuid)
1632 error = __remove_suid(dentry, killsuid);
1633 if (!error && (inode->i_sb->s_flags & MS_NOSEC))
1634 inode->i_flags |= S_NOSEC;
1635
1636 return error;
1637}
1638EXPORT_SYMBOL(file_remove_suid);

/**
 *	file_update_time	-	update mtime and ctime time
 *	@file: file accessed
 *
 *	Update the mtime and ctime members of an inode and mark the inode
 *	for writeback.  Note that this function is meant exclusively for
 *	usage in the file write path of filesystems, and filesystems may
 *	choose to explicitly ignore updates via this function with the
 *	S_NOCMTIME inode flag, e.g. for network filesystems where these
 *	timestamps are handled by the server.  This can return an error for
 *	file systems which need to allocate space in order to update an inode.
 */
1653int file_update_time(struct file *file)
1654{
1655 struct inode *inode = file_inode(file);
1656 struct timespec now;
1657 int sync_it = 0;
1658 int ret;

	/* First try to exhaust all avenues to not sync */
1661 if (IS_NOCMTIME(inode))
1662 return 0;
1663
1664 now = current_fs_time(inode->i_sb);
1665 if (!timespec_equal(&inode->i_mtime, &now))
1666 sync_it = S_MTIME;
1667
1668 if (!timespec_equal(&inode->i_ctime, &now))
1669 sync_it |= S_CTIME;
1670
1671 if (IS_I_VERSION(inode))
1672 sync_it |= S_VERSION;
1673
1674 if (!sync_it)
1675 return 0;

	/* Finally allowed to write? Takes lock. */
1678 if (__mnt_want_write_file(file))
1679 return 0;
1680
1681 ret = update_time(inode, &now, sync_it);
1682 __mnt_drop_write_file(file);
1683
1684 return ret;
1685}
1686EXPORT_SYMBOL(file_update_time);
1687
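/*
 * Return 1 if writes to the inode should be done synchronously, either
 * because the inode is marked S_SYNC or because it is a directory on a
 * filesystem mounted with "dirsync".
 */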
1688int inode_needs_sync(struct inode *inode)
1689{
1690 if (IS_SYNC(inode))
1691 return 1;
1692 if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode))
1693 return 1;
1694 return 0;
1695}
1696EXPORT_SYMBOL(inode_needs_sync);
1697
1698int inode_wait(void *word)
1699{
1700 schedule();
1701 return 0;
1702}
1703EXPORT_SYMBOL(inode_wait);

/*
 * If we try to find an inode in the inode hash while it is being
 * deleted, we have to wait until the filesystem completes its
 * deletion before reporting that it isn't found.  This function waits
 * until the deletion _might_ have completed.  Callers are responsible
 * to recheck inode state.
 *
 * It doesn't matter if I_NEW is not set initially, a call to
 * wake_up_bit(&inode->i_state, __I_NEW) after removing from the hash list
 * will DTRT.
 */
1716static void __wait_on_freeing_inode(struct inode *inode)
1717{
1718 wait_queue_head_t *wq;
1719 DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);
1720 wq = bit_waitqueue(&inode->i_state, __I_NEW);
1721 prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
1722 spin_unlock(&inode->i_lock);
1723 spin_unlock(&inode_hash_lock);
1724 schedule();
1725 finish_wait(wq, &wait.wait);
1726 spin_lock(&inode_hash_lock);
1727}
1728
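/*
 * Allow the size of the inode hash table to be overridden on the kernel
 * command line with "ihash_entries=<n>".
 */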
1729static __initdata unsigned long ihash_entries;
1730static int __init set_ihash_entries(char *str)
1731{
1732 if (!str)
1733 return 0;
1734 ihash_entries = simple_strtoul(str, &str, 0);
1735 return 1;
1736}
1737__setup("ihash_entries=", set_ihash_entries);

/*
 * Initialize the waitqueues and inode hash table.
 */
1742void __init inode_init_early(void)
1743{
1744 unsigned int loop;

	/* If hashes are distributed across NUMA nodes, defer
	 * hash allocation until vmalloc space is available.
	 */
1749 if (hashdist)
1750 return;
1751
1752 inode_hashtable =
1753 alloc_large_system_hash("Inode-cache",
1754 sizeof(struct hlist_head),
1755 ihash_entries,
1756 14,
1757 HASH_EARLY,
1758 &i_hash_shift,
1759 &i_hash_mask,
1760 0,
1761 0);
1762
1763 for (loop = 0; loop < (1U << i_hash_shift); loop++)
1764 INIT_HLIST_HEAD(&inode_hashtable[loop]);
1765}
1766
1767void __init inode_init(void)
1768{
1769 unsigned int loop;

	/* inode slab cache */
1772 inode_cachep = kmem_cache_create("inode_cache",
1773 sizeof(struct inode),
1774 0,
1775 (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
1776 SLAB_MEM_SPREAD),
1777 init_once);

	/* Hash may have been set up in inode_init_early */
1780 if (!hashdist)
1781 return;
1782
1783 inode_hashtable =
1784 alloc_large_system_hash("Inode-cache",
1785 sizeof(struct hlist_head),
1786 ihash_entries,
1787 14,
1788 0,
1789 &i_hash_shift,
1790 &i_hash_mask,
1791 0,
1792 0);
1793
1794 for (loop = 0; loop < (1U << i_hash_shift); loop++)
1795 INIT_HLIST_HEAD(&inode_hashtable[loop]);
1796}
1797
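/*
 * Initialise a special inode: set i_mode and the appropriate default file
 * operations, recording the device number for character and block devices.
 */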
1798void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)
1799{
1800 inode->i_mode = mode;
1801 if (S_ISCHR(mode)) {
1802 inode->i_fop = &def_chr_fops;
1803 inode->i_rdev = rdev;
1804 } else if (S_ISBLK(mode)) {
1805 inode->i_fop = &def_blk_fops;
1806 inode->i_rdev = rdev;
1807 } else if (S_ISFIFO(mode))
1808 inode->i_fop = &pipefifo_fops;
1809 else if (S_ISSOCK(mode))
1810 inode->i_fop = &bad_sock_fops;
1811 else
1812 printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o) for"
1813 " inode %s:%lu\n", mode, inode->i_sb->s_id,
1814 inode->i_ino);
1815}
1816EXPORT_SYMBOL(init_special_inode);

/**
 * inode_init_owner - Init uid,gid,mode for new inode according to posix standards
 * @inode: New inode
 * @dir: Directory inode
 * @mode: mode of the new inode
 */
1824void inode_init_owner(struct inode *inode, const struct inode *dir,
1825 umode_t mode)
1826{
1827 inode->i_uid = current_fsuid();
1828 if (dir && dir->i_mode & S_ISGID) {
1829 inode->i_gid = dir->i_gid;
1830 if (S_ISDIR(mode))
1831 mode |= S_ISGID;
1832 } else
1833 inode->i_gid = current_fsgid();
1834 inode->i_mode = mode;
1835}
1836EXPORT_SYMBOL(inode_init_owner);

/**
 * inode_owner_or_capable - check current task permissions to inode
 * @inode: inode being checked
 *
 * Return true if the current task owns the inode, or is privileged with
 * CAP_FOWNER over it.
 */
1845bool inode_owner_or_capable(const struct inode *inode)
1846{
1847 if (uid_eq(current_fsuid(), inode->i_uid))
1848 return true;
1849 if (inode_capable(inode, CAP_FOWNER))
1850 return true;
1851 return false;
1852}
1853EXPORT_SYMBOL(inode_owner_or_capable);

/*
 * Direct i/o helper functions
 */
1858static void __inode_dio_wait(struct inode *inode)
1859{
1860 wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_DIO_WAKEUP);
1861 DEFINE_WAIT_BIT(q, &inode->i_state, __I_DIO_WAKEUP);
1862
1863 do {
1864 prepare_to_wait(wq, &q.wait, TASK_UNINTERRUPTIBLE);
1865 if (atomic_read(&inode->i_dio_count))
1866 schedule();
1867 } while (atomic_read(&inode->i_dio_count));
1868 finish_wait(wq, &q.wait);
1869}

/**
 * inode_dio_wait - wait for outstanding DIO requests to finish
 * @inode: inode to wait for
 *
 * Waits for all pending direct I/O requests to finish so that we can
 * proceed with a truncate or equivalent operation.
 *
 * Must be called under a lock that serializes taking new references
 * to i_dio_count, usually by inode->i_mutex.
 */
1881void inode_dio_wait(struct inode *inode)
1882{
1883 if (atomic_read(&inode->i_dio_count))
1884 __inode_dio_wait(inode);
1885}
1886EXPORT_SYMBOL(inode_dio_wait);

/*
 * inode_dio_done - signal finish of a direct I/O request
 * @inode: inode the direct I/O happens on
 *
 * This is called once we've finished processing a direct I/O request,
 * and is used to wake up callers waiting for direct I/O to be quiesced.
 */
1895void inode_dio_done(struct inode *inode)
1896{
1897 if (atomic_dec_and_test(&inode->i_dio_count))
1898 wake_up_bit(&inode->i_state, __I_DIO_WAKEUP);
1899}
1900EXPORT_SYMBOL(inode_dio_done);
1901