/*
 *  linux/fs/super.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  super.c contains code to handle: - mount structures
 *                                   - super-block tables
 *                                   - filesystem drivers list
 *                                   - mount system call
 *                                   - umount system call
 *                                   - ustat system call
 */
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/mount.h>
#include <linux/security.h>
#include <linux/writeback.h>
#include <linux/idr.h>
#include <linux/mutex.h>
#include <linux/backing-dev.h>
#include <linux/rculist_bl.h>
#include <linux/cleancache.h>
#include <linux/fsnotify.h>
#include <linux/lockdep.h>
#include <linux/user_namespace.h>
#include "internal.h"

static LIST_HEAD(super_blocks);
static DEFINE_SPINLOCK(sb_lock);

static char *sb_writers_name[SB_FREEZE_LEVELS] = {
	"sb_writers",
	"sb_pagefaults",
	"sb_internal",
};
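/*
 * One thing we have to be careful of with a per-sb shrinker is that we don't
 * drop the last active reference to the superblock from within the shrinker.
 * If that happens we could trigger unregistering the shrinker from within the
 * shrinker path and that leads to deadlock on the shrinker_rwsem. Hence we
 * need to ensure we can allocate memory for our own internal objects.
 */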
static unsigned long super_cache_scan(struct shrinker *shrink,
				      struct shrink_control *sc)
{
	struct super_block *sb;
	long	fs_objects = 0;
	long	total_objects;
	long	freed = 0;
	long	dentries;
	long	inodes;

	sb = container_of(shrink, struct super_block, s_shrink);

	/*
	 * Deadlock avoidance.  We may hold various FS locks, and we don't want
	 * to recurse into the FS that called us in clear_inode() and friends..
	 */
	if (!(sc->gfp_mask & __GFP_FS))
		return SHRINK_STOP;

	if (!trylock_super(sb))
		return SHRINK_STOP;

	if (sb->s_op->nr_cached_objects)
		fs_objects = sb->s_op->nr_cached_objects(sb, sc);

	inodes = list_lru_shrink_count(&sb->s_inode_lru, sc);
	dentries = list_lru_shrink_count(&sb->s_dentry_lru, sc);
	total_objects = dentries + inodes + fs_objects + 1;
	if (!total_objects)
		total_objects = 1;

	/* proportion the scan between the caches */
	dentries = mult_frac(sc->nr_to_scan, dentries, total_objects);
	inodes = mult_frac(sc->nr_to_scan, inodes, total_objects);
	fs_objects = mult_frac(sc->nr_to_scan, fs_objects, total_objects);

	/*
	 * prune the dcache first as the icache is pinned by it, then
	 * prune the icache, followed by the filesystem specific caches.
	 *
	 * Ensure that we always scan at least one object - memcg kmem
	 * accounting uses this to fully empty the caches.
	 */
	sc->nr_to_scan = dentries + 1;
	freed = prune_dcache_sb(sb, sc);
	sc->nr_to_scan = inodes + 1;
	freed += prune_icache_sb(sb, sc);

	if (fs_objects) {
		sc->nr_to_scan = fs_objects + 1;
		freed += sb->s_op->free_cached_objects(sb, sc);
	}

	up_read(&sb->s_umount);
	return freed;
}

static unsigned long super_cache_count(struct shrinker *shrink,
				       struct shrink_control *sc)
{
	struct super_block *sb;
	long	total_objects = 0;

	sb = container_of(shrink, struct super_block, s_shrink);

	/*
	 * Don't call trylock_super as it is a potential
	 * scalability bottleneck. The counts could get updated
	 * between super_cache_count and super_cache_scan anyway.
	 * Call to super_cache_count with shrinker_rwsem held
	 * ensures the safety of call to list_lru_shrink_count() and
	 * s_op->nr_cached_objects().
	 */
	if (sb->s_op && sb->s_op->nr_cached_objects)
		total_objects = sb->s_op->nr_cached_objects(sb, sc);

	total_objects += list_lru_shrink_count(&sb->s_dentry_lru, sc);
	total_objects += list_lru_shrink_count(&sb->s_inode_lru, sc);

	total_objects = vfs_pressure_ratio(total_objects);
	return total_objects;
}
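/*
 * Final freeing must be able to sleep (percpu_free_rwsem() may block), so
 * it is deferred from the RCU callback to process context via a work item.
 */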
static void destroy_super_work(struct work_struct *work)
{
	struct super_block *s = container_of(work, struct super_block,
							destroy_work);
	int i;

	for (i = 0; i < SB_FREEZE_LEVELS; i++)
		percpu_free_rwsem(&s->s_writers.rw_sem[i]);
	kfree(s);
}
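/* RCU callback: hand the superblock off to a workqueue for final freeing. */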
static void destroy_super_rcu(struct rcu_head *head)
{
	struct super_block *s = container_of(head, struct super_block, rcu);
	INIT_WORK(&s->destroy_work, destroy_super_work);
	schedule_work(&s->destroy_work);
}

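/**
 *	destroy_super	-	frees a superblock
 *	@s: superblock to free
 *
 *	Frees a superblock.
 */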
static void destroy_super(struct super_block *s)
{
	list_lru_destroy(&s->s_dentry_lru);
	list_lru_destroy(&s->s_inode_lru);
	security_sb_free(s);
	WARN_ON(!list_empty(&s->s_mounts));
	put_user_ns(s->s_user_ns);
	kfree(s->s_subtype);
	kfree(s->s_options);
	call_rcu(&s->rcu, destroy_super_rcu);
}

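/**
 *	alloc_super	-	create new superblock
 *	@type:	filesystem type superblock should belong to
 *	@flags: the mount flags
 *	@user_ns: User namespace for the super_block
 *
 *	Allocates and initializes a new &struct super_block.  alloc_super()
 *	returns a pointer to the new superblock, or %NULL if allocation failed.
 */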
static struct super_block *alloc_super(struct file_system_type *type, int flags,
				       struct user_namespace *user_ns)
{
	struct super_block *s = kzalloc(sizeof(struct super_block), GFP_USER);
	static const struct super_operations default_op;
	int i;

	if (!s)
		return NULL;

	INIT_LIST_HEAD(&s->s_mounts);
	s->s_user_ns = get_user_ns(user_ns);

	if (security_sb_alloc(s))
		goto fail;

	for (i = 0; i < SB_FREEZE_LEVELS; i++) {
		if (__percpu_init_rwsem(&s->s_writers.rw_sem[i],
					sb_writers_name[i],
					&type->s_writers_key[i]))
			goto fail;
	}
	init_waitqueue_head(&s->s_writers.wait_unfrozen);
	s->s_bdi = &noop_backing_dev_info;
	s->s_flags = flags;
	if (s->s_user_ns != &init_user_ns)
		s->s_iflags |= SB_I_NODEV;
	INIT_HLIST_NODE(&s->s_instances);
	INIT_HLIST_BL_HEAD(&s->s_anon);
	mutex_init(&s->s_sync_lock);
	INIT_LIST_HEAD(&s->s_inodes);
	spin_lock_init(&s->s_inode_list_lock);
	INIT_LIST_HEAD(&s->s_inodes_wb);
	spin_lock_init(&s->s_inode_wblist_lock);

	if (list_lru_init_memcg(&s->s_dentry_lru))
		goto fail;
	if (list_lru_init_memcg(&s->s_inode_lru))
		goto fail;

	init_rwsem(&s->s_umount);
	lockdep_set_class(&s->s_umount, &type->s_umount_key);
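	/*
	 * sget() can have s_umount recursion.
	 *
	 * When it cannot find a suitable sb, it allocates a new
	 * one (this one), and tries again to find a suitable old
	 * one.
	 *
	 * In case that succeeds, it will acquire the s_umount
	 * lock of the old one. Since these are clearly distinct
	 * locks, and this object isn't exposed yet, there's no
	 * risk of deadlocks.
	 *
	 * Annotate this by putting this lock in a different
	 * subclass.
	 */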
	down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING);
	s->s_count = 1;
	atomic_set(&s->s_active, 1);
	mutex_init(&s->s_vfs_rename_mutex);
	lockdep_set_class(&s->s_vfs_rename_mutex, &type->s_vfs_rename_key);
	mutex_init(&s->s_dquot.dqio_mutex);
	s->s_maxbytes = MAX_NON_LFS;
	s->s_op = &default_op;
	s->s_time_gran = 1000000000;
	s->cleancache_poolid = CLEANCACHE_NO_POOL;

	s->s_shrink.seeks = DEFAULT_SEEKS;
	s->s_shrink.scan_objects = super_cache_scan;
	s->s_shrink.count_objects = super_cache_count;
	s->s_shrink.batch = 1024;
	s->s_shrink.flags = SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE;
	return s;

fail:
	destroy_super(s);
	return NULL;
}

/* Superblock refcounting  */

/*
 * Drop a superblock's refcount.  The caller must hold sb_lock.
 */
static void __put_super(struct super_block *sb)
{
	if (!--sb->s_count) {
		list_del_init(&sb->s_list);
		destroy_super(sb);
	}
}

/**
 *	put_super	-	drop a temporary reference to superblock
 *	@sb: superblock in question
 *
 *	Drops a temporary reference, frees superblock if there's no
 *	references left.
 */
static void put_super(struct super_block *sb)
{
	spin_lock(&sb_lock);
	__put_super(sb);
	spin_unlock(&sb_lock);
}

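/**
 *	deactivate_locked_super	-	drop an active reference to superblock
 *	@s: superblock to deactivate
 *
 *	Drops an active reference to superblock, converting it into a temporary
 *	one if there is no other active references left.  In that case we
 *	tell fs driver to shut it down and drop the temporary reference we
 *	had just acquired.
 *
 *	Caller holds exclusive lock on superblock; that lock is released.
 */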
void deactivate_locked_super(struct super_block *s)
{
	struct file_system_type *fs = s->s_type;
	if (atomic_dec_and_test(&s->s_active)) {
		cleancache_invalidate_fs(s);
		unregister_shrinker(&s->s_shrink);
		fs->kill_sb(s);

		/*
		 * Since list_lru_destroy() may sleep, we cannot call it from
		 * put_super(), where we hold the sb_lock. Therefore we destroy
		 * the lru lists right now.
		 */
		list_lru_destroy(&s->s_dentry_lru);
		list_lru_destroy(&s->s_inode_lru);

		put_filesystem(fs);
		put_super(s);
	} else {
		up_write(&s->s_umount);
	}
}

EXPORT_SYMBOL(deactivate_locked_super);

/**
 *	deactivate_super	-	drop an active reference to superblock
 *	@s: superblock to deactivate
 *
 *	Variant of deactivate_locked_super(), except that superblock is *not*
 *	locked by caller.  If we are going to drop the final active reference,
 *	lock will be acquired prior to that.
 */
void deactivate_super(struct super_block *s)
{
	if (!atomic_add_unless(&s->s_active, -1, 1)) {
		down_write(&s->s_umount);
		deactivate_locked_super(s);
	}
}

EXPORT_SYMBOL(deactivate_super);

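/**
 *	grab_super - acquire an active reference
 *	@s: reference we are trying to make active
 *
 *	Tries to acquire an active reference.  grab_super() is used when we
 *	had just found a superblock in super_blocks or fs_type->fs_supers
 *	and want to turn it into a full-blown active reference.  grab_super()
 *	is called with sb_lock held and drops it.  Returns 1 in case of
 *	success, 0 if we had failed (superblock contents was already dead or
 *	dying when grab_super() had been called).
 */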
static int grab_super(struct super_block *s) __releases(sb_lock)
{
	s->s_count++;
	spin_unlock(&sb_lock);
	down_write(&s->s_umount);
	if ((s->s_flags & MS_BORN) && atomic_inc_not_zero(&s->s_active)) {
		put_super(s);
		return 1;
	}
	up_write(&s->s_umount);
	put_super(s);
	return 0;
}

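/*
 *	trylock_super - try to grab ->s_umount shared
 *	@sb: reference we are trying to grab
 *
 *	Try to prevent fs shutdown.  This is used in places where we
 *	cannot take an active reference but we need to ensure that the
 *	filesystem is not shut down while we are working on it. It returns
 *	false if we cannot acquire s_umount or if we lose the race and
 *	filesystem already got into shutdown, and returns true with the s_umount
 *	lock held in read mode in case of success. On successful return,
 *	the caller must drop the s_umount lock when done.
 *
 *	Note that unlike get_super() et.al. this one does *not* bump ->s_count.
 *	The reason why it's safe is that we are OK with doing trylock instead
 *	of down_read().  There's a couple of places that are OK with that, but
 *	it's very much not a general-purpose interface.
 */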
bool trylock_super(struct super_block *sb)
{
	if (down_read_trylock(&sb->s_umount)) {
		if (!hlist_unhashed(&sb->s_instances) &&
		    sb->s_root && (sb->s_flags & MS_BORN))
			return true;
		up_read(&sb->s_umount);
	}

	return false;
}

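/**
 *	generic_shutdown_super	-	common helper for ->kill_sb()
 *	@sb: superblock to kill
 *
 *	generic_shutdown_super() does all fs-independent work on superblock
 *	shutdown.  Typical ->kill_sb() should pick all fs-specific objects
 *	that need destruction out of superblock, call generic_shutdown_super()
 *	and release aforementioned objects.  Note: dentries and inodes _are_
 *	taken care of and do not need specific handling.
 */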
void generic_shutdown_super(struct super_block *sb)
{
	const struct super_operations *sop = sb->s_op;

	if (sb->s_root) {
		shrink_dcache_for_umount(sb);
		sync_filesystem(sb);
		sb->s_flags &= ~MS_ACTIVE;

		fsnotify_unmount_inodes(sb);
		cgroup_writeback_umount();

		evict_inodes(sb);

		if (sb->s_dio_done_wq) {
			destroy_workqueue(sb->s_dio_done_wq);
			sb->s_dio_done_wq = NULL;
		}

		if (sop->put_super)
			sop->put_super(sb);

		if (!list_empty(&sb->s_inodes)) {
			printk("VFS: Busy inodes after unmount of %s. "
			       "Self-destruct in 5 seconds. Have a nice day...\n",
			       sb->s_id);
		}
	}
	spin_lock(&sb_lock);
	/* remove the superblock from the type's ->fs_supers list */
	hlist_del_init(&sb->s_instances);
	spin_unlock(&sb_lock);
	up_write(&sb->s_umount);
	if (sb->s_bdi != &noop_backing_dev_info) {
		bdi_put(sb->s_bdi);
		sb->s_bdi = &noop_backing_dev_info;
	}
}

EXPORT_SYMBOL(generic_shutdown_super);

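/**
 *	sget_userns -	find or create a superblock
 *	@type:	filesystem type superblock should belong to
 *	@test:	comparison callback
 *	@set:	setup callback
 *	@flags:	mount flags
 *	@user_ns: User namespace for the super_block
 *	@data:	argument to each of them
 */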
struct super_block *sget_userns(struct file_system_type *type,
			int (*test)(struct super_block *,void *),
			int (*set)(struct super_block *,void *),
			int flags, struct user_namespace *user_ns,
			void *data)
{
	struct super_block *s = NULL;
	struct super_block *old;
	int err;

	if (!(flags & (MS_KERNMOUNT|MS_SUBMOUNT)) &&
	    !(type->fs_flags & FS_USERNS_MOUNT) &&
	    !capable(CAP_SYS_ADMIN))
		return ERR_PTR(-EPERM);
retry:
	spin_lock(&sb_lock);
	if (test) {
		hlist_for_each_entry(old, &type->fs_supers, s_instances) {
			if (!test(old, data))
				continue;
			if (user_ns != old->s_user_ns) {
				spin_unlock(&sb_lock);
				if (s) {
					up_write(&s->s_umount);
					destroy_super(s);
				}
				return ERR_PTR(-EBUSY);
			}
			if (!grab_super(old))
				goto retry;
			if (s) {
				up_write(&s->s_umount);
				destroy_super(s);
				s = NULL;
			}
			return old;
		}
	}
	if (!s) {
		spin_unlock(&sb_lock);
		s = alloc_super(type, (flags & ~MS_SUBMOUNT), user_ns);
		if (!s)
			return ERR_PTR(-ENOMEM);
		goto retry;
	}

	err = set(s, data);
	if (err) {
		spin_unlock(&sb_lock);
		up_write(&s->s_umount);
		destroy_super(s);
		return ERR_PTR(err);
	}
	s->s_type = type;
	strlcpy(s->s_id, type->name, sizeof(s->s_id));
	list_add_tail(&s->s_list, &super_blocks);
	hlist_add_head(&s->s_instances, &type->fs_supers);
	spin_unlock(&sb_lock);
	get_filesystem(type);
	register_shrinker(&s->s_shrink);
	return s;
}

EXPORT_SYMBOL(sget_userns);

/**
 *	sget	-	find or create a superblock
 *	@type:	  filesystem type superblock should belong to
 *	@test:	  comparison callback
 *	@set:	  setup callback
 *	@flags:	  mount flags
 *	@data:	  argument to each of them
 */
struct super_block *sget(struct file_system_type *type,
			int (*test)(struct super_block *,void *),
			int (*set)(struct super_block *,void *),
			int flags,
			void *data)
{
	struct user_namespace *user_ns = current_user_ns();

	/* We don't yet pass the user namespace of the parent
	 * mount through to here so always use &init_user_ns
	 * until that changes.
	 */
	if (flags & MS_SUBMOUNT)
		user_ns = &init_user_ns;

	/* Ensure the requestor has permissions over the target filesystem */
	if (!(flags & (MS_KERNMOUNT|MS_SUBMOUNT)) && !ns_capable(user_ns, CAP_SYS_ADMIN))
		return ERR_PTR(-EPERM);

	return sget_userns(type, test, set, flags, user_ns, data);
}

EXPORT_SYMBOL(sget);

void drop_super(struct super_block *sb)
{
	up_read(&sb->s_umount);
	put_super(sb);
}

EXPORT_SYMBOL(drop_super);

void drop_super_exclusive(struct super_block *sb)
{
	up_write(&sb->s_umount);
	put_super(sb);
}
EXPORT_SYMBOL(drop_super_exclusive);

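/**
 *	iterate_supers - call function for all active superblocks
 *	@f: function to call
 *	@arg: argument to pass to it
 *
 *	Scans the superblock list and calls given function, passing it
 *	locked superblock and given argument.
 */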
void iterate_supers(void (*f)(struct super_block *, void *), void *arg)
{
	struct super_block *sb, *p = NULL;

	spin_lock(&sb_lock);
	list_for_each_entry(sb, &super_blocks, s_list) {
		if (hlist_unhashed(&sb->s_instances))
			continue;
		sb->s_count++;
		spin_unlock(&sb_lock);

		down_read(&sb->s_umount);
		if (sb->s_root && (sb->s_flags & MS_BORN))
			f(sb, arg);
		up_read(&sb->s_umount);

		spin_lock(&sb_lock);
		if (p)
			__put_super(p);
		p = sb;
	}
	if (p)
		__put_super(p);
	spin_unlock(&sb_lock);
}

/**
 *	iterate_supers_type - call function for superblocks of given type
 *	@type: fs type
 *	@f: function to call
 *	@arg: argument to pass to it
 *
 *	Scans the superblock list and calls given function, passing it
 *	locked superblock and given argument.
 */
void iterate_supers_type(struct file_system_type *type,
	void (*f)(struct super_block *, void *), void *arg)
{
	struct super_block *sb, *p = NULL;

	spin_lock(&sb_lock);
	hlist_for_each_entry(sb, &type->fs_supers, s_instances) {
		sb->s_count++;
		spin_unlock(&sb_lock);

		down_read(&sb->s_umount);
		if (sb->s_root && (sb->s_flags & MS_BORN))
			f(sb, arg);
		up_read(&sb->s_umount);

		spin_lock(&sb_lock);
		if (p)
			__put_super(p);
		p = sb;
	}
	if (p)
		__put_super(p);
	spin_unlock(&sb_lock);
}

EXPORT_SYMBOL(iterate_supers_type);

static struct super_block *__get_super(struct block_device *bdev, bool excl)
{
	struct super_block *sb;

	if (!bdev)
		return NULL;

	spin_lock(&sb_lock);
rescan:
	list_for_each_entry(sb, &super_blocks, s_list) {
		if (hlist_unhashed(&sb->s_instances))
			continue;
		if (sb->s_bdev == bdev) {
			sb->s_count++;
			spin_unlock(&sb_lock);
			if (!excl)
				down_read(&sb->s_umount);
			else
				down_write(&sb->s_umount);
			/* still alive? */
			if (sb->s_root && (sb->s_flags & MS_BORN))
				return sb;
			if (!excl)
				up_read(&sb->s_umount);
			else
				up_write(&sb->s_umount);
			/* nope, got unmounted */
			spin_lock(&sb_lock);
			__put_super(sb);
			goto rescan;
		}
	}
	spin_unlock(&sb_lock);
	return NULL;
}

/**
 *	get_super - get the superblock of a device
 *	@bdev: device to get the superblock for
 *
 *	Scans the superblock list and finds the superblock of the file system
 *	mounted on the device given. %NULL is returned if no match is found.
 */
struct super_block *get_super(struct block_device *bdev)
{
	return __get_super(bdev, false);
}
EXPORT_SYMBOL(get_super);

static struct super_block *__get_super_thawed(struct block_device *bdev,
					      bool excl)
{
	while (1) {
		struct super_block *s = __get_super(bdev, excl);
		if (!s || s->s_writers.frozen == SB_UNFROZEN)
			return s;
		if (!excl)
			up_read(&s->s_umount);
		else
			up_write(&s->s_umount);
		wait_event(s->s_writers.wait_unfrozen,
			   s->s_writers.frozen == SB_UNFROZEN);
		put_super(s);
	}
}

/**
 *	get_super_thawed - get thawed superblock of a device
 *	@bdev: device to get the superblock for
 *
 *	Scans the superblock list and finds the superblock of the file system
 *	mounted on the device. The superblock is returned once it is thawed
 *	(or immediately if it was not frozen). %NULL is returned if no match
 *	is found.
 */
struct super_block *get_super_thawed(struct block_device *bdev)
{
	return __get_super_thawed(bdev, false);
}
EXPORT_SYMBOL(get_super_thawed);

/**
 *	get_super_exclusive_thawed - get thawed superblock of a device
 *	@bdev: device to get the superblock for
 *
 *	Scans the superblock list and finds the superblock of the file system
 *	mounted on the device. The superblock is returned once it is thawed
 *	(or immediately if it was not frozen), with the s_umount semaphore
 *	held exclusively. %NULL is returned if no match is found.
 */
struct super_block *get_super_exclusive_thawed(struct block_device *bdev)
{
	return __get_super_thawed(bdev, true);
}
EXPORT_SYMBOL(get_super_exclusive_thawed);

/**
 * get_active_super - get an active reference to the superblock of a device
 * @bdev: device to get the superblock for
 *
 * Scans the superblock list and finds the superblock of the file system
 * mounted on the device given.  Returns the superblock with an active
 * reference or %NULL if none was found.
 */
struct super_block *get_active_super(struct block_device *bdev)
{
	struct super_block *sb;

	if (!bdev)
		return NULL;

restart:
	spin_lock(&sb_lock);
	list_for_each_entry(sb, &super_blocks, s_list) {
		if (hlist_unhashed(&sb->s_instances))
			continue;
		if (sb->s_bdev == bdev) {
			if (!grab_super(sb))
				goto restart;
			up_write(&sb->s_umount);
			return sb;
		}
	}
	spin_unlock(&sb_lock);
	return NULL;
}

struct super_block *user_get_super(dev_t dev)
{
	struct super_block *sb;

	spin_lock(&sb_lock);
rescan:
	list_for_each_entry(sb, &super_blocks, s_list) {
		if (hlist_unhashed(&sb->s_instances))
			continue;
		if (sb->s_dev == dev) {
			sb->s_count++;
			spin_unlock(&sb_lock);
			down_read(&sb->s_umount);
			/* still alive? */
			if (sb->s_root && (sb->s_flags & MS_BORN))
				return sb;
			up_read(&sb->s_umount);
			/* nope, got unmounted */
			spin_lock(&sb_lock);
			__put_super(sb);
			goto rescan;
		}
	}
	spin_unlock(&sb_lock);
	return NULL;
}

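/**
 *	do_remount_sb - asks filesystem to change mount options.
 *	@sb:	superblock in question
 *	@flags:	numeric part of options
 *	@data:	the rest of options
 *	@force: whether or not to force the change
 *
 *	Alters the mount options of a mounted file system.
 */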
int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
{
	int retval;
	int remount_ro;

	if (sb->s_writers.frozen != SB_UNFROZEN)
		return -EBUSY;

#ifdef CONFIG_BLOCK
	if (!(flags & MS_RDONLY) && bdev_read_only(sb->s_bdev))
		return -EACCES;
#endif

	remount_ro = (flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY);

	if (remount_ro) {
		if (!hlist_empty(&sb->s_pins)) {
			up_write(&sb->s_umount);
			group_pin_kill(&sb->s_pins);
			down_write(&sb->s_umount);
			if (!sb->s_root)
				return 0;
			if (sb->s_writers.frozen != SB_UNFROZEN)
				return -EBUSY;
			remount_ro = (flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY);
		}
	}
	shrink_dcache_sb(sb);

	/* If we are remounting RDONLY and current sb is read/write,
	   make sure there are no rw files opened */
	if (remount_ro) {
		if (force) {
			sb->s_readonly_remount = 1;
			smp_wmb();
		} else {
			retval = sb_prepare_remount_readonly(sb);
			if (retval)
				return retval;
		}
	}

	if (sb->s_op->remount_fs) {
		retval = sb->s_op->remount_fs(sb, &flags, data);
		if (retval) {
			if (!force)
				goto cancel_readonly;
			/* If forced remount, go ahead despite any errors */
			WARN(1, "forced remount of a %s fs returned %i\n",
			     sb->s_type->name, retval);
		}
	}
	sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK);
	/* Needs to be ordered wrt mnt_is_readonly() */
	smp_wmb();
	sb->s_readonly_remount = 0;

	/*
	 * Some filesystems modify their metadata via some other path than the
	 * bdev buffer cache (eg. use a private mapping, or directories in
	 * pagecache, etc). Also file data modifications go via their own
	 * mappings. So if we try to mount readonly then copy the filesystem
	 * from bdev, we could get stale data, so invalidate it to give a best
	 * effort at coherency.
	 */
	if (remount_ro && sb->s_bdev)
		invalidate_bdev(sb->s_bdev);
	return 0;

cancel_readonly:
	sb->s_readonly_remount = 0;
	return retval;
}

static void do_emergency_remount(struct work_struct *work)
{
	struct super_block *sb, *p = NULL;

	spin_lock(&sb_lock);
	list_for_each_entry(sb, &super_blocks, s_list) {
		if (hlist_unhashed(&sb->s_instances))
			continue;
		sb->s_count++;
		spin_unlock(&sb_lock);
		down_write(&sb->s_umount);
		if (sb->s_root && sb->s_bdev && (sb->s_flags & MS_BORN) &&
		    !(sb->s_flags & MS_RDONLY)) {
			/*
			 * What lock protects sb->s_flags??
			 */
			do_remount_sb(sb, MS_RDONLY, NULL, 1);
		}
		up_write(&sb->s_umount);
		spin_lock(&sb_lock);
		if (p)
			__put_super(p);
		p = sb;
	}
	if (p)
		__put_super(p);
	spin_unlock(&sb_lock);
	kfree(work);
	printk("Emergency Remount complete\n");
}

void emergency_remount(void)
{
	struct work_struct *work;

	work = kmalloc(sizeof(*work), GFP_ATOMIC);
	if (work) {
		INIT_WORK(work, do_emergency_remount);
		schedule_work(work);
	}
}

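/*
 * Unnamed block devices are dummy devices used by virtual
 * filesystems which don't use real block devices.
 */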
static DEFINE_IDA(unnamed_dev_ida);
static DEFINE_SPINLOCK(unnamed_dev_lock);	/* protects the above */

/* Many userspace utilities consider an FSID of 0 invalid.
 * Always return at least 1 from get_anon_bdev.
 */
static int unnamed_dev_start = 1;

int get_anon_bdev(dev_t *p)
{
	int dev;
	int error;

 retry:
	if (ida_pre_get(&unnamed_dev_ida, GFP_ATOMIC) == 0)
		return -ENOMEM;
	spin_lock(&unnamed_dev_lock);
	error = ida_get_new_above(&unnamed_dev_ida, unnamed_dev_start, &dev);
	if (!error)
		unnamed_dev_start = dev + 1;
	spin_unlock(&unnamed_dev_lock);
	if (error == -EAGAIN)
		/* We raced and lost with another CPU. */
		goto retry;
	else if (error)
		return -EAGAIN;

	if (dev >= (1 << MINORBITS)) {
		spin_lock(&unnamed_dev_lock);
		ida_remove(&unnamed_dev_ida, dev);
		if (unnamed_dev_start > dev)
			unnamed_dev_start = dev;
		spin_unlock(&unnamed_dev_lock);
		return -EMFILE;
	}
	*p = MKDEV(0, dev & MINORMASK);
	return 0;
}
EXPORT_SYMBOL(get_anon_bdev);

void free_anon_bdev(dev_t dev)
{
	int slot = MINOR(dev);
	spin_lock(&unnamed_dev_lock);
	ida_remove(&unnamed_dev_ida, slot);
	if (slot < unnamed_dev_start)
		unnamed_dev_start = slot;
	spin_unlock(&unnamed_dev_lock);
}
EXPORT_SYMBOL(free_anon_bdev);

int set_anon_super(struct super_block *s, void *data)
{
	return get_anon_bdev(&s->s_dev);
}

EXPORT_SYMBOL(set_anon_super);

void kill_anon_super(struct super_block *sb)
{
	dev_t dev = sb->s_dev;
	generic_shutdown_super(sb);
	free_anon_bdev(dev);
}

EXPORT_SYMBOL(kill_anon_super);

void kill_litter_super(struct super_block *sb)
{
	if (sb->s_root)
		d_genocide(sb->s_root);
	kill_anon_super(sb);
}

EXPORT_SYMBOL(kill_litter_super);

static int ns_test_super(struct super_block *sb, void *data)
{
	return sb->s_fs_info == data;
}

static int ns_set_super(struct super_block *sb, void *data)
{
	sb->s_fs_info = data;
	return set_anon_super(sb, NULL);
}

struct dentry *mount_ns(struct file_system_type *fs_type,
	int flags, void *data, void *ns, struct user_namespace *user_ns,
	int (*fill_super)(struct super_block *, void *, int))
{
	struct super_block *sb;

	/* Don't allow mounting unless the caller has CAP_SYS_ADMIN
	 * over the namespace.
	 */
	if (!(flags & MS_KERNMOUNT) && !ns_capable(user_ns, CAP_SYS_ADMIN))
		return ERR_PTR(-EPERM);

	sb = sget_userns(fs_type, ns_test_super, ns_set_super, flags,
			 user_ns, ns);
	if (IS_ERR(sb))
		return ERR_CAST(sb);

	if (!sb->s_root) {
		int err;
		err = fill_super(sb, data, flags & MS_SILENT ? 1 : 0);
		if (err) {
			deactivate_locked_super(sb);
			return ERR_PTR(err);
		}

		sb->s_flags |= MS_ACTIVE;
	}

	return dget(sb->s_root);
}

EXPORT_SYMBOL(mount_ns);

#ifdef CONFIG_BLOCK
static int set_bdev_super(struct super_block *s, void *data)
{
	s->s_bdev = data;
	s->s_dev = s->s_bdev->bd_dev;
	s->s_bdi = bdi_get(s->s_bdev->bd_bdi);

	return 0;
}

static int test_bdev_super(struct super_block *s, void *data)
{
	return (void *)s->s_bdev == data;
}

struct dentry *mount_bdev(struct file_system_type *fs_type,
	int flags, const char *dev_name, void *data,
	int (*fill_super)(struct super_block *, void *, int))
{
	struct block_device *bdev;
	struct super_block *s;
	fmode_t mode = FMODE_READ | FMODE_EXCL;
	int error = 0;

	if (!(flags & MS_RDONLY))
		mode |= FMODE_WRITE;

	bdev = blkdev_get_by_path(dev_name, mode, fs_type);
	if (IS_ERR(bdev))
		return ERR_CAST(bdev);

	/*
	 * once the super is inserted into the list by sget, s_umount
	 * will protect the lockfs code from trying to start a snapshot
	 * while we are mounting
	 */
	mutex_lock(&bdev->bd_fsfreeze_mutex);
	if (bdev->bd_fsfreeze_count > 0) {
		mutex_unlock(&bdev->bd_fsfreeze_mutex);
		error = -EBUSY;
		goto error_bdev;
	}
	s = sget(fs_type, test_bdev_super, set_bdev_super, flags | MS_NOSEC,
		 bdev);
	mutex_unlock(&bdev->bd_fsfreeze_mutex);
	if (IS_ERR(s))
		goto error_s;

	if (s->s_root) {
		if ((flags ^ s->s_flags) & MS_RDONLY) {
			deactivate_locked_super(s);
			error = -EBUSY;
			goto error_bdev;
		}

		/*
		 * s_umount nests inside bd_mutex during
		 * __invalidate_device().  blkdev_put() acquires
		 * bd_mutex and can't be called under s_umount.  Drop
		 * s_umount temporarily.  This is safe as we're
		 * holding an active reference.
		 */
		up_write(&s->s_umount);
		blkdev_put(bdev, mode);
		down_write(&s->s_umount);
	} else {
		s->s_mode = mode;
		snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
		sb_set_blocksize(s, block_size(bdev));
		error = fill_super(s, data, flags & MS_SILENT ? 1 : 0);
		if (error) {
			deactivate_locked_super(s);
			goto error;
		}

		s->s_flags |= MS_ACTIVE;
		bdev->bd_super = s;
	}

	return dget(s->s_root);

error_s:
	error = PTR_ERR(s);
error_bdev:
	blkdev_put(bdev, mode);
error:
	return ERR_PTR(error);
}
EXPORT_SYMBOL(mount_bdev);

void kill_block_super(struct super_block *sb)
{
	struct block_device *bdev = sb->s_bdev;
	fmode_t mode = sb->s_mode;

	bdev->bd_super = NULL;
	generic_shutdown_super(sb);
	sync_blockdev(bdev);
	WARN_ON_ONCE(!(mode & FMODE_EXCL));
	blkdev_put(bdev, mode | FMODE_EXCL);
}

EXPORT_SYMBOL(kill_block_super);
#endif

struct dentry *mount_nodev(struct file_system_type *fs_type,
	int flags, void *data,
	int (*fill_super)(struct super_block *, void *, int))
{
	int error;
	struct super_block *s = sget(fs_type, NULL, set_anon_super, flags, NULL);

	if (IS_ERR(s))
		return ERR_CAST(s);

	error = fill_super(s, data, flags & MS_SILENT ? 1 : 0);
	if (error) {
		deactivate_locked_super(s);
		return ERR_PTR(error);
	}
	s->s_flags |= MS_ACTIVE;
	return dget(s->s_root);
}
EXPORT_SYMBOL(mount_nodev);

static int compare_single(struct super_block *s, void *p)
{
	return 1;
}

struct dentry *mount_single(struct file_system_type *fs_type,
	int flags, void *data,
	int (*fill_super)(struct super_block *, void *, int))
{
	struct super_block *s;
	int error;

	s = sget(fs_type, compare_single, set_anon_super, flags, NULL);
	if (IS_ERR(s))
		return ERR_CAST(s);
	if (!s->s_root) {
		error = fill_super(s, data, flags & MS_SILENT ? 1 : 0);
		if (error) {
			deactivate_locked_super(s);
			return ERR_PTR(error);
		}
		s->s_flags |= MS_ACTIVE;
	} else {
		do_remount_sb(s, flags, data, 0);
	}
	return dget(s->s_root);
}
EXPORT_SYMBOL(mount_single);

struct dentry *
mount_fs(struct file_system_type *type, int flags, const char *name, void *data)
{
	struct dentry *root;
	struct super_block *sb;
	char *secdata = NULL;
	int error = -ENOMEM;

	if (data && !(type->fs_flags & FS_BINARY_MOUNTDATA)) {
		secdata = alloc_secdata();
		if (!secdata)
			goto out;

		error = security_sb_copy_data(data, secdata);
		if (error)
			goto out_free_secdata;
	}

	root = type->mount(type, flags, name, data);
	if (IS_ERR(root)) {
		error = PTR_ERR(root);
		goto out_free_secdata;
	}
	sb = root->d_sb;
	BUG_ON(!sb);
	WARN_ON(!sb->s_bdi);
	sb->s_flags |= MS_BORN;

	error = security_sb_kern_mount(sb, flags, secdata);
	if (error)
		goto out_sb;

	/*
	 * filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE
	 * but s_maxbytes was an unsigned long long for many releases. Throw
	 * this warning for a little while to try and catch filesystems that
	 * violate this rule.
	 */
	WARN((sb->s_maxbytes < 0), "%s set sb->s_maxbytes to "
		"negative value (%lld)\n", type->name, sb->s_maxbytes);

	up_write(&sb->s_umount);
	free_secdata(secdata);
	return root;
out_sb:
	dput(root);
	deactivate_locked_super(sb);
out_free_secdata:
	free_secdata(secdata);
out:
	return ERR_PTR(error);
}

/*
 * Setup private BDI for given superblock. It gets registered in
 * generic_shutdown_super().
 */
int super_setup_bdi_name(struct super_block *sb, char *fmt, ...)
{
	struct backing_dev_info *bdi;
	int err;
	va_list args;

	bdi = bdi_alloc(GFP_KERNEL);
	if (!bdi)
		return -ENOMEM;

	bdi->name = sb->s_type->name;

	va_start(args, fmt);
	err = bdi_register_va(bdi, fmt, args);
	va_end(args);
	if (err) {
		bdi_put(bdi);
		return err;
	}
	WARN_ON(sb->s_bdi != &noop_backing_dev_info);
	sb->s_bdi = bdi;

	return 0;
}
EXPORT_SYMBOL(super_setup_bdi_name);

/*
 * Setup private BDI for given superblock. It gets registered later in
 * generic_shutdown_super().
 */
int super_setup_bdi(struct super_block *sb)
{
	static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);

	return super_setup_bdi_name(sb, "%.28s-%ld", sb->s_type->name,
				    atomic_long_inc_return(&bdi_seq));
}
EXPORT_SYMBOL(super_setup_bdi);

/*
 * This is an internal function, please use sb_end_{write,pagefault,intwrite}
 * instead.
 */
void __sb_end_write(struct super_block *sb, int level)
{
	percpu_up_read(sb->s_writers.rw_sem + level-1);
}
EXPORT_SYMBOL(__sb_end_write);

/*
 * This is an internal function, please use sb_start_{write,pagefault,intwrite}
 * instead.
 */
int __sb_start_write(struct super_block *sb, int level, bool wait)
{
	bool force_trylock = false;
	int ret = 1;

#ifdef CONFIG_LOCKDEP
	/*
	 * We want lockdep to tell us about possible deadlocks with freezing
	 * but it's a bit tricky to properly instrument it. Getting freeze
	 * protection works as getting a read lock but there are subtle
	 * problems. XFS for example gets freeze protection on internal level
	 * twice in some cases, which is OK only because we already hold a
	 * freeze protection also on a higher level. Due to these cases we
	 * have to use wait == F (trylock mode) which must not fail.
	 */
	if (wait) {
		int i;

		for (i = 0; i < level - 1; i++)
			if (percpu_rwsem_is_held(sb->s_writers.rw_sem + i)) {
				force_trylock = true;
				break;
			}
	}
#endif
	if (wait && !force_trylock)
		percpu_down_read(sb->s_writers.rw_sem + level-1);
	else
		ret = percpu_down_read_trylock(sb->s_writers.rw_sem + level-1);

	WARN_ON(force_trylock && !ret);
	return ret;
}
EXPORT_SYMBOL(__sb_start_write);

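/**
 * sb_wait_write - wait until all writers to given file system finish
 * @sb: the super for which we wait
 * @level: type of writers we wait for (normal vs page fault)
 *
 * This function waits until there are no writers of given type to given file
 * system.
 */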
static void sb_wait_write(struct super_block *sb, int level)
{
	percpu_down_write(sb->s_writers.rw_sem + level-1);
}

/*
 * We are going to return to userspace and forget about these locks, the
 * ownership goes to the caller of thaw_super() which does unlock().
 */
static void lockdep_sb_freeze_release(struct super_block *sb)
{
	int level;

	for (level = SB_FREEZE_LEVELS - 1; level >= 0; level--)
		percpu_rwsem_release(sb->s_writers.rw_sem + level, 0, _THIS_IP_);
}

/*
 * Tell lockdep we are holding these locks before we call ->unfreeze_fs(sb).
 */
static void lockdep_sb_freeze_acquire(struct super_block *sb)
{
	int level;

	for (level = 0; level < SB_FREEZE_LEVELS; ++level)
		percpu_rwsem_acquire(sb->s_writers.rw_sem + level, 0, _THIS_IP_);
}

static void sb_freeze_unlock(struct super_block *sb)
{
	int level;

	for (level = SB_FREEZE_LEVELS - 1; level >= 0; level--)
		percpu_up_write(sb->s_writers.rw_sem + level);
}

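/**
 * freeze_super - lock the filesystem and force it into a consistent state
 * @sb: the super to lock
 *
 * Syncs the super to make sure the filesystem is consistent and calls the fs's
 * freeze_fs.  Subsequent calls to this without first thawing the fs will
 * return -EBUSY.
 *
 * During this function, sb->s_writers.frozen goes through these values:
 *
 * SB_UNFROZEN: File system is normal, all writes progress as usual.
 *
 * SB_FREEZE_WRITE: The file system is in the process of being frozen.  New
 * writes should be blocked, though page faults are still allowed. We wait for
 * all writes to complete and then proceed to the next stage.
 *
 * SB_FREEZE_PAGEFAULT: Freezing continues. Now also page faults are blocked
 * but internal fs threads can still modify the filesystem (although they
 * should not dirty new pages or inodes), writeback can run etc. After waiting
 * for all running page faults we sync the filesystem, which will clean all
 * dirty pages and inodes (no new dirty pages or inodes can be created when
 * sync is running).
 *
 * SB_FREEZE_FS: The file system is frozen. Now all internal sources of fs
 * modification are blocked. This is usually implemented by blocking new
 * transactions for filesystems that have them and need this additional guard.
 * After all internal writers are finished we call ->freeze_fs() to finish
 * filesystem freezing. Then we transition to SB_FREEZE_COMPLETE state. This
 * state is mostly auxiliary for filesystems to verify they do not modify a
 * frozen fs.
 *
 * sb->s_writers.frozen is protected by sb->s_umount.
 */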
int freeze_super(struct super_block *sb)
{
	int ret;

	atomic_inc(&sb->s_active);
	down_write(&sb->s_umount);
	if (sb->s_writers.frozen != SB_UNFROZEN) {
		deactivate_locked_super(sb);
		return -EBUSY;
	}

	if (!(sb->s_flags & MS_BORN)) {
		up_write(&sb->s_umount);
		return 0;	/* sic - it's "nothing to do" */
	}

	if (sb->s_flags & MS_RDONLY) {
		/* Nothing to do really... */
		sb->s_writers.frozen = SB_FREEZE_COMPLETE;
		up_write(&sb->s_umount);
		return 0;
	}

	sb->s_writers.frozen = SB_FREEZE_WRITE;
	/* Release s_umount to preserve sb_start_write -> s_umount ordering */
	up_write(&sb->s_umount);
	sb_wait_write(sb, SB_FREEZE_WRITE);
	down_write(&sb->s_umount);

	/* Now we go and block page faults... */
	sb->s_writers.frozen = SB_FREEZE_PAGEFAULT;
	sb_wait_write(sb, SB_FREEZE_PAGEFAULT);

	/* All writers are done so after syncing there won't be dirty data */
	sync_filesystem(sb);

	/* Now wait for internal filesystem counter */
	sb->s_writers.frozen = SB_FREEZE_FS;
	sb_wait_write(sb, SB_FREEZE_FS);

	if (sb->s_op->freeze_fs) {
		ret = sb->s_op->freeze_fs(sb);
		if (ret) {
			printk(KERN_ERR
				"VFS: Filesystem freeze failed\n");
			sb->s_writers.frozen = SB_UNFROZEN;
			sb_freeze_unlock(sb);
			wake_up(&sb->s_writers.wait_unfrozen);
			deactivate_locked_super(sb);
			return ret;
		}
	}
	/*
	 * For debugging purposes so that fs can warn if it sees write activity
	 * when frozen is set to SB_FREEZE_COMPLETE, and for thaw_super().
	 */
	sb->s_writers.frozen = SB_FREEZE_COMPLETE;
	lockdep_sb_freeze_release(sb);
	up_write(&sb->s_umount);
	return 0;
}
EXPORT_SYMBOL(freeze_super);

/**
 * thaw_super -- unlock filesystem
 * @sb: the super to thaw
 *
 * Unlocks the filesystem and marks it writeable again after freeze_super().
 */
int thaw_super(struct super_block *sb)
{
	int error;

	down_write(&sb->s_umount);
	if (sb->s_writers.frozen != SB_FREEZE_COMPLETE) {
		up_write(&sb->s_umount);
		return -EINVAL;
	}

	if (sb->s_flags & MS_RDONLY) {
		sb->s_writers.frozen = SB_UNFROZEN;
		goto out;
	}

	lockdep_sb_freeze_acquire(sb);

	if (sb->s_op->unfreeze_fs) {
		error = sb->s_op->unfreeze_fs(sb);
		if (error) {
			printk(KERN_ERR
				"VFS: Filesystem thaw failed\n");
			lockdep_sb_freeze_release(sb);
			up_write(&sb->s_umount);
			return error;
		}
	}

	sb->s_writers.frozen = SB_UNFROZEN;
	sb_freeze_unlock(sb);
out:
	wake_up(&sb->s_writers.wait_unfrozen);
	deactivate_locked_super(sb);
	return 0;
}
EXPORT_SYMBOL(thaw_super);