/*
 *  linux/fs/super.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  super.c contains code to handle: - mount structures
 *                                   - super-block tables
 *                                   - filesystem drivers list
 *                                   - mount system call
 *                                   - umount system call
 *                                   - ustat system call
 *
 * GK 2/5/95  -  Changed to support mounting the root fs via NFS
 *
 *  Added kerneldf() (Torbjorn Lindh, torbjorn.lindh@gopta.se)
 *  Added change_root: Werner Almesberger & Hans Lermen, Feb '96
 *  Added options to /proc/mounts:
 *    Torbjörn Lindh (torbjorn.lindh@gopta.se), April 14, 1996.
 *  Added devfs support: Richard Gooch <rgooch@atnf.csiro.au>, 13-JAN-1998
 *  Heavily rewritten for 'one fs - one tree' dcache architecture. AV, Mar 2000
 */
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/mount.h>
#include <linux/security.h>
#include <linux/writeback.h>
#include <linux/idr.h>
#include <linux/mutex.h>
#include <linux/backing-dev.h>
#include <linux/rculist_bl.h>
#include <linux/cleancache.h>
#include <linux/fsnotify.h>
#include <linux/lockdep.h>
#include <linux/user_namespace.h>
#include "internal.h"

static int thaw_super_locked(struct super_block *sb);

static LIST_HEAD(super_blocks);
static DEFINE_SPINLOCK(sb_lock);

static char *sb_writers_name[SB_FREEZE_LEVELS] = {
	"sb_writers",
	"sb_pagefaults",
	"sb_internal",
};

/*
 * One thing we have to be careful of with a per-sb shrinker is that we don't
 * drop the last active reference to the superblock from within the shrinker.
 * If that happens we could trigger unmount deadlock.  Hence we only take
 * s_umount with a trylock and back off with SHRINK_STOP when it cannot be
 * acquired.
 */
static unsigned long super_cache_scan(struct shrinker *shrink,
				      struct shrink_control *sc)
{
	struct super_block *sb;
	long	fs_objects = 0;
	long	total_objects;
	long	freed = 0;
	long	dentries;
	long	inodes;

	sb = container_of(shrink, struct super_block, s_shrink);

	/*
	 * Deadlock avoidance.  We may hold various FS locks, and we don't want
	 * to recurse into the FS that called us in clear_inode() and friends..
	 */
	if (!(sc->gfp_mask & __GFP_FS))
		return SHRINK_STOP;

	if (!trylock_super(sb))
		return SHRINK_STOP;

	if (sb->s_op->nr_cached_objects)
		fs_objects = sb->s_op->nr_cached_objects(sb, sc);

	inodes = list_lru_shrink_count(&sb->s_inode_lru, sc);
	dentries = list_lru_shrink_count(&sb->s_dentry_lru, sc);
	total_objects = dentries + inodes + fs_objects + 1;
	if (!total_objects)
		total_objects = 1;

	/* proportion the scan between the caches */
	dentries = mult_frac(sc->nr_to_scan, dentries, total_objects);
	inodes = mult_frac(sc->nr_to_scan, inodes, total_objects);
	fs_objects = mult_frac(sc->nr_to_scan, fs_objects, total_objects);

	/*
	 * prune the dcache first as the icache is pinned by it, then
	 * prune the icache, followed by the filesystem specific caches
	 *
	 * Ensure that we always scan at least one object - memcg kmem
	 * accounting uses this to fully empty the caches.
	 */
	sc->nr_to_scan = dentries + 1;
	freed = prune_dcache_sb(sb, sc);
	sc->nr_to_scan = inodes + 1;
	freed += prune_icache_sb(sb, sc);

	if (fs_objects) {
		sc->nr_to_scan = fs_objects + 1;
		freed += sb->s_op->free_cached_objects(sb, sc);
	}

	up_read(&sb->s_umount);
	return freed;
}

static unsigned long super_cache_count(struct shrinker *shrink,
				       struct shrink_control *sc)
{
	struct super_block *sb;
	long	total_objects = 0;

	sb = container_of(shrink, struct super_block, s_shrink);

	/*
	 * We don't call trylock_super() here as it is a scalability bottleneck,
	 * so we're exposed to partial setup state. The shrinker rwsem does not
	 * protect filesystem operations backing list_lru_shrink_count() or
	 * s_op->nr_cached_objects(). Counts can change between
	 * super_cache_count and super_cache_scan, so we really don't need locks
	 * here.
	 *
	 * However, if we are currently mounting the superblock, the underlying
	 * filesystem might be in a state of partial construction and hence it
	 * is dangerous to access it.  trylock_super() uses a SB_BORN check to
	 * avoid this situation, so do the same here. The memory barrier is
	 * matched with the one in mount_fs() as we don't hold locks there.
	 */
	if (!(sb->s_flags & SB_BORN))
		return 0;
	smp_rmb();

	if (sb->s_op && sb->s_op->nr_cached_objects)
		total_objects = sb->s_op->nr_cached_objects(sb, sc);

	total_objects += list_lru_shrink_count(&sb->s_dentry_lru, sc);
	total_objects += list_lru_shrink_count(&sb->s_inode_lru, sc);

	total_objects = vfs_pressure_ratio(total_objects);
	return total_objects;
}

static void destroy_super_work(struct work_struct *work)
{
	struct super_block *s = container_of(work, struct super_block,
							destroy_work);
	int i;

	for (i = 0; i < SB_FREEZE_LEVELS; i++)
		percpu_free_rwsem(&s->s_writers.rw_sem[i]);
	kfree(s);
}

static void destroy_super_rcu(struct rcu_head *head)
{
	struct super_block *s = container_of(head, struct super_block, rcu);
	INIT_WORK(&s->destroy_work, destroy_super_work);
	schedule_work(&s->destroy_work);
}

/* Free a superblock that has never been seen by anyone */
static void destroy_unused_super(struct super_block *s)
{
	if (!s)
		return;
	up_write(&s->s_umount);
	list_lru_destroy(&s->s_dentry_lru);
	list_lru_destroy(&s->s_inode_lru);
	security_sb_free(s);
	put_user_ns(s->s_user_ns);
	kfree(s->s_subtype);
	free_prealloced_shrinker(&s->s_shrink);
	/* no delays needed */
	destroy_super_work(&s->destroy_work);
}

/**
 *	alloc_super	-	create new superblock
 *	@type:	filesystem type superblock should belong to
 *	@flags: the mount flags
 *	@user_ns: User namespace for the super_block
 *
 *	Allocates and initializes a new &struct super_block.  alloc_super()
 *	returns a pointer to the new superblock or %NULL if allocation failed.
 */
static struct super_block *alloc_super(struct file_system_type *type, int flags,
				       struct user_namespace *user_ns)
{
	struct super_block *s = kzalloc(sizeof(struct super_block), GFP_USER);
	static const struct super_operations default_op;
	int i;

	if (!s)
		return NULL;

	INIT_LIST_HEAD(&s->s_mounts);
	s->s_user_ns = get_user_ns(user_ns);
	init_rwsem(&s->s_umount);
	lockdep_set_class(&s->s_umount, &type->s_umount_key);
	/*
	 * sget() can have s_umount recursion.
	 *
	 * When it cannot find a suitable sb, it allocates a new
	 * one (this one), and tries again to find a suitable old
	 * one.
	 *
	 * In case that succeeds, it will acquire the s_umount
	 * lock of the old one. Since these are clearly distinct
	 * locks, and this object isn't exposed yet, there's no
	 * risk of deadlocks.
	 *
	 * Annotate this by putting this lock in a different
	 * subclass.
	 */
	down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING);

	if (security_sb_alloc(s))
		goto fail;

	for (i = 0; i < SB_FREEZE_LEVELS; i++) {
		if (__percpu_init_rwsem(&s->s_writers.rw_sem[i],
					sb_writers_name[i],
					&type->s_writers_key[i]))
			goto fail;
	}
	init_waitqueue_head(&s->s_writers.wait_unfrozen);
	s->s_bdi = &noop_backing_dev_info;
	s->s_flags = flags;
	if (s->s_user_ns != &init_user_ns)
		s->s_iflags |= SB_I_NODEV;
	INIT_HLIST_NODE(&s->s_instances);
	INIT_HLIST_BL_HEAD(&s->s_roots);
	mutex_init(&s->s_sync_lock);
	INIT_LIST_HEAD(&s->s_inodes);
	spin_lock_init(&s->s_inode_list_lock);
	INIT_LIST_HEAD(&s->s_inodes_wb);
	spin_lock_init(&s->s_inode_wblist_lock);

	if (list_lru_init_memcg(&s->s_dentry_lru))
		goto fail;
	if (list_lru_init_memcg(&s->s_inode_lru))
		goto fail;
	s->s_count = 1;
	atomic_set(&s->s_active, 1);
	mutex_init(&s->s_vfs_rename_mutex);
	lockdep_set_class(&s->s_vfs_rename_mutex, &type->s_vfs_rename_key);
	init_rwsem(&s->s_dquot.dqio_sem);
	s->s_maxbytes = MAX_NON_LFS;
	s->s_op = &default_op;
	s->s_time_gran = 1000000000;
	s->cleancache_poolid = CLEANCACHE_NO_POOL;

	s->s_shrink.seeks = DEFAULT_SEEKS;
	s->s_shrink.scan_objects = super_cache_scan;
	s->s_shrink.count_objects = super_cache_count;
	s->s_shrink.batch = 1024;
	s->s_shrink.flags = SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE;
	if (prealloc_shrinker(&s->s_shrink))
		goto fail;
	return s;

fail:
	destroy_unused_super(s);
	return NULL;
}

/* Superblock refcounting  */

/*
 * Drop a superblock's refcount.  The caller must hold sb_lock.
 */
static void __put_super(struct super_block *s)
{
	if (!--s->s_count) {
		list_del_init(&s->s_list);
		WARN_ON(s->s_dentry_lru.node);
		WARN_ON(s->s_inode_lru.node);
		WARN_ON(!list_empty(&s->s_mounts));
		security_sb_free(s);
		put_user_ns(s->s_user_ns);
		kfree(s->s_subtype);
		call_rcu(&s->rcu, destroy_super_rcu);
	}
}

/**
 *	put_super	-	drop a temporary reference to superblock
 *	@sb: superblock in question
 *
 *	Drops a temporary reference, frees superblock if there's no
 *	references left.
 */
static void put_super(struct super_block *sb)
{
	spin_lock(&sb_lock);
	__put_super(sb);
	spin_unlock(&sb_lock);
}

/**
 *	deactivate_locked_super	-	drop an active reference to superblock
 *	@s: superblock to deactivate
 *
 *	Drops an active reference to superblock, converting it into a temporary
 *	one if there is no other active references left.  In that case we
 *	tell fs driver to shut it down and drop the temporary reference we
 *	had just acquired.
 *
 *	Caller holds exclusive lock on superblock; that lock is released.
 */
void deactivate_locked_super(struct super_block *s)
{
	struct file_system_type *fs = s->s_type;
	if (atomic_dec_and_test(&s->s_active)) {
		cleancache_invalidate_fs(s);
		unregister_shrinker(&s->s_shrink);
		fs->kill_sb(s);

		/*
		 * Since list_lru_destroy() may sleep, we cannot call it from
		 * put_super(), where we hold the sb_lock. Therefore we destroy
		 * the lru lists right now.
		 */
		list_lru_destroy(&s->s_dentry_lru);
		list_lru_destroy(&s->s_inode_lru);

		put_filesystem(fs);
		put_super(s);
	} else {
		up_write(&s->s_umount);
	}
}

EXPORT_SYMBOL(deactivate_locked_super);
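
/*
 * Illustrative sketch (not part of this file): the common error path in a
 * mount helper.  sget() returns the superblock locked and active; if
 * fill_super then fails, deactivate_locked_super() undoes the activation,
 * runs ->kill_sb() and releases s_umount.  The myfs_* names are hypothetical.
 *
 *	static struct dentry *myfs_do_mount(struct super_block *s,
 *					    void *data, int flags)
 *	{
 *		int err = myfs_fill_super(s, data, flags & SB_SILENT ? 1 : 0);
 *		if (err) {
 *			deactivate_locked_super(s);	(drops s_umount)
 *			return ERR_PTR(err);
 *		}
 *		s->s_flags |= SB_ACTIVE;
 *		return dget(s->s_root);
 *	}
 */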

/**
 *	deactivate_super	-	drop an active reference to superblock
 *	@s: superblock to deactivate
 *
 *	Variant of deactivate_locked_super(), except that superblock is *not*
 *	locked by caller.  If we are going to drop the final active reference,
 *	lock will be acquired prior to that.
 */
void deactivate_super(struct super_block *s)
{
	if (!atomic_add_unless(&s->s_active, -1, 1)) {
		down_write(&s->s_umount);
		deactivate_locked_super(s);
	}
}

EXPORT_SYMBOL(deactivate_super);

/**
 *	grab_super - acquire an active reference
 *	@s: reference we are trying to make active
 *
 *	Tries to acquire an active reference.  grab_super() is used when we
 *	had just found a superblock in super_blocks or fs_type->fs_supers
 *	and want to turn it into a full-blown active reference.  grab_super()
 *	is called with sb_lock held and drops it.  Returns 1 in case of
 *	success, 0 if we had failed (superblock contents was already dead or
 *	dying when grab_super() had been called).  Note that this is only
 *	called with sb_lock held.
 */
static int grab_super(struct super_block *s) __releases(sb_lock)
{
	s->s_count++;
	spin_unlock(&sb_lock);
	down_write(&s->s_umount);
	if ((s->s_flags & SB_BORN) && atomic_inc_not_zero(&s->s_active)) {
		put_super(s);
		return 1;
	}
	up_write(&s->s_umount);
	put_super(s);
	return 0;
}

/*
 *	trylock_super - try to grab ->s_umount shared
 *	@sb: reference we are trying to grab
 *
 *	Try to prevent fs shutdown.  This is used in places where we
 *	cannot take an active reference but we need to ensure that the
 *	filesystem is not shut down while we are working on it. It returns
 *	false if we cannot acquire s_umount or if we lose the race and
 *	filesystem already got into shutdown, and returns true with the s_umount
 *	lock held in read mode in case of success. On successful return,
 *	the caller must drop the s_umount lock when done.
 *
 *	Note that unlike get_super() et.al. this one does *not* bump ->s_count.
 *	The reason why it's safe is that we are OK with doing trylock instead
 *	of down_read().  There's a couple of places that are OK with that, but
 *	it's very much not a general-purpose interface.
 */
bool trylock_super(struct super_block *sb)
{
	if (down_read_trylock(&sb->s_umount)) {
		if (!hlist_unhashed(&sb->s_instances) &&
		    sb->s_root && (sb->s_flags & SB_BORN))
			return true;
		up_read(&sb->s_umount);
	}

	return false;
}

/**
 *	generic_shutdown_super	-	common helper for ->kill_sb()
 *	@sb: superblock to kill
 *
 *	generic_shutdown_super() does all fs-independent work on superblock
 *	shutdown.  Typical ->kill_sb() should pick all fs-specific objects
 *	that need destruction out of superblock, call generic_shutdown_super()
 *	and release aforementioned objects.  Note: dentries and inodes _are_
 *	taken care of and do not need specific handling.
 *
 *	Upon calling this function, the filesystem may no longer alter or
 *	rearrange the set of dentries belonging to this super_block, nor may it
 *	change the attachments of dentries to inodes.
 */
void generic_shutdown_super(struct super_block *sb)
{
	const struct super_operations *sop = sb->s_op;

	if (sb->s_root) {
		shrink_dcache_for_umount(sb);
		sync_filesystem(sb);
		sb->s_flags &= ~SB_ACTIVE;

		fsnotify_unmount_inodes(sb);
		cgroup_writeback_umount();

		evict_inodes(sb);

		if (sb->s_dio_done_wq) {
			destroy_workqueue(sb->s_dio_done_wq);
			sb->s_dio_done_wq = NULL;
		}

		if (sop->put_super)
			sop->put_super(sb);

		if (!list_empty(&sb->s_inodes)) {
			printk("VFS: Busy inodes after unmount of %s. "
			   "Self-destruct in 5 seconds. Have a nice day...\n",
			   sb->s_id);
		}
	}
	spin_lock(&sb_lock);
	/* should be initialized for __put_super_and_need_restart() */
	hlist_del_init(&sb->s_instances);
	spin_unlock(&sb_lock);
	up_write(&sb->s_umount);
	if (sb->s_bdi != &noop_backing_dev_info) {
		bdi_put(sb->s_bdi);
		sb->s_bdi = &noop_backing_dev_info;
	}
}

EXPORT_SYMBOL(generic_shutdown_super);
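
/*
 * Illustrative sketch (hypothetical myfs_* names): a typical ->kill_sb()
 * tears down fs-private state around generic_shutdown_super(), which handles
 * dentries and inodes itself.
 *
 *	static void myfs_kill_sb(struct super_block *sb)
 *	{
 *		struct myfs_sb_info *sbi = sb->s_fs_info;
 *
 *		generic_shutdown_super(sb);
 *		kfree(sbi);
 *	}
 */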

/**
 *	sget_userns -	find or create a superblock
 *	@type:	filesystem type superblock should belong to
 *	@test:	comparison callback
 *	@set:	setup callback
 *	@flags:	mount flags
 *	@user_ns: User namespace for the super_block
 *	@data:	argument to each of them
 */
struct super_block *sget_userns(struct file_system_type *type,
			int (*test)(struct super_block *,void *),
			int (*set)(struct super_block *,void *),
			int flags, struct user_namespace *user_ns,
			void *data)
{
	struct super_block *s = NULL;
	struct super_block *old;
	int err;

	if (!(flags & (SB_KERNMOUNT|SB_SUBMOUNT)) &&
	    !(type->fs_flags & FS_USERNS_MOUNT) &&
	    !capable(CAP_SYS_ADMIN))
		return ERR_PTR(-EPERM);
retry:
	spin_lock(&sb_lock);
	if (test) {
		hlist_for_each_entry(old, &type->fs_supers, s_instances) {
			if (!test(old, data))
				continue;
			if (user_ns != old->s_user_ns) {
				spin_unlock(&sb_lock);
				destroy_unused_super(s);
				return ERR_PTR(-EBUSY);
			}
			if (!grab_super(old))
				goto retry;
			destroy_unused_super(s);
			return old;
		}
	}
	if (!s) {
		spin_unlock(&sb_lock);
		s = alloc_super(type, (flags & ~SB_SUBMOUNT), user_ns);
		if (!s)
			return ERR_PTR(-ENOMEM);
		goto retry;
	}

	err = set(s, data);
	if (err) {
		spin_unlock(&sb_lock);
		destroy_unused_super(s);
		return ERR_PTR(err);
	}
	s->s_type = type;
	strlcpy(s->s_id, type->name, sizeof(s->s_id));
	list_add_tail(&s->s_list, &super_blocks);
	hlist_add_head(&s->s_instances, &type->fs_supers);
	spin_unlock(&sb_lock);
	get_filesystem(type);
	register_shrinker_prepared(&s->s_shrink);
	return s;
}

EXPORT_SYMBOL(sget_userns);

/**
 *	sget	-	find or create a superblock
 *	@type:	  filesystem type superblock should belong to
 *	@test:	  comparison callback
 *	@set:	  setup callback
 *	@flags:	  mount flags
 *	@data:	  argument to each of them
 */
struct super_block *sget(struct file_system_type *type,
			int (*test)(struct super_block *,void *),
			int (*set)(struct super_block *,void *),
			int flags,
			void *data)
{
	struct user_namespace *user_ns = current_user_ns();

	/* We don't yet pass the user namespace of the parent
	 * mount through to here so always use &init_user_ns
	 * until that changes.
	 */
	if (flags & SB_SUBMOUNT)
		user_ns = &init_user_ns;

	/* Ensure the requestor has permissions over the target filesystem */
	if (!(flags & (SB_KERNMOUNT|SB_SUBMOUNT)) && !ns_capable(user_ns, CAP_SYS_ADMIN))
		return ERR_PTR(-EPERM);

	return sget_userns(type, test, set, flags, user_ns, data);
}

EXPORT_SYMBOL(sget);
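
/*
 * Illustrative sketch: minimal @test/@set callbacks for sget(), keying the
 * superblock on an fs-private cookie passed as @data (compare ns_test_super()
 * and ns_set_super() further down, which do the same for namespaces).  The
 * myfs_* names are hypothetical.
 *
 *	static int myfs_test_super(struct super_block *sb, void *data)
 *	{
 *		return sb->s_fs_info == data;
 *	}
 *
 *	static int myfs_set_super(struct super_block *sb, void *data)
 *	{
 *		sb->s_fs_info = data;
 *		return set_anon_super(sb, NULL);
 *	}
 */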

void drop_super(struct super_block *sb)
{
	up_read(&sb->s_umount);
	put_super(sb);
}

EXPORT_SYMBOL(drop_super);

void drop_super_exclusive(struct super_block *sb)
{
	up_write(&sb->s_umount);
	put_super(sb);
}
EXPORT_SYMBOL(drop_super_exclusive);

static void __iterate_supers(void (*f)(struct super_block *))
{
	struct super_block *sb, *p = NULL;

	spin_lock(&sb_lock);
	list_for_each_entry(sb, &super_blocks, s_list) {
		if (hlist_unhashed(&sb->s_instances))
			continue;
		sb->s_count++;
		spin_unlock(&sb_lock);

		f(sb);

		spin_lock(&sb_lock);
		if (p)
			__put_super(p);
		p = sb;
	}
	if (p)
		__put_super(p);
	spin_unlock(&sb_lock);
}

/**
 *	iterate_supers - call function for all active superblocks
 *	@f: function to call
 *	@arg: argument to pass to it
 *
 *	Scans the superblock list and calls given function, passing it
 *	locked superblock and given argument.
 */
void iterate_supers(void (*f)(struct super_block *, void *), void *arg)
{
	struct super_block *sb, *p = NULL;

	spin_lock(&sb_lock);
	list_for_each_entry(sb, &super_blocks, s_list) {
		if (hlist_unhashed(&sb->s_instances))
			continue;
		sb->s_count++;
		spin_unlock(&sb_lock);

		down_read(&sb->s_umount);
		if (sb->s_root && (sb->s_flags & SB_BORN))
			f(sb, arg);
		up_read(&sb->s_umount);

		spin_lock(&sb_lock);
		if (p)
			__put_super(p);
		p = sb;
	}
	if (p)
		__put_super(p);
	spin_unlock(&sb_lock);
}
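
/*
 * Illustrative sketch: the callback contract for iterate_supers().  The
 * callback runs with s_umount held shared and the superblock guaranteed to
 * stay alive for the duration of the call.  The myfs_* names below are
 * hypothetical.
 *
 *	static void myfs_note_private_bdi(struct super_block *sb, void *arg)
 *	{
 *		long *count = arg;
 *
 *		if (sb->s_bdi != &noop_backing_dev_info)
 *			(*count)++;
 *	}
 *
 *	long count = 0;
 *	iterate_supers(myfs_note_private_bdi, &count);
 */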

/**
 *	iterate_supers_type - call function for superblocks of given type
 *	@type: fs type
 *	@f: function to call
 *	@arg: argument to pass to it
 *
 *	Scans the superblock list and calls given function, passing it
 *	locked superblock and given argument.
 */
void iterate_supers_type(struct file_system_type *type,
	void (*f)(struct super_block *, void *), void *arg)
{
	struct super_block *sb, *p = NULL;

	spin_lock(&sb_lock);
	hlist_for_each_entry(sb, &type->fs_supers, s_instances) {
		sb->s_count++;
		spin_unlock(&sb_lock);

		down_read(&sb->s_umount);
		if (sb->s_root && (sb->s_flags & SB_BORN))
			f(sb, arg);
		up_read(&sb->s_umount);

		spin_lock(&sb_lock);
		if (p)
			__put_super(p);
		p = sb;
	}
	if (p)
		__put_super(p);
	spin_unlock(&sb_lock);
}

EXPORT_SYMBOL(iterate_supers_type);

static struct super_block *__get_super(struct block_device *bdev, bool excl)
{
	struct super_block *sb;

	if (!bdev)
		return NULL;

	spin_lock(&sb_lock);
rescan:
	list_for_each_entry(sb, &super_blocks, s_list) {
		if (hlist_unhashed(&sb->s_instances))
			continue;
		if (sb->s_bdev == bdev) {
			sb->s_count++;
			spin_unlock(&sb_lock);
			if (!excl)
				down_read(&sb->s_umount);
			else
				down_write(&sb->s_umount);
			/* still alive? */
			if (sb->s_root && (sb->s_flags & SB_BORN))
				return sb;
			if (!excl)
				up_read(&sb->s_umount);
			else
				up_write(&sb->s_umount);
			/* nope, got unmounted */
			spin_lock(&sb_lock);
			__put_super(sb);
			goto rescan;
		}
	}
	spin_unlock(&sb_lock);
	return NULL;
}

/**
 *	get_super - get the superblock of a device
 *	@bdev: device to get the superblock for
 *
 *	Scans the superblock list and finds the superblock of the file system
 *	mounted on the device given. %NULL is returned if no match is found.
 */
struct super_block *get_super(struct block_device *bdev)
{
	return __get_super(bdev, false);
}
EXPORT_SYMBOL(get_super);
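
/*
 * Illustrative sketch: get_super() returns the superblock with s_umount held
 * shared and s_count raised, so every successful call must be paired with
 * drop_super().  Hypothetical usage:
 *
 *	struct super_block *sb = get_super(bdev);
 *
 *	if (sb) {
 *		// inspect sb while it cannot be unmounted
 *		drop_super(sb);
 *	}
 */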

static struct super_block *__get_super_thawed(struct block_device *bdev,
					      bool excl)
{
	while (1) {
		struct super_block *s = __get_super(bdev, excl);
		if (!s || s->s_writers.frozen == SB_UNFROZEN)
			return s;
		if (!excl)
			up_read(&s->s_umount);
		else
			up_write(&s->s_umount);
		wait_event(s->s_writers.wait_unfrozen,
			   s->s_writers.frozen == SB_UNFROZEN);
		put_super(s);
	}
}

/**
 *	get_super_thawed - get thawed superblock of a device
 *	@bdev: device to get the superblock for
 *
 *	Scans the superblock list and finds the superblock of the file system
 *	mounted on the device. The superblock is returned once it is thawed
 *	(or immediately if it was not frozen). %NULL is returned if no match
 *	is found.
 */
struct super_block *get_super_thawed(struct block_device *bdev)
{
	return __get_super_thawed(bdev, false);
}
EXPORT_SYMBOL(get_super_thawed);

/**
 *	get_super_exclusive_thawed - get thawed superblock of a device
 *	@bdev: device to get the superblock for
 *
 *	Scans the superblock list and finds the superblock of the file system
 *	mounted on the device. The superblock is returned once it is thawed
 *	(or immediately if it was not frozen) and the s_umount semaphore is
 *	held in exclusive mode. %NULL is returned if no match is found.
 */
struct super_block *get_super_exclusive_thawed(struct block_device *bdev)
{
	return __get_super_thawed(bdev, true);
}
EXPORT_SYMBOL(get_super_exclusive_thawed);

/**
 * get_active_super - get an active reference to the superblock of a device
 * @bdev: device to get the superblock for
 *
 * Scans the superblock list and finds the superblock of the file system
 * mounted on the device given.  Returns the superblock with an active
 * reference or %NULL if none was found.
 */
struct super_block *get_active_super(struct block_device *bdev)
{
	struct super_block *sb;

	if (!bdev)
		return NULL;

restart:
	spin_lock(&sb_lock);
	list_for_each_entry(sb, &super_blocks, s_list) {
		if (hlist_unhashed(&sb->s_instances))
			continue;
		if (sb->s_bdev == bdev) {
			if (!grab_super(sb))
				goto restart;
			up_write(&sb->s_umount);
			return sb;
		}
	}
	spin_unlock(&sb_lock);
	return NULL;
}

struct super_block *user_get_super(dev_t dev)
{
	struct super_block *sb;

	spin_lock(&sb_lock);
rescan:
	list_for_each_entry(sb, &super_blocks, s_list) {
		if (hlist_unhashed(&sb->s_instances))
			continue;
		if (sb->s_dev == dev) {
			sb->s_count++;
			spin_unlock(&sb_lock);
			down_read(&sb->s_umount);
			/* still alive? */
			if (sb->s_root && (sb->s_flags & SB_BORN))
				return sb;
			up_read(&sb->s_umount);
			/* nope, got unmounted */
			spin_lock(&sb_lock);
			__put_super(sb);
			goto rescan;
		}
	}
	spin_unlock(&sb_lock);
	return NULL;
}

/**
 *	do_remount_sb - asks filesystem to change mount options.
 *	@sb:	superblock in question
 *	@sb_flags: revised superblock flags
 *	@data:	the rest of options
 *	@force: whether or not to force the change
 *
 *	Alters the mount options of a mounted file system.
 */
int do_remount_sb(struct super_block *sb, int sb_flags, void *data, int force)
{
	int retval;
	int remount_ro;

	if (sb->s_writers.frozen != SB_UNFROZEN)
		return -EBUSY;

#ifdef CONFIG_BLOCK
	if (!(sb_flags & SB_RDONLY) && bdev_read_only(sb->s_bdev))
		return -EACCES;
#endif

	remount_ro = (sb_flags & SB_RDONLY) && !sb_rdonly(sb);

	if (remount_ro) {
		if (!hlist_empty(&sb->s_pins)) {
			up_write(&sb->s_umount);
			group_pin_kill(&sb->s_pins);
			down_write(&sb->s_umount);
			if (!sb->s_root)
				return 0;
			if (sb->s_writers.frozen != SB_UNFROZEN)
				return -EBUSY;
			remount_ro = (sb_flags & SB_RDONLY) && !sb_rdonly(sb);
		}
	}
	shrink_dcache_sb(sb);

	/* If we are remounting RDONLY and current sb is read/write,
	   make sure there are no rw files opened */
	if (remount_ro) {
		if (force) {
			sb->s_readonly_remount = 1;
			smp_wmb();
		} else {
			retval = sb_prepare_remount_readonly(sb);
			if (retval)
				return retval;
		}
	}

	if (sb->s_op->remount_fs) {
		retval = sb->s_op->remount_fs(sb, &sb_flags, data);
		if (retval) {
			if (!force)
				goto cancel_readonly;
			/* If forced remount, go ahead despite any errors */
			WARN(1, "forced remount of a %s fs returned %i\n",
			     sb->s_type->name, retval);
		}
	}
	sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (sb_flags & MS_RMT_MASK);
	/* Needs to be ordered wrt mnt_is_readonly() */
	smp_wmb();
	sb->s_readonly_remount = 0;

	/*
	 * Some filesystems modify their metadata via some other path than the
	 * bdev buffer cache (eg. use a private mapping, or directories in
	 * pagecache, etc). Also file data modifications go via their own
	 * mappings. So If we try to mount readonly then copy the filesystem
	 * from bdev, we could get stale data, so invalidate it to give a best
	 * effort at coherency.
	 */
	if (remount_ro && sb->s_bdev)
		invalidate_bdev(sb->s_bdev);
	return 0;

cancel_readonly:
	sb->s_readonly_remount = 0;
	return retval;
}

static void do_emergency_remount_callback(struct super_block *sb)
{
	down_write(&sb->s_umount);
	if (sb->s_root && sb->s_bdev && (sb->s_flags & SB_BORN) &&
	    !sb_rdonly(sb)) {
		/*
		 * What lock protects sb->s_flags??
		 */
		do_remount_sb(sb, SB_RDONLY, NULL, 1);
	}
	up_write(&sb->s_umount);
}

static void do_emergency_remount(struct work_struct *work)
{
	__iterate_supers(do_emergency_remount_callback);
	kfree(work);
	printk("Emergency Remount complete\n");
}

void emergency_remount(void)
{
	struct work_struct *work;

	work = kmalloc(sizeof(*work), GFP_ATOMIC);
	if (work) {
		INIT_WORK(work, do_emergency_remount);
		schedule_work(work);
	}
}

static void do_thaw_all_callback(struct super_block *sb)
{
	down_write(&sb->s_umount);
	if (sb->s_root && sb->s_flags & SB_BORN) {
		emergency_thaw_bdev(sb);
		thaw_super_locked(sb);
	} else {
		up_write(&sb->s_umount);
	}
}

static void do_thaw_all(struct work_struct *work)
{
	__iterate_supers(do_thaw_all_callback);
	kfree(work);
	printk(KERN_WARNING "Emergency Thaw complete\n");
}

/**
 * emergency_thaw_all -- forcibly thaw every frozen filesystem
 *
 * Used for emergency unfreeze of all filesystems via SysRq
 */
void emergency_thaw_all(void)
{
	struct work_struct *work;

	work = kmalloc(sizeof(*work), GFP_ATOMIC);
	if (work) {
		INIT_WORK(work, do_thaw_all);
		schedule_work(work);
	}
}

/*
 * Unnamed block devices are dummy devices used by virtual
 * filesystems which don't use real block-devices.  -- jrs
 */

static DEFINE_IDA(unnamed_dev_ida);
static DEFINE_SPINLOCK(unnamed_dev_lock);/* protects the above */
/* Many userspace utilities consider an FSID of 0 invalid.
 * Always return at least 1 from get_anon_bdev.
 */
static int unnamed_dev_start = 1;

int get_anon_bdev(dev_t *p)
{
	int dev;
	int error;

 retry:
	if (ida_pre_get(&unnamed_dev_ida, GFP_ATOMIC) == 0)
		return -ENOMEM;
	spin_lock(&unnamed_dev_lock);
	error = ida_get_new_above(&unnamed_dev_ida, unnamed_dev_start, &dev);
	if (!error)
		unnamed_dev_start = dev + 1;
	spin_unlock(&unnamed_dev_lock);
	if (error == -EAGAIN)
		/* We raced and lost with another CPU. */
		goto retry;
	else if (error)
		return -EAGAIN;

	if (dev >= (1 << MINORBITS)) {
		spin_lock(&unnamed_dev_lock);
		ida_remove(&unnamed_dev_ida, dev);
		if (unnamed_dev_start > dev)
			unnamed_dev_start = dev;
		spin_unlock(&unnamed_dev_lock);
		return -EMFILE;
	}
	*p = MKDEV(0, dev & MINORMASK);
	return 0;
}
EXPORT_SYMBOL(get_anon_bdev);

void free_anon_bdev(dev_t dev)
{
	int slot = MINOR(dev);
	spin_lock(&unnamed_dev_lock);
	ida_remove(&unnamed_dev_ida, slot);
	if (slot < unnamed_dev_start)
		unnamed_dev_start = slot;
	spin_unlock(&unnamed_dev_lock);
}
EXPORT_SYMBOL(free_anon_bdev);

int set_anon_super(struct super_block *s, void *data)
{
	return get_anon_bdev(&s->s_dev);
}

EXPORT_SYMBOL(set_anon_super);

void kill_anon_super(struct super_block *sb)
{
	dev_t dev = sb->s_dev;
	generic_shutdown_super(sb);
	free_anon_bdev(dev);
}

EXPORT_SYMBOL(kill_anon_super);
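
/*
 * Illustrative sketch (hypothetical names): a virtual filesystem whose
 * ->kill_sb() frees its mount options before kill_anon_super() shuts the
 * superblock down and releases the anonymous dev_t, in the style of ramfs.
 *
 *	static void myfs_kill_sb(struct super_block *sb)
 *	{
 *		kfree(sb->s_fs_info);
 *		kill_anon_super(sb);
 *	}
 */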

void kill_litter_super(struct super_block *sb)
{
	if (sb->s_root)
		d_genocide(sb->s_root);
	kill_anon_super(sb);
}

EXPORT_SYMBOL(kill_litter_super);

static int ns_test_super(struct super_block *sb, void *data)
{
	return sb->s_fs_info == data;
}

static int ns_set_super(struct super_block *sb, void *data)
{
	sb->s_fs_info = data;
	return set_anon_super(sb, NULL);
}

struct dentry *mount_ns(struct file_system_type *fs_type,
	int flags, void *data, void *ns, struct user_namespace *user_ns,
	int (*fill_super)(struct super_block *, void *, int))
{
	struct super_block *sb;

	/* Don't allow mounting unless the caller has CAP_SYS_ADMIN
	 * over the namespace.
	 */
	if (!(flags & SB_KERNMOUNT) && !ns_capable(user_ns, CAP_SYS_ADMIN))
		return ERR_PTR(-EPERM);

	sb = sget_userns(fs_type, ns_test_super, ns_set_super, flags,
			 user_ns, ns);
	if (IS_ERR(sb))
		return ERR_CAST(sb);

	if (!sb->s_root) {
		int err;
		err = fill_super(sb, data, flags & SB_SILENT ? 1 : 0);
		if (err) {
			deactivate_locked_super(sb);
			return ERR_PTR(err);
		}

		sb->s_flags |= SB_ACTIVE;
	}

	return dget(sb->s_root);
}

EXPORT_SYMBOL(mount_ns);

#ifdef CONFIG_BLOCK
static int set_bdev_super(struct super_block *s, void *data)
{
	s->s_bdev = data;
	s->s_dev = s->s_bdev->bd_dev;
	s->s_bdi = bdi_get(s->s_bdev->bd_bdi);

	return 0;
}

static int test_bdev_super(struct super_block *s, void *data)
{
	return (void *)s->s_bdev == data;
}

struct dentry *mount_bdev(struct file_system_type *fs_type,
	int flags, const char *dev_name, void *data,
	int (*fill_super)(struct super_block *, void *, int))
{
	struct block_device *bdev;
	struct super_block *s;
	fmode_t mode = FMODE_READ | FMODE_EXCL;
	int error = 0;

	if (!(flags & SB_RDONLY))
		mode |= FMODE_WRITE;

	bdev = blkdev_get_by_path(dev_name, mode, fs_type);
	if (IS_ERR(bdev))
		return ERR_CAST(bdev);

	/*
	 * once the super is inserted into the list by sget, s_umount
	 * will protect the lockfs code from trying to start a snapshot
	 * while we are mounting
	 */
	mutex_lock(&bdev->bd_fsfreeze_mutex);
	if (bdev->bd_fsfreeze_count > 0) {
		mutex_unlock(&bdev->bd_fsfreeze_mutex);
		error = -EBUSY;
		goto error_bdev;
	}
	s = sget(fs_type, test_bdev_super, set_bdev_super, flags | SB_NOSEC,
		 bdev);
	mutex_unlock(&bdev->bd_fsfreeze_mutex);
	if (IS_ERR(s))
		goto error_s;

	if (s->s_root) {
		if ((flags ^ s->s_flags) & SB_RDONLY) {
			deactivate_locked_super(s);
			error = -EBUSY;
			goto error_bdev;
		}

		/*
		 * s_umount nests inside bd_mutex during
		 * __invalidate_device().  blkdev_put() acquires
		 * bd_mutex and can't be called under s_umount.  Drop
		 * s_umount temporarily.  This is safe as we're
		 * holding an active reference.
		 */
		up_write(&s->s_umount);
		blkdev_put(bdev, mode);
		down_write(&s->s_umount);
	} else {
		s->s_mode = mode;
		snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
		sb_set_blocksize(s, block_size(bdev));
		error = fill_super(s, data, flags & SB_SILENT ? 1 : 0);
		if (error) {
			deactivate_locked_super(s);
			goto error;
		}

		s->s_flags |= SB_ACTIVE;
		bdev->bd_super = s;
	}

	return dget(s->s_root);

error_s:
	error = PTR_ERR(s);
error_bdev:
	blkdev_put(bdev, mode);
error:
	return ERR_PTR(error);
}
EXPORT_SYMBOL(mount_bdev);
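
/*
 * Illustrative sketch (hypothetical myfs_* names): a block-device-backed
 * filesystem wires mount_bdev() into its file_system_type, supplying only a
 * fill_super callback; exclusive device open and sget() matching are handled
 * above.
 *
 *	static struct dentry *myfs_mount(struct file_system_type *fs_type,
 *			int flags, const char *dev_name, void *data)
 *	{
 *		return mount_bdev(fs_type, flags, dev_name, data,
 *				  myfs_fill_super);
 *	}
 *
 *	static struct file_system_type myfs_fs_type = {
 *		.owner		= THIS_MODULE,
 *		.name		= "myfs",
 *		.mount		= myfs_mount,
 *		.kill_sb	= kill_block_super,
 *		.fs_flags	= FS_REQUIRES_DEV,
 *	};
 */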

void kill_block_super(struct super_block *sb)
{
	struct block_device *bdev = sb->s_bdev;
	fmode_t mode = sb->s_mode;

	bdev->bd_super = NULL;
	generic_shutdown_super(sb);
	sync_blockdev(bdev);
	WARN_ON_ONCE(!(mode & FMODE_EXCL));
	blkdev_put(bdev, mode | FMODE_EXCL);
}

EXPORT_SYMBOL(kill_block_super);
#endif

struct dentry *mount_nodev(struct file_system_type *fs_type,
	int flags, void *data,
	int (*fill_super)(struct super_block *, void *, int))
{
	int error;
	struct super_block *s = sget(fs_type, NULL, set_anon_super, flags, NULL);

	if (IS_ERR(s))
		return ERR_CAST(s);

	error = fill_super(s, data, flags & SB_SILENT ? 1 : 0);
	if (error) {
		deactivate_locked_super(s);
		return ERR_PTR(error);
	}
	s->s_flags |= SB_ACTIVE;
	return dget(s->s_root);
}
EXPORT_SYMBOL(mount_nodev);
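
/*
 * Illustrative sketch: mount_nodev() passes a NULL @test to sget(), so every
 * mount creates a fresh superblock; a tmpfs-like filesystem would pair it
 * with kill_anon_super() (or kill_litter_super() if it pins dentries).
 * Hypothetical names.
 *
 *	static struct dentry *myfs_mount(struct file_system_type *fs_type,
 *			int flags, const char *dev_name, void *data)
 *	{
 *		return mount_nodev(fs_type, flags, data, myfs_fill_super);
 *	}
 */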

static int compare_single(struct super_block *s, void *p)
{
	return 1;
}

struct dentry *mount_single(struct file_system_type *fs_type,
	int flags, void *data,
	int (*fill_super)(struct super_block *, void *, int))
{
	struct super_block *s;
	int error;

	s = sget(fs_type, compare_single, set_anon_super, flags, NULL);
	if (IS_ERR(s))
		return ERR_CAST(s);
	if (!s->s_root) {
		error = fill_super(s, data, flags & SB_SILENT ? 1 : 0);
		if (error) {
			deactivate_locked_super(s);
			return ERR_PTR(error);
		}
		s->s_flags |= SB_ACTIVE;
	} else {
		do_remount_sb(s, flags, data, 0);
	}
	return dget(s->s_root);
}
EXPORT_SYMBOL(mount_single);

struct dentry *
mount_fs(struct file_system_type *type, int flags, const char *name, void *data)
{
	struct dentry *root;
	struct super_block *sb;
	char *secdata = NULL;
	int error = -ENOMEM;

	if (data && !(type->fs_flags & FS_BINARY_MOUNTDATA)) {
		secdata = alloc_secdata();
		if (!secdata)
			goto out;

		error = security_sb_copy_data(data, secdata);
		if (error)
			goto out_free_secdata;
	}

	root = type->mount(type, flags, name, data);
	if (IS_ERR(root)) {
		error = PTR_ERR(root);
		goto out_free_secdata;
	}
	sb = root->d_sb;
	BUG_ON(!sb);
	WARN_ON(!sb->s_bdi);

	/*
	 * Write barrier is for super_cache_count(). We place it before setting
	 * SB_BORN as the data dependency between the two functions is the
	 * superblock structure contents that we just set up, not the SB_BORN
	 * flag.
	 */
	smp_wmb();
	sb->s_flags |= SB_BORN;

	error = security_sb_kern_mount(sb, flags, secdata);
	if (error)
		goto out_sb;

	/*
	 * filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE
	 * but s_maxbytes was an unsigned long long for many releases. Throw
	 * this warning for a little while to try and catch filesystems that
	 * violate this rule.
	 */
	WARN((sb->s_maxbytes < 0), "%s set sb->s_maxbytes to "
		"negative value (%lld)\n", type->name, sb->s_maxbytes);

	up_write(&sb->s_umount);
	free_secdata(secdata);
	return root;
out_sb:
	dput(root);
	deactivate_locked_super(sb);
out_free_secdata:
	free_secdata(secdata);
out:
	return ERR_PTR(error);
}

/*
 * Setup private BDI for given superblock. It gets automatically cleaned up
 * in generic_shutdown_super().
 */
int super_setup_bdi_name(struct super_block *sb, char *fmt, ...)
{
	struct backing_dev_info *bdi;
	int err;
	va_list args;

	bdi = bdi_alloc(GFP_KERNEL);
	if (!bdi)
		return -ENOMEM;

	bdi->name = sb->s_type->name;

	va_start(args, fmt);
	err = bdi_register_va(bdi, fmt, args);
	va_end(args);
	if (err) {
		bdi_put(bdi);
		return err;
	}
	WARN_ON(sb->s_bdi != &noop_backing_dev_info);
	sb->s_bdi = bdi;

	return 0;
}
EXPORT_SYMBOL(super_setup_bdi_name);
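
/*
 * Illustrative sketch: a fill_super callback registering a uniquely named
 * private BDI before it starts instantiating inodes.  The myfs_* names and
 * the dev_id field are hypothetical.
 *
 *	static int myfs_fill_super(struct super_block *sb, void *data,
 *				   int silent)
 *	{
 *		struct myfs_fs_info *fsi = sb->s_fs_info;
 *		int err;
 *
 *		err = super_setup_bdi_name(sb, "myfs-%u", fsi->dev_id);
 *		if (err)
 *			return err;
 *		...
 *		return 0;
 *	}
 */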

/*
 * Setup private BDI for given superblock. It gets automatically cleaned up
 * in generic_shutdown_super().
 */
int super_setup_bdi(struct super_block *sb)
{
	static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);

	return super_setup_bdi_name(sb, "%.28s-%ld", sb->s_type->name,
				    atomic_long_inc_return(&bdi_seq));
}
EXPORT_SYMBOL(super_setup_bdi);

/*
 * This is an internal function, please use sb_end_{write,pagefault,intwrite}
 * instead.
 */
void __sb_end_write(struct super_block *sb, int level)
{
	percpu_up_read(sb->s_writers.rw_sem + level-1);
}
EXPORT_SYMBOL(__sb_end_write);

/*
 * This is an internal function, please use sb_start_{write,pagefault,intwrite}
 * instead.
 */
int __sb_start_write(struct super_block *sb, int level, bool wait)
{
	bool force_trylock = false;
	int ret = 1;

#ifdef CONFIG_LOCKDEP
	/*
	 * We want lockdep to tell us about possible deadlocks with freezing
	 * but it's a bit tricky to properly instrument it. Getting a freeze
	 * protection works as getting a read lock but there are subtle
	 * problems. XFS for example gets freeze protection on internal level
	 * twice in some cases, which is OK only because we already hold a
	 * freeze protection also on higher level. Due to these cases we have
	 * to use wait == F (trylock mode) which must not fail.
	 */
	if (wait) {
		int i;

		for (i = 0; i < level - 1; i++)
			if (percpu_rwsem_is_held(sb->s_writers.rw_sem + i)) {
				force_trylock = true;
				break;
			}
	}
#endif
	if (wait && !force_trylock)
		percpu_down_read(sb->s_writers.rw_sem + level-1);
	else
		ret = percpu_down_read_trylock(sb->s_writers.rw_sem + level-1);

	WARN_ON(force_trylock && !ret);
	return ret;
}
EXPORT_SYMBOL(__sb_start_write);
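
/*
 * Illustrative sketch: write paths take freeze protection around
 * modifications via the sb_start_write()/sb_end_write() wrappers (which call
 * __sb_start_write() at the SB_FREEZE_WRITE level); a frozen filesystem
 * blocks here until thawed.  The myfs_* helper names are hypothetical.
 *
 *	static ssize_t myfs_write(struct file *file, const char __user *buf,
 *				  size_t len, loff_t *ppos)
 *	{
 *		struct super_block *sb = file_inode(file)->i_sb;
 *		ssize_t ret;
 *
 *		sb_start_write(sb);
 *		ret = myfs_do_write(file, buf, len, ppos);
 *		sb_end_write(sb);
 *		return ret;
 *	}
 */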

/**
 * sb_wait_write - wait until all writers to given file system finish
 * @sb: the super for which we wait
 * @level: type of writers we wait for (normal vs page fault)
 *
 * This function waits until there are no writers of given type to given file
 * system.
 */
static void sb_wait_write(struct super_block *sb, int level)
{
	percpu_down_write(sb->s_writers.rw_sem + level-1);
}

/*
 * We are going to return to userspace and forget about these locks, the
 * ownership goes to the caller of thaw_super() which does unlock.
 */
static void lockdep_sb_freeze_release(struct super_block *sb)
{
	int level;

	for (level = SB_FREEZE_LEVELS - 1; level >= 0; level--)
		percpu_rwsem_release(sb->s_writers.rw_sem + level, 0, _THIS_IP_);
}

/*
 * Tell lockdep we are holding these locks before we call ->unfreeze_fs(sb).
 */
static void lockdep_sb_freeze_acquire(struct super_block *sb)
{
	int level;

	for (level = 0; level < SB_FREEZE_LEVELS; ++level)
		percpu_rwsem_acquire(sb->s_writers.rw_sem + level, 0, _THIS_IP_);
}

static void sb_freeze_unlock(struct super_block *sb)
{
	int level;

	for (level = SB_FREEZE_LEVELS - 1; level >= 0; level--)
		percpu_up_write(sb->s_writers.rw_sem + level);
}

/**
 * freeze_super - lock the filesystem and force it into a consistent state
 * @sb: the super to lock
 *
 * Syncs the super to make sure the filesystem is consistent and calls the fs's
 * freeze_fs.  Subsequent calls to this without first thawing the fs will return
 * -EBUSY.
 *
 * During this function, sb->s_writers.frozen goes through these values:
 *
 * SB_UNFROZEN: File system is normal, all writes progress as usual.
 *
 * SB_FREEZE_WRITE: The file system is in the process of being frozen.  New
 * writes should be blocked, though page faults are still allowed. We wait for
 * all writes to complete and then proceed to the next stage.
 *
 * SB_FREEZE_PAGEFAULT: Freezing continues. Now also page faults are blocked
 * but internal fs threads can still modify the filesystem (although they
 * should not dirty new pages or inodes), writeback can run etc. After waiting
 * for all running page faults we sync the filesystem which will clean all
 * dirty pages and inodes (no new dirty pages or inodes can be created when
 * sync is running).
 *
 * SB_FREEZE_FS: The file system is frozen. Now all internal sources of fs
 * modification are blocked (e.g. XFS preallocation truncation on inode
 * reclaim). This is usually implemented by blocking new transactions for
 * filesystems that have them and need this additional guard. After all
 * internal writers are finished we call ->freeze_fs() to finish filesystem
 * freezing. Then we transition to SB_FREEZE_COMPLETE state. This state is
 * mostly auxiliary for filesystems to verify they do not modify frozen fs.
 *
 * sb->s_writers.frozen is protected by sb->s_umount.
 */
int freeze_super(struct super_block *sb)
{
	int ret;

	atomic_inc(&sb->s_active);
	down_write(&sb->s_umount);
	if (sb->s_writers.frozen != SB_UNFROZEN) {
		deactivate_locked_super(sb);
		return -EBUSY;
	}

	if (!(sb->s_flags & SB_BORN)) {
		up_write(&sb->s_umount);
		return 0;	/* sic - it's "nothing to do" */
	}

	if (sb_rdonly(sb)) {
		/* Nothing to do really... */
		sb->s_writers.frozen = SB_FREEZE_COMPLETE;
		up_write(&sb->s_umount);
		return 0;
	}

	sb->s_writers.frozen = SB_FREEZE_WRITE;
	/* Release s_umount to preserve sb_start_write -> s_umount ordering */
	up_write(&sb->s_umount);
	sb_wait_write(sb, SB_FREEZE_WRITE);
	down_write(&sb->s_umount);

	/* Now we go and block page faults... */
	sb->s_writers.frozen = SB_FREEZE_PAGEFAULT;
	sb_wait_write(sb, SB_FREEZE_PAGEFAULT);

	/* All writers are done so after syncing there won't be dirty data */
	sync_filesystem(sb);

	/* Now wait for internal filesystem counter */
	sb->s_writers.frozen = SB_FREEZE_FS;
	sb_wait_write(sb, SB_FREEZE_FS);

	if (sb->s_op->freeze_fs) {
		ret = sb->s_op->freeze_fs(sb);
		if (ret) {
			printk(KERN_ERR
				"VFS:Filesystem freeze failed\n");
			sb->s_writers.frozen = SB_UNFROZEN;
			sb_freeze_unlock(sb);
			wake_up(&sb->s_writers.wait_unfrozen);
			deactivate_locked_super(sb);
			return ret;
		}
	}
	/*
	 * For debugging purposes so that fs can warn if it sees write activity
	 * when frozen is set to SB_FREEZE_COMPLETE, and for thaw_super().
	 */
	sb->s_writers.frozen = SB_FREEZE_COMPLETE;
	lockdep_sb_freeze_release(sb);
	up_write(&sb->s_umount);
	return 0;
}
EXPORT_SYMBOL(freeze_super);
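
/*
 * Illustrative sketch: freeze_super() and thaw_super() bracket a period in
 * which the on-disk image is guaranteed consistent, e.g. for a snapshot;
 * this is the path the FIFREEZE/FITHAW ioctls take.  take_snapshot() is a
 * hypothetical helper and its error handling is elided.
 *
 *	int err = freeze_super(sb);
 *
 *	if (err)
 *		return err;
 *	take_snapshot(sb->s_bdev);
 *	return thaw_super(sb);
 */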

/**
 * thaw_super -- unlock filesystem
 * @sb: the super to thaw
 *
 * Unlocks the filesystem and marks it writeable again after freeze_super().
 */
static int thaw_super_locked(struct super_block *sb)
{
	int error;

	if (sb->s_writers.frozen != SB_FREEZE_COMPLETE) {
		up_write(&sb->s_umount);
		return -EINVAL;
	}

	if (sb_rdonly(sb)) {
		sb->s_writers.frozen = SB_UNFROZEN;
		goto out;
	}

	lockdep_sb_freeze_acquire(sb);

	if (sb->s_op->unfreeze_fs) {
		error = sb->s_op->unfreeze_fs(sb);
		if (error) {
			printk(KERN_ERR
				"VFS:Filesystem thaw failed\n");
			lockdep_sb_freeze_release(sb);
			up_write(&sb->s_umount);
			return error;
		}
	}

	sb->s_writers.frozen = SB_UNFROZEN;
	sb_freeze_unlock(sb);
out:
	wake_up(&sb->s_writers.wait_unfrozen);
	deactivate_locked_super(sb);
	return 0;
}

int thaw_super(struct super_block *sb)
{
	down_write(&sb->s_umount);
	return thaw_super_locked(sb);
}
EXPORT_SYMBOL(thaw_super);