1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24#include <linux/export.h>
25#include <linux/slab.h>
26#include <linux/blkdev.h>
27#include <linux/mount.h>
28#include <linux/security.h>
29#include <linux/writeback.h>
30#include <linux/idr.h>
31#include <linux/mutex.h>
32#include <linux/backing-dev.h>
33#include <linux/rculist_bl.h>
34#include <linux/cleancache.h>
35#include <linux/fscrypt.h>
36#include <linux/fsnotify.h>
37#include <linux/lockdep.h>
38#include <linux/user_namespace.h>
39#include <linux/fs_context.h>
40#include <uapi/linux/mount.h>
41#include "internal.h"
42
43static int thaw_super_locked(struct super_block *sb);
44
45static LIST_HEAD(super_blocks);
46static DEFINE_SPINLOCK(sb_lock);
47
48static char *sb_writers_name[SB_FREEZE_LEVELS] = {
49 "sb_writers",
50 "sb_pagefaults",
51 "sb_internal",
52};
53
54
55
56
57
58
59
60
61static unsigned long super_cache_scan(struct shrinker *shrink,
62 struct shrink_control *sc)
63{
64 struct super_block *sb;
65 long fs_objects = 0;
66 long total_objects;
67 long freed = 0;
68 long dentries;
69 long inodes;
70
71 sb = container_of(shrink, struct super_block, s_shrink);
72
73
74
75
76
77 if (!(sc->gfp_mask & __GFP_FS))
78 return SHRINK_STOP;
79
80 if (!trylock_super(sb))
81 return SHRINK_STOP;
82
83 if (sb->s_op->nr_cached_objects)
84 fs_objects = sb->s_op->nr_cached_objects(sb, sc);
85
86 inodes = list_lru_shrink_count(&sb->s_inode_lru, sc);
87 dentries = list_lru_shrink_count(&sb->s_dentry_lru, sc);
88 total_objects = dentries + inodes + fs_objects + 1;
89 if (!total_objects)
90 total_objects = 1;
91
92
93 dentries = mult_frac(sc->nr_to_scan, dentries, total_objects);
94 inodes = mult_frac(sc->nr_to_scan, inodes, total_objects);
95 fs_objects = mult_frac(sc->nr_to_scan, fs_objects, total_objects);
96
97
98
99
100
101
102
103
104 sc->nr_to_scan = dentries + 1;
105 freed = prune_dcache_sb(sb, sc);
106 sc->nr_to_scan = inodes + 1;
107 freed += prune_icache_sb(sb, sc);
108
109 if (fs_objects) {
110 sc->nr_to_scan = fs_objects + 1;
111 freed += sb->s_op->free_cached_objects(sb, sc);
112 }
113
114 up_read(&sb->s_umount);
115 return freed;
116}
117
118static unsigned long super_cache_count(struct shrinker *shrink,
119 struct shrink_control *sc)
120{
121 struct super_block *sb;
122 long total_objects = 0;
123
124 sb = container_of(shrink, struct super_block, s_shrink);
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140 if (!(sb->s_flags & SB_BORN))
141 return 0;
142 smp_rmb();
143
144 if (sb->s_op && sb->s_op->nr_cached_objects)
145 total_objects = sb->s_op->nr_cached_objects(sb, sc);
146
147 total_objects += list_lru_shrink_count(&sb->s_dentry_lru, sc);
148 total_objects += list_lru_shrink_count(&sb->s_inode_lru, sc);
149
150 if (!total_objects)
151 return SHRINK_EMPTY;
152
153 total_objects = vfs_pressure_ratio(total_objects);
154 return total_objects;
155}
156
157static void destroy_super_work(struct work_struct *work)
158{
159 struct super_block *s = container_of(work, struct super_block,
160 destroy_work);
161 int i;
162
163 for (i = 0; i < SB_FREEZE_LEVELS; i++)
164 percpu_free_rwsem(&s->s_writers.rw_sem[i]);
165 kfree(s);
166}
167
168static void destroy_super_rcu(struct rcu_head *head)
169{
170 struct super_block *s = container_of(head, struct super_block, rcu);
171 INIT_WORK(&s->destroy_work, destroy_super_work);
172 schedule_work(&s->destroy_work);
173}
174
175
176static void destroy_unused_super(struct super_block *s)
177{
178 if (!s)
179 return;
180 up_write(&s->s_umount);
181 list_lru_destroy(&s->s_dentry_lru);
182 list_lru_destroy(&s->s_inode_lru);
183 security_sb_free(s);
184 put_user_ns(s->s_user_ns);
185 kfree(s->s_subtype);
186 free_prealloced_shrinker(&s->s_shrink);
187
188 destroy_super_work(&s->destroy_work);
189}
190
191
192
193
194
195
196
197
198
199
200static struct super_block *alloc_super(struct file_system_type *type, int flags,
201 struct user_namespace *user_ns)
202{
203 struct super_block *s = kzalloc(sizeof(struct super_block), GFP_USER);
204 static const struct super_operations default_op;
205 int i;
206
207 if (!s)
208 return NULL;
209
210 INIT_LIST_HEAD(&s->s_mounts);
211 s->s_user_ns = get_user_ns(user_ns);
212 init_rwsem(&s->s_umount);
213 lockdep_set_class(&s->s_umount, &type->s_umount_key);
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229 down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING);
230
231 if (security_sb_alloc(s))
232 goto fail;
233
234 for (i = 0; i < SB_FREEZE_LEVELS; i++) {
235 if (__percpu_init_rwsem(&s->s_writers.rw_sem[i],
236 sb_writers_name[i],
237 &type->s_writers_key[i]))
238 goto fail;
239 }
240 init_waitqueue_head(&s->s_writers.wait_unfrozen);
241 s->s_bdi = &noop_backing_dev_info;
242 s->s_flags = flags;
243 if (s->s_user_ns != &init_user_ns)
244 s->s_iflags |= SB_I_NODEV;
245 INIT_HLIST_NODE(&s->s_instances);
246 INIT_HLIST_BL_HEAD(&s->s_roots);
247 mutex_init(&s->s_sync_lock);
248 INIT_LIST_HEAD(&s->s_inodes);
249 spin_lock_init(&s->s_inode_list_lock);
250 INIT_LIST_HEAD(&s->s_inodes_wb);
251 spin_lock_init(&s->s_inode_wblist_lock);
252
253 s->s_count = 1;
254 atomic_set(&s->s_active, 1);
255 mutex_init(&s->s_vfs_rename_mutex);
256 lockdep_set_class(&s->s_vfs_rename_mutex, &type->s_vfs_rename_key);
257 init_rwsem(&s->s_dquot.dqio_sem);
258 s->s_maxbytes = MAX_NON_LFS;
259 s->s_op = &default_op;
260 s->s_time_gran = 1000000000;
261 s->s_time_min = TIME64_MIN;
262 s->s_time_max = TIME64_MAX;
263 s->cleancache_poolid = CLEANCACHE_NO_POOL;
264
265 s->s_shrink.seeks = DEFAULT_SEEKS;
266 s->s_shrink.scan_objects = super_cache_scan;
267 s->s_shrink.count_objects = super_cache_count;
268 s->s_shrink.batch = 1024;
269 s->s_shrink.flags = SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE;
270 if (prealloc_shrinker(&s->s_shrink))
271 goto fail;
272 if (list_lru_init_memcg(&s->s_dentry_lru, &s->s_shrink))
273 goto fail;
274 if (list_lru_init_memcg(&s->s_inode_lru, &s->s_shrink))
275 goto fail;
276 return s;
277
278fail:
279 destroy_unused_super(s);
280 return NULL;
281}
282
283
284
285
286
287
288static void __put_super(struct super_block *s)
289{
290 if (!--s->s_count) {
291 list_del_init(&s->s_list);
292 WARN_ON(s->s_dentry_lru.node);
293 WARN_ON(s->s_inode_lru.node);
294 WARN_ON(!list_empty(&s->s_mounts));
295 security_sb_free(s);
296 fscrypt_sb_free(s);
297 put_user_ns(s->s_user_ns);
298 kfree(s->s_subtype);
299 call_rcu(&s->rcu, destroy_super_rcu);
300 }
301}
302
303
304
305
306
307
308
309
310void put_super(struct super_block *sb)
311{
312 spin_lock(&sb_lock);
313 __put_super(sb);
314 spin_unlock(&sb_lock);
315}
316
317
318
319
320
321
322
323
324
325
326
327
328
329void deactivate_locked_super(struct super_block *s)
330{
331 struct file_system_type *fs = s->s_type;
332 if (atomic_dec_and_test(&s->s_active)) {
333 cleancache_invalidate_fs(s);
334 unregister_shrinker(&s->s_shrink);
335 fs->kill_sb(s);
336
337
338
339
340
341
342 list_lru_destroy(&s->s_dentry_lru);
343 list_lru_destroy(&s->s_inode_lru);
344
345 put_filesystem(fs);
346 put_super(s);
347 } else {
348 up_write(&s->s_umount);
349 }
350}
351
352EXPORT_SYMBOL(deactivate_locked_super);
353
354
355
356
357
358
359
360
361
362void deactivate_super(struct super_block *s)
363{
364 if (!atomic_add_unless(&s->s_active, -1, 1)) {
365 down_write(&s->s_umount);
366 deactivate_locked_super(s);
367 }
368}
369
370EXPORT_SYMBOL(deactivate_super);
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385static int grab_super(struct super_block *s) __releases(sb_lock)
386{
387 s->s_count++;
388 spin_unlock(&sb_lock);
389 down_write(&s->s_umount);
390 if ((s->s_flags & SB_BORN) && atomic_inc_not_zero(&s->s_active)) {
391 put_super(s);
392 return 1;
393 }
394 up_write(&s->s_umount);
395 put_super(s);
396 return 0;
397}
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416bool trylock_super(struct super_block *sb)
417{
418 if (down_read_trylock(&sb->s_umount)) {
419 if (!hlist_unhashed(&sb->s_instances) &&
420 sb->s_root && (sb->s_flags & SB_BORN))
421 return true;
422 up_read(&sb->s_umount);
423 }
424
425 return false;
426}
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442void generic_shutdown_super(struct super_block *sb)
443{
444 const struct super_operations *sop = sb->s_op;
445
446 if (sb->s_root) {
447 shrink_dcache_for_umount(sb);
448 sync_filesystem(sb);
449 sb->s_flags &= ~SB_ACTIVE;
450
451 cgroup_writeback_umount();
452
453
454 evict_inodes(sb);
455
456 fsnotify_sb_delete(sb);
457
458 if (sb->s_dio_done_wq) {
459 destroy_workqueue(sb->s_dio_done_wq);
460 sb->s_dio_done_wq = NULL;
461 }
462
463 if (sop->put_super)
464 sop->put_super(sb);
465
466 if (!list_empty(&sb->s_inodes)) {
467 printk("VFS: Busy inodes after unmount of %s. "
468 "Self-destruct in 5 seconds. Have a nice day...\n",
469 sb->s_id);
470 }
471 }
472 spin_lock(&sb_lock);
473
474 hlist_del_init(&sb->s_instances);
475 spin_unlock(&sb_lock);
476 up_write(&sb->s_umount);
477 if (sb->s_bdi != &noop_backing_dev_info) {
478 bdi_put(sb->s_bdi);
479 sb->s_bdi = &noop_backing_dev_info;
480 }
481}
482
483EXPORT_SYMBOL(generic_shutdown_super);
484
485bool mount_capable(struct fs_context *fc)
486{
487 if (!(fc->fs_type->fs_flags & FS_USERNS_MOUNT))
488 return capable(CAP_SYS_ADMIN);
489 else
490 return ns_capable(fc->user_ns, CAP_SYS_ADMIN);
491}
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511struct super_block *sget_fc(struct fs_context *fc,
512 int (*test)(struct super_block *, struct fs_context *),
513 int (*set)(struct super_block *, struct fs_context *))
514{
515 struct super_block *s = NULL;
516 struct super_block *old;
517 struct user_namespace *user_ns = fc->global ? &init_user_ns : fc->user_ns;
518 int err;
519
520retry:
521 spin_lock(&sb_lock);
522 if (test) {
523 hlist_for_each_entry(old, &fc->fs_type->fs_supers, s_instances) {
524 if (test(old, fc))
525 goto share_extant_sb;
526 }
527 }
528 if (!s) {
529 spin_unlock(&sb_lock);
530 s = alloc_super(fc->fs_type, fc->sb_flags, user_ns);
531 if (!s)
532 return ERR_PTR(-ENOMEM);
533 goto retry;
534 }
535
536 s->s_fs_info = fc->s_fs_info;
537 err = set(s, fc);
538 if (err) {
539 s->s_fs_info = NULL;
540 spin_unlock(&sb_lock);
541 destroy_unused_super(s);
542 return ERR_PTR(err);
543 }
544 fc->s_fs_info = NULL;
545 s->s_type = fc->fs_type;
546 s->s_iflags |= fc->s_iflags;
547 strlcpy(s->s_id, s->s_type->name, sizeof(s->s_id));
548 list_add_tail(&s->s_list, &super_blocks);
549 hlist_add_head(&s->s_instances, &s->s_type->fs_supers);
550 spin_unlock(&sb_lock);
551 get_filesystem(s->s_type);
552 register_shrinker_prepared(&s->s_shrink);
553 return s;
554
555share_extant_sb:
556 if (user_ns != old->s_user_ns) {
557 spin_unlock(&sb_lock);
558 destroy_unused_super(s);
559 return ERR_PTR(-EBUSY);
560 }
561 if (!grab_super(old))
562 goto retry;
563 destroy_unused_super(s);
564 return old;
565}
566EXPORT_SYMBOL(sget_fc);
567
568
569
570
571
572
573
574
575
576struct super_block *sget(struct file_system_type *type,
577 int (*test)(struct super_block *,void *),
578 int (*set)(struct super_block *,void *),
579 int flags,
580 void *data)
581{
582 struct user_namespace *user_ns = current_user_ns();
583 struct super_block *s = NULL;
584 struct super_block *old;
585 int err;
586
587
588
589
590
591 if (flags & SB_SUBMOUNT)
592 user_ns = &init_user_ns;
593
594retry:
595 spin_lock(&sb_lock);
596 if (test) {
597 hlist_for_each_entry(old, &type->fs_supers, s_instances) {
598 if (!test(old, data))
599 continue;
600 if (user_ns != old->s_user_ns) {
601 spin_unlock(&sb_lock);
602 destroy_unused_super(s);
603 return ERR_PTR(-EBUSY);
604 }
605 if (!grab_super(old))
606 goto retry;
607 destroy_unused_super(s);
608 return old;
609 }
610 }
611 if (!s) {
612 spin_unlock(&sb_lock);
613 s = alloc_super(type, (flags & ~SB_SUBMOUNT), user_ns);
614 if (!s)
615 return ERR_PTR(-ENOMEM);
616 goto retry;
617 }
618
619 err = set(s, data);
620 if (err) {
621 spin_unlock(&sb_lock);
622 destroy_unused_super(s);
623 return ERR_PTR(err);
624 }
625 s->s_type = type;
626 strlcpy(s->s_id, type->name, sizeof(s->s_id));
627 list_add_tail(&s->s_list, &super_blocks);
628 hlist_add_head(&s->s_instances, &type->fs_supers);
629 spin_unlock(&sb_lock);
630 get_filesystem(type);
631 register_shrinker_prepared(&s->s_shrink);
632 return s;
633}
634EXPORT_SYMBOL(sget);
635
636void drop_super(struct super_block *sb)
637{
638 up_read(&sb->s_umount);
639 put_super(sb);
640}
641
642EXPORT_SYMBOL(drop_super);
643
644void drop_super_exclusive(struct super_block *sb)
645{
646 up_write(&sb->s_umount);
647 put_super(sb);
648}
649EXPORT_SYMBOL(drop_super_exclusive);
650
651static void __iterate_supers(void (*f)(struct super_block *))
652{
653 struct super_block *sb, *p = NULL;
654
655 spin_lock(&sb_lock);
656 list_for_each_entry(sb, &super_blocks, s_list) {
657 if (hlist_unhashed(&sb->s_instances))
658 continue;
659 sb->s_count++;
660 spin_unlock(&sb_lock);
661
662 f(sb);
663
664 spin_lock(&sb_lock);
665 if (p)
666 __put_super(p);
667 p = sb;
668 }
669 if (p)
670 __put_super(p);
671 spin_unlock(&sb_lock);
672}
673
674
675
676
677
678
679
680
681void iterate_supers(void (*f)(struct super_block *, void *), void *arg)
682{
683 struct super_block *sb, *p = NULL;
684
685 spin_lock(&sb_lock);
686 list_for_each_entry(sb, &super_blocks, s_list) {
687 if (hlist_unhashed(&sb->s_instances))
688 continue;
689 sb->s_count++;
690 spin_unlock(&sb_lock);
691
692 down_read(&sb->s_umount);
693 if (sb->s_root && (sb->s_flags & SB_BORN))
694 f(sb, arg);
695 up_read(&sb->s_umount);
696
697 spin_lock(&sb_lock);
698 if (p)
699 __put_super(p);
700 p = sb;
701 }
702 if (p)
703 __put_super(p);
704 spin_unlock(&sb_lock);
705}
706
707
708
709
710
711
712
713
714
715
716void iterate_supers_type(struct file_system_type *type,
717 void (*f)(struct super_block *, void *), void *arg)
718{
719 struct super_block *sb, *p = NULL;
720
721 spin_lock(&sb_lock);
722 hlist_for_each_entry(sb, &type->fs_supers, s_instances) {
723 sb->s_count++;
724 spin_unlock(&sb_lock);
725
726 down_read(&sb->s_umount);
727 if (sb->s_root && (sb->s_flags & SB_BORN))
728 f(sb, arg);
729 up_read(&sb->s_umount);
730
731 spin_lock(&sb_lock);
732 if (p)
733 __put_super(p);
734 p = sb;
735 }
736 if (p)
737 __put_super(p);
738 spin_unlock(&sb_lock);
739}
740
741EXPORT_SYMBOL(iterate_supers_type);
742
743
744
745
746
747
748
749
750struct super_block *get_super(struct block_device *bdev)
751{
752 struct super_block *sb;
753
754 if (!bdev)
755 return NULL;
756
757 spin_lock(&sb_lock);
758rescan:
759 list_for_each_entry(sb, &super_blocks, s_list) {
760 if (hlist_unhashed(&sb->s_instances))
761 continue;
762 if (sb->s_bdev == bdev) {
763 sb->s_count++;
764 spin_unlock(&sb_lock);
765 down_read(&sb->s_umount);
766
767 if (sb->s_root && (sb->s_flags & SB_BORN))
768 return sb;
769 up_read(&sb->s_umount);
770
771 spin_lock(&sb_lock);
772 __put_super(sb);
773 goto rescan;
774 }
775 }
776 spin_unlock(&sb_lock);
777 return NULL;
778}
779
780
781
782
783
784
785
786
787
788struct super_block *get_active_super(struct block_device *bdev)
789{
790 struct super_block *sb;
791
792 if (!bdev)
793 return NULL;
794
795restart:
796 spin_lock(&sb_lock);
797 list_for_each_entry(sb, &super_blocks, s_list) {
798 if (hlist_unhashed(&sb->s_instances))
799 continue;
800 if (sb->s_bdev == bdev) {
801 if (!grab_super(sb))
802 goto restart;
803 up_write(&sb->s_umount);
804 return sb;
805 }
806 }
807 spin_unlock(&sb_lock);
808 return NULL;
809}
810
811struct super_block *user_get_super(dev_t dev, bool excl)
812{
813 struct super_block *sb;
814
815 spin_lock(&sb_lock);
816rescan:
817 list_for_each_entry(sb, &super_blocks, s_list) {
818 if (hlist_unhashed(&sb->s_instances))
819 continue;
820 if (sb->s_dev == dev) {
821 sb->s_count++;
822 spin_unlock(&sb_lock);
823 if (excl)
824 down_write(&sb->s_umount);
825 else
826 down_read(&sb->s_umount);
827
828 if (sb->s_root && (sb->s_flags & SB_BORN))
829 return sb;
830 if (excl)
831 up_write(&sb->s_umount);
832 else
833 up_read(&sb->s_umount);
834
835 spin_lock(&sb_lock);
836 __put_super(sb);
837 goto rescan;
838 }
839 }
840 spin_unlock(&sb_lock);
841 return NULL;
842}
843
844
845
846
847
848
849
850int reconfigure_super(struct fs_context *fc)
851{
852 struct super_block *sb = fc->root->d_sb;
853 int retval;
854 bool remount_ro = false;
855 bool force = fc->sb_flags & SB_FORCE;
856
857 if (fc->sb_flags_mask & ~MS_RMT_MASK)
858 return -EINVAL;
859 if (sb->s_writers.frozen != SB_UNFROZEN)
860 return -EBUSY;
861
862 retval = security_sb_remount(sb, fc->security);
863 if (retval)
864 return retval;
865
866 if (fc->sb_flags_mask & SB_RDONLY) {
867#ifdef CONFIG_BLOCK
868 if (!(fc->sb_flags & SB_RDONLY) && sb->s_bdev &&
869 bdev_read_only(sb->s_bdev))
870 return -EACCES;
871#endif
872
873 remount_ro = (fc->sb_flags & SB_RDONLY) && !sb_rdonly(sb);
874 }
875
876 if (remount_ro) {
877 if (!hlist_empty(&sb->s_pins)) {
878 up_write(&sb->s_umount);
879 group_pin_kill(&sb->s_pins);
880 down_write(&sb->s_umount);
881 if (!sb->s_root)
882 return 0;
883 if (sb->s_writers.frozen != SB_UNFROZEN)
884 return -EBUSY;
885 remount_ro = !sb_rdonly(sb);
886 }
887 }
888 shrink_dcache_sb(sb);
889
890
891
892
893 if (remount_ro) {
894 if (force) {
895 sb->s_readonly_remount = 1;
896 smp_wmb();
897 } else {
898 retval = sb_prepare_remount_readonly(sb);
899 if (retval)
900 return retval;
901 }
902 }
903
904 if (fc->ops->reconfigure) {
905 retval = fc->ops->reconfigure(fc);
906 if (retval) {
907 if (!force)
908 goto cancel_readonly;
909
910 WARN(1, "forced remount of a %s fs returned %i\n",
911 sb->s_type->name, retval);
912 }
913 }
914
915 WRITE_ONCE(sb->s_flags, ((sb->s_flags & ~fc->sb_flags_mask) |
916 (fc->sb_flags & fc->sb_flags_mask)));
917
918 smp_wmb();
919 sb->s_readonly_remount = 0;
920
921
922
923
924
925
926
927
928
929 if (remount_ro && sb->s_bdev)
930 invalidate_bdev(sb->s_bdev);
931 return 0;
932
933cancel_readonly:
934 sb->s_readonly_remount = 0;
935 return retval;
936}
937
938static void do_emergency_remount_callback(struct super_block *sb)
939{
940 down_write(&sb->s_umount);
941 if (sb->s_root && sb->s_bdev && (sb->s_flags & SB_BORN) &&
942 !sb_rdonly(sb)) {
943 struct fs_context *fc;
944
945 fc = fs_context_for_reconfigure(sb->s_root,
946 SB_RDONLY | SB_FORCE, SB_RDONLY);
947 if (!IS_ERR(fc)) {
948 if (parse_monolithic_mount_data(fc, NULL) == 0)
949 (void)reconfigure_super(fc);
950 put_fs_context(fc);
951 }
952 }
953 up_write(&sb->s_umount);
954}
955
956static void do_emergency_remount(struct work_struct *work)
957{
958 __iterate_supers(do_emergency_remount_callback);
959 kfree(work);
960 printk("Emergency Remount complete\n");
961}
962
963void emergency_remount(void)
964{
965 struct work_struct *work;
966
967 work = kmalloc(sizeof(*work), GFP_ATOMIC);
968 if (work) {
969 INIT_WORK(work, do_emergency_remount);
970 schedule_work(work);
971 }
972}
973
974static void do_thaw_all_callback(struct super_block *sb)
975{
976 down_write(&sb->s_umount);
977 if (sb->s_root && sb->s_flags & SB_BORN) {
978 emergency_thaw_bdev(sb);
979 thaw_super_locked(sb);
980 } else {
981 up_write(&sb->s_umount);
982 }
983}
984
985static void do_thaw_all(struct work_struct *work)
986{
987 __iterate_supers(do_thaw_all_callback);
988 kfree(work);
989 printk(KERN_WARNING "Emergency Thaw complete\n");
990}
991
992
993
994
995
996
997void emergency_thaw_all(void)
998{
999 struct work_struct *work;
1000
1001 work = kmalloc(sizeof(*work), GFP_ATOMIC);
1002 if (work) {
1003 INIT_WORK(work, do_thaw_all);
1004 schedule_work(work);
1005 }
1006}
1007
1008static DEFINE_IDA(unnamed_dev_ida);
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021int get_anon_bdev(dev_t *p)
1022{
1023 int dev;
1024
1025
1026
1027
1028
1029 dev = ida_alloc_range(&unnamed_dev_ida, 1, (1 << MINORBITS) - 1,
1030 GFP_ATOMIC);
1031 if (dev == -ENOSPC)
1032 dev = -EMFILE;
1033 if (dev < 0)
1034 return dev;
1035
1036 *p = MKDEV(0, dev);
1037 return 0;
1038}
1039EXPORT_SYMBOL(get_anon_bdev);
1040
1041void free_anon_bdev(dev_t dev)
1042{
1043 ida_free(&unnamed_dev_ida, MINOR(dev));
1044}
1045EXPORT_SYMBOL(free_anon_bdev);
1046
1047int set_anon_super(struct super_block *s, void *data)
1048{
1049 return get_anon_bdev(&s->s_dev);
1050}
1051EXPORT_SYMBOL(set_anon_super);
1052
1053void kill_anon_super(struct super_block *sb)
1054{
1055 dev_t dev = sb->s_dev;
1056 generic_shutdown_super(sb);
1057 free_anon_bdev(dev);
1058}
1059EXPORT_SYMBOL(kill_anon_super);
1060
1061void kill_litter_super(struct super_block *sb)
1062{
1063 if (sb->s_root)
1064 d_genocide(sb->s_root);
1065 kill_anon_super(sb);
1066}
1067EXPORT_SYMBOL(kill_litter_super);
1068
1069int set_anon_super_fc(struct super_block *sb, struct fs_context *fc)
1070{
1071 return set_anon_super(sb, NULL);
1072}
1073EXPORT_SYMBOL(set_anon_super_fc);
1074
1075static int test_keyed_super(struct super_block *sb, struct fs_context *fc)
1076{
1077 return sb->s_fs_info == fc->s_fs_info;
1078}
1079
/*
 * sget_fc() test callback for single-instance filesystems: any
 * existing superblock of the type matches.  Neither argument is used.
 */
static int test_single_super(struct super_block *s, struct fs_context *fc)
{
	return 1;
}
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110int vfs_get_super(struct fs_context *fc,
1111 enum vfs_get_super_keying keying,
1112 int (*fill_super)(struct super_block *sb,
1113 struct fs_context *fc))
1114{
1115 int (*test)(struct super_block *, struct fs_context *);
1116 struct super_block *sb;
1117 int err;
1118
1119 switch (keying) {
1120 case vfs_get_single_super:
1121 case vfs_get_single_reconf_super:
1122 test = test_single_super;
1123 break;
1124 case vfs_get_keyed_super:
1125 test = test_keyed_super;
1126 break;
1127 case vfs_get_independent_super:
1128 test = NULL;
1129 break;
1130 default:
1131 BUG();
1132 }
1133
1134 sb = sget_fc(fc, test, set_anon_super_fc);
1135 if (IS_ERR(sb))
1136 return PTR_ERR(sb);
1137
1138 if (!sb->s_root) {
1139 err = fill_super(sb, fc);
1140 if (err)
1141 goto error;
1142
1143 sb->s_flags |= SB_ACTIVE;
1144 fc->root = dget(sb->s_root);
1145 } else {
1146 fc->root = dget(sb->s_root);
1147 if (keying == vfs_get_single_reconf_super) {
1148 err = reconfigure_super(fc);
1149 if (err < 0) {
1150 dput(fc->root);
1151 fc->root = NULL;
1152 goto error;
1153 }
1154 }
1155 }
1156
1157 return 0;
1158
1159error:
1160 deactivate_locked_super(sb);
1161 return err;
1162}
1163EXPORT_SYMBOL(vfs_get_super);
1164
1165int get_tree_nodev(struct fs_context *fc,
1166 int (*fill_super)(struct super_block *sb,
1167 struct fs_context *fc))
1168{
1169 return vfs_get_super(fc, vfs_get_independent_super, fill_super);
1170}
1171EXPORT_SYMBOL(get_tree_nodev);
1172
1173int get_tree_single(struct fs_context *fc,
1174 int (*fill_super)(struct super_block *sb,
1175 struct fs_context *fc))
1176{
1177 return vfs_get_super(fc, vfs_get_single_super, fill_super);
1178}
1179EXPORT_SYMBOL(get_tree_single);
1180
1181int get_tree_single_reconf(struct fs_context *fc,
1182 int (*fill_super)(struct super_block *sb,
1183 struct fs_context *fc))
1184{
1185 return vfs_get_super(fc, vfs_get_single_reconf_super, fill_super);
1186}
1187EXPORT_SYMBOL(get_tree_single_reconf);
1188
1189int get_tree_keyed(struct fs_context *fc,
1190 int (*fill_super)(struct super_block *sb,
1191 struct fs_context *fc),
1192 void *key)
1193{
1194 fc->s_fs_info = key;
1195 return vfs_get_super(fc, vfs_get_keyed_super, fill_super);
1196}
1197EXPORT_SYMBOL(get_tree_keyed);
1198
1199#ifdef CONFIG_BLOCK
1200
1201static int set_bdev_super(struct super_block *s, void *data)
1202{
1203 s->s_bdev = data;
1204 s->s_dev = s->s_bdev->bd_dev;
1205 s->s_bdi = bdi_get(s->s_bdev->bd_bdi);
1206
1207 if (blk_queue_stable_writes(s->s_bdev->bd_disk->queue))
1208 s->s_iflags |= SB_I_STABLE_WRITES;
1209 return 0;
1210}
1211
1212static int set_bdev_super_fc(struct super_block *s, struct fs_context *fc)
1213{
1214 return set_bdev_super(s, fc->sget_key);
1215}
1216
1217static int test_bdev_super_fc(struct super_block *s, struct fs_context *fc)
1218{
1219 return s->s_bdev == fc->sget_key;
1220}
1221
1222
1223
1224
1225
1226
1227int get_tree_bdev(struct fs_context *fc,
1228 int (*fill_super)(struct super_block *,
1229 struct fs_context *))
1230{
1231 struct block_device *bdev;
1232 struct super_block *s;
1233 fmode_t mode = FMODE_READ | FMODE_EXCL;
1234 int error = 0;
1235
1236 if (!(fc->sb_flags & SB_RDONLY))
1237 mode |= FMODE_WRITE;
1238
1239 if (!fc->source)
1240 return invalf(fc, "No source specified");
1241
1242 bdev = blkdev_get_by_path(fc->source, mode, fc->fs_type);
1243 if (IS_ERR(bdev)) {
1244 errorf(fc, "%s: Can't open blockdev", fc->source);
1245 return PTR_ERR(bdev);
1246 }
1247
1248
1249
1250
1251
1252 mutex_lock(&bdev->bd_fsfreeze_mutex);
1253 if (bdev->bd_fsfreeze_count > 0) {
1254 mutex_unlock(&bdev->bd_fsfreeze_mutex);
1255 warnf(fc, "%pg: Can't mount, blockdev is frozen", bdev);
1256 blkdev_put(bdev, mode);
1257 return -EBUSY;
1258 }
1259
1260 fc->sb_flags |= SB_NOSEC;
1261 fc->sget_key = bdev;
1262 s = sget_fc(fc, test_bdev_super_fc, set_bdev_super_fc);
1263 mutex_unlock(&bdev->bd_fsfreeze_mutex);
1264 if (IS_ERR(s)) {
1265 blkdev_put(bdev, mode);
1266 return PTR_ERR(s);
1267 }
1268
1269 if (s->s_root) {
1270
1271 if ((fc->sb_flags ^ s->s_flags) & SB_RDONLY) {
1272 warnf(fc, "%pg: Can't mount, would change RO state", bdev);
1273 deactivate_locked_super(s);
1274 blkdev_put(bdev, mode);
1275 return -EBUSY;
1276 }
1277
1278
1279
1280
1281
1282
1283
1284
1285 up_write(&s->s_umount);
1286 blkdev_put(bdev, mode);
1287 down_write(&s->s_umount);
1288 } else {
1289 s->s_mode = mode;
1290 snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
1291 sb_set_blocksize(s, block_size(bdev));
1292 error = fill_super(s, fc);
1293 if (error) {
1294 deactivate_locked_super(s);
1295 return error;
1296 }
1297
1298 s->s_flags |= SB_ACTIVE;
1299 bdev->bd_super = s;
1300 }
1301
1302 BUG_ON(fc->root);
1303 fc->root = dget(s->s_root);
1304 return 0;
1305}
1306EXPORT_SYMBOL(get_tree_bdev);
1307
1308static int test_bdev_super(struct super_block *s, void *data)
1309{
1310 return (void *)s->s_bdev == data;
1311}
1312
1313struct dentry *mount_bdev(struct file_system_type *fs_type,
1314 int flags, const char *dev_name, void *data,
1315 int (*fill_super)(struct super_block *, void *, int))
1316{
1317 struct block_device *bdev;
1318 struct super_block *s;
1319 fmode_t mode = FMODE_READ | FMODE_EXCL;
1320 int error = 0;
1321
1322 if (!(flags & SB_RDONLY))
1323 mode |= FMODE_WRITE;
1324
1325 bdev = blkdev_get_by_path(dev_name, mode, fs_type);
1326 if (IS_ERR(bdev))
1327 return ERR_CAST(bdev);
1328
1329
1330
1331
1332
1333
1334 mutex_lock(&bdev->bd_fsfreeze_mutex);
1335 if (bdev->bd_fsfreeze_count > 0) {
1336 mutex_unlock(&bdev->bd_fsfreeze_mutex);
1337 error = -EBUSY;
1338 goto error_bdev;
1339 }
1340 s = sget(fs_type, test_bdev_super, set_bdev_super, flags | SB_NOSEC,
1341 bdev);
1342 mutex_unlock(&bdev->bd_fsfreeze_mutex);
1343 if (IS_ERR(s))
1344 goto error_s;
1345
1346 if (s->s_root) {
1347 if ((flags ^ s->s_flags) & SB_RDONLY) {
1348 deactivate_locked_super(s);
1349 error = -EBUSY;
1350 goto error_bdev;
1351 }
1352
1353
1354
1355
1356
1357
1358
1359
1360 up_write(&s->s_umount);
1361 blkdev_put(bdev, mode);
1362 down_write(&s->s_umount);
1363 } else {
1364 s->s_mode = mode;
1365 snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
1366 sb_set_blocksize(s, block_size(bdev));
1367 error = fill_super(s, data, flags & SB_SILENT ? 1 : 0);
1368 if (error) {
1369 deactivate_locked_super(s);
1370 goto error;
1371 }
1372
1373 s->s_flags |= SB_ACTIVE;
1374 bdev->bd_super = s;
1375 }
1376
1377 return dget(s->s_root);
1378
1379error_s:
1380 error = PTR_ERR(s);
1381error_bdev:
1382 blkdev_put(bdev, mode);
1383error:
1384 return ERR_PTR(error);
1385}
1386EXPORT_SYMBOL(mount_bdev);
1387
1388void kill_block_super(struct super_block *sb)
1389{
1390 struct block_device *bdev = sb->s_bdev;
1391 fmode_t mode = sb->s_mode;
1392
1393 bdev->bd_super = NULL;
1394 generic_shutdown_super(sb);
1395 sync_blockdev(bdev);
1396 WARN_ON_ONCE(!(mode & FMODE_EXCL));
1397 blkdev_put(bdev, mode | FMODE_EXCL);
1398}
1399
1400EXPORT_SYMBOL(kill_block_super);
1401#endif
1402
1403struct dentry *mount_nodev(struct file_system_type *fs_type,
1404 int flags, void *data,
1405 int (*fill_super)(struct super_block *, void *, int))
1406{
1407 int error;
1408 struct super_block *s = sget(fs_type, NULL, set_anon_super, flags, NULL);
1409
1410 if (IS_ERR(s))
1411 return ERR_CAST(s);
1412
1413 error = fill_super(s, data, flags & SB_SILENT ? 1 : 0);
1414 if (error) {
1415 deactivate_locked_super(s);
1416 return ERR_PTR(error);
1417 }
1418 s->s_flags |= SB_ACTIVE;
1419 return dget(s->s_root);
1420}
1421EXPORT_SYMBOL(mount_nodev);
1422
1423static int reconfigure_single(struct super_block *s,
1424 int flags, void *data)
1425{
1426 struct fs_context *fc;
1427 int ret;
1428
1429
1430
1431
1432
1433
1434 fc = fs_context_for_reconfigure(s->s_root, flags, MS_RMT_MASK);
1435 if (IS_ERR(fc))
1436 return PTR_ERR(fc);
1437
1438 ret = parse_monolithic_mount_data(fc, data);
1439 if (ret < 0)
1440 goto out;
1441
1442 ret = reconfigure_super(fc);
1443out:
1444 put_fs_context(fc);
1445 return ret;
1446}
1447
/*
 * sget() test callback for single-instance filesystems: any existing
 * superblock of the type matches.  Neither argument is used.
 */
static int compare_single(struct super_block *s, void *p)
{
	return 1;
}
1452
1453struct dentry *mount_single(struct file_system_type *fs_type,
1454 int flags, void *data,
1455 int (*fill_super)(struct super_block *, void *, int))
1456{
1457 struct super_block *s;
1458 int error;
1459
1460 s = sget(fs_type, compare_single, set_anon_super, flags, NULL);
1461 if (IS_ERR(s))
1462 return ERR_CAST(s);
1463 if (!s->s_root) {
1464 error = fill_super(s, data, flags & SB_SILENT ? 1 : 0);
1465 if (!error)
1466 s->s_flags |= SB_ACTIVE;
1467 } else {
1468 error = reconfigure_single(s, flags, data);
1469 }
1470 if (unlikely(error)) {
1471 deactivate_locked_super(s);
1472 return ERR_PTR(error);
1473 }
1474 return dget(s->s_root);
1475}
1476EXPORT_SYMBOL(mount_single);
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486int vfs_get_tree(struct fs_context *fc)
1487{
1488 struct super_block *sb;
1489 int error;
1490
1491 if (fc->root)
1492 return -EBUSY;
1493
1494
1495
1496
1497 error = fc->ops->get_tree(fc);
1498 if (error < 0)
1499 return error;
1500
1501 if (!fc->root) {
1502 pr_err("Filesystem %s get_tree() didn't set fc->root\n",
1503 fc->fs_type->name);
1504
1505
1506
1507 BUG();
1508 }
1509
1510 sb = fc->root->d_sb;
1511 WARN_ON(!sb->s_bdi);
1512
1513
1514
1515
1516
1517
1518
1519 smp_wmb();
1520 sb->s_flags |= SB_BORN;
1521
1522 error = security_sb_set_mnt_opts(sb, fc->security, 0, NULL);
1523 if (unlikely(error)) {
1524 fc_drop_locked(fc);
1525 return error;
1526 }
1527
1528
1529
1530
1531
1532
1533
1534 WARN((sb->s_maxbytes < 0), "%s set sb->s_maxbytes to "
1535 "negative value (%lld)\n", fc->fs_type->name, sb->s_maxbytes);
1536
1537 return 0;
1538}
1539EXPORT_SYMBOL(vfs_get_tree);
1540
1541
1542
1543
1544
1545int super_setup_bdi_name(struct super_block *sb, char *fmt, ...)
1546{
1547 struct backing_dev_info *bdi;
1548 int err;
1549 va_list args;
1550
1551 bdi = bdi_alloc(NUMA_NO_NODE);
1552 if (!bdi)
1553 return -ENOMEM;
1554
1555 va_start(args, fmt);
1556 err = bdi_register_va(bdi, fmt, args);
1557 va_end(args);
1558 if (err) {
1559 bdi_put(bdi);
1560 return err;
1561 }
1562 WARN_ON(sb->s_bdi != &noop_backing_dev_info);
1563 sb->s_bdi = bdi;
1564
1565 return 0;
1566}
1567EXPORT_SYMBOL(super_setup_bdi_name);
1568
1569
1570
1571
1572
1573int super_setup_bdi(struct super_block *sb)
1574{
1575 static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
1576
1577 return super_setup_bdi_name(sb, "%.28s-%ld", sb->s_type->name,
1578 atomic_long_inc_return(&bdi_seq));
1579}
1580EXPORT_SYMBOL(super_setup_bdi);
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590static void sb_wait_write(struct super_block *sb, int level)
1591{
1592 percpu_down_write(sb->s_writers.rw_sem + level-1);
1593}
1594
1595
1596
1597
1598
1599static void lockdep_sb_freeze_release(struct super_block *sb)
1600{
1601 int level;
1602
1603 for (level = SB_FREEZE_LEVELS - 1; level >= 0; level--)
1604 percpu_rwsem_release(sb->s_writers.rw_sem + level, 0, _THIS_IP_);
1605}
1606
1607
1608
1609
1610static void lockdep_sb_freeze_acquire(struct super_block *sb)
1611{
1612 int level;
1613
1614 for (level = 0; level < SB_FREEZE_LEVELS; ++level)
1615 percpu_rwsem_acquire(sb->s_writers.rw_sem + level, 0, _THIS_IP_);
1616}
1617
1618static void sb_freeze_unlock(struct super_block *sb)
1619{
1620 int level;
1621
1622 for (level = SB_FREEZE_LEVELS - 1; level >= 0; level--)
1623 percpu_up_write(sb->s_writers.rw_sem + level);
1624}
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659int freeze_super(struct super_block *sb)
1660{
1661 int ret;
1662
1663 atomic_inc(&sb->s_active);
1664 down_write(&sb->s_umount);
1665 if (sb->s_writers.frozen != SB_UNFROZEN) {
1666 deactivate_locked_super(sb);
1667 return -EBUSY;
1668 }
1669
1670 if (!(sb->s_flags & SB_BORN)) {
1671 up_write(&sb->s_umount);
1672 return 0;
1673 }
1674
1675 if (sb_rdonly(sb)) {
1676
1677 sb->s_writers.frozen = SB_FREEZE_COMPLETE;
1678 up_write(&sb->s_umount);
1679 return 0;
1680 }
1681
1682 sb->s_writers.frozen = SB_FREEZE_WRITE;
1683
1684 up_write(&sb->s_umount);
1685 sb_wait_write(sb, SB_FREEZE_WRITE);
1686 down_write(&sb->s_umount);
1687
1688
1689 sb->s_writers.frozen = SB_FREEZE_PAGEFAULT;
1690 sb_wait_write(sb, SB_FREEZE_PAGEFAULT);
1691
1692
1693 sync_filesystem(sb);
1694
1695
1696 sb->s_writers.frozen = SB_FREEZE_FS;
1697 sb_wait_write(sb, SB_FREEZE_FS);
1698
1699 if (sb->s_op->freeze_fs) {
1700 ret = sb->s_op->freeze_fs(sb);
1701 if (ret) {
1702 printk(KERN_ERR
1703 "VFS:Filesystem freeze failed\n");
1704 sb->s_writers.frozen = SB_UNFROZEN;
1705 sb_freeze_unlock(sb);
1706 wake_up(&sb->s_writers.wait_unfrozen);
1707 deactivate_locked_super(sb);
1708 return ret;
1709 }
1710 }
1711
1712
1713
1714
1715 sb->s_writers.frozen = SB_FREEZE_COMPLETE;
1716 lockdep_sb_freeze_release(sb);
1717 up_write(&sb->s_umount);
1718 return 0;
1719}
1720EXPORT_SYMBOL(freeze_super);
1721
1722static int thaw_super_locked(struct super_block *sb)
1723{
1724 int error;
1725
1726 if (sb->s_writers.frozen != SB_FREEZE_COMPLETE) {
1727 up_write(&sb->s_umount);
1728 return -EINVAL;
1729 }
1730
1731 if (sb_rdonly(sb)) {
1732 sb->s_writers.frozen = SB_UNFROZEN;
1733 goto out;
1734 }
1735
1736 lockdep_sb_freeze_acquire(sb);
1737
1738 if (sb->s_op->unfreeze_fs) {
1739 error = sb->s_op->unfreeze_fs(sb);
1740 if (error) {
1741 printk(KERN_ERR
1742 "VFS:Filesystem thaw failed\n");
1743 lockdep_sb_freeze_release(sb);
1744 up_write(&sb->s_umount);
1745 return error;
1746 }
1747 }
1748
1749 sb->s_writers.frozen = SB_UNFROZEN;
1750 sb_freeze_unlock(sb);
1751out:
1752 wake_up(&sb->s_writers.wait_unfrozen);
1753 deactivate_locked_super(sb);
1754 return 0;
1755}
1756
1757
1758
1759
1760
1761
1762
1763int thaw_super(struct super_block *sb)
1764{
1765 down_write(&sb->s_umount);
1766 return thaw_super_locked(sb);
1767}
1768EXPORT_SYMBOL(thaw_super);
1769