1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24#include <linux/export.h>
25#include <linux/slab.h>
26#include <linux/blkdev.h>
27#include <linux/mount.h>
28#include <linux/security.h>
29#include <linux/writeback.h>
30#include <linux/idr.h>
31#include <linux/mutex.h>
32#include <linux/backing-dev.h>
33#include <linux/rculist_bl.h>
34#include <linux/cleancache.h>
35#include <linux/fsnotify.h>
36#include <linux/lockdep.h>
37#include <linux/user_namespace.h>
38#include <linux/fs_context.h>
39#include <uapi/linux/mount.h>
40#include "internal.h"
41
42static int thaw_super_locked(struct super_block *sb);
43
44static LIST_HEAD(super_blocks);
45static DEFINE_SPINLOCK(sb_lock);
46
47static char *sb_writers_name[SB_FREEZE_LEVELS] = {
48 "sb_writers",
49 "sb_pagefaults",
50 "sb_internal",
51};
52
53
54
55
56
57
58
59
60static unsigned long super_cache_scan(struct shrinker *shrink,
61 struct shrink_control *sc)
62{
63 struct super_block *sb;
64 long fs_objects = 0;
65 long total_objects;
66 long freed = 0;
67 long dentries;
68 long inodes;
69
70 sb = container_of(shrink, struct super_block, s_shrink);
71
72
73
74
75
76 if (!(sc->gfp_mask & __GFP_FS))
77 return SHRINK_STOP;
78
79 if (!trylock_super(sb))
80 return SHRINK_STOP;
81
82 if (sb->s_op->nr_cached_objects)
83 fs_objects = sb->s_op->nr_cached_objects(sb, sc);
84
85 inodes = list_lru_shrink_count(&sb->s_inode_lru, sc);
86 dentries = list_lru_shrink_count(&sb->s_dentry_lru, sc);
87 total_objects = dentries + inodes + fs_objects + 1;
88 if (!total_objects)
89 total_objects = 1;
90
91
92 dentries = mult_frac(sc->nr_to_scan, dentries, total_objects);
93 inodes = mult_frac(sc->nr_to_scan, inodes, total_objects);
94 fs_objects = mult_frac(sc->nr_to_scan, fs_objects, total_objects);
95
96
97
98
99
100
101
102
103 sc->nr_to_scan = dentries + 1;
104 freed = prune_dcache_sb(sb, sc);
105 sc->nr_to_scan = inodes + 1;
106 freed += prune_icache_sb(sb, sc);
107
108 if (fs_objects) {
109 sc->nr_to_scan = fs_objects + 1;
110 freed += sb->s_op->free_cached_objects(sb, sc);
111 }
112
113 up_read(&sb->s_umount);
114 return freed;
115}
116
117static unsigned long super_cache_count(struct shrinker *shrink,
118 struct shrink_control *sc)
119{
120 struct super_block *sb;
121 long total_objects = 0;
122
123 sb = container_of(shrink, struct super_block, s_shrink);
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139 if (!(sb->s_flags & SB_BORN))
140 return 0;
141 smp_rmb();
142
143 if (sb->s_op && sb->s_op->nr_cached_objects)
144 total_objects = sb->s_op->nr_cached_objects(sb, sc);
145
146 total_objects += list_lru_shrink_count(&sb->s_dentry_lru, sc);
147 total_objects += list_lru_shrink_count(&sb->s_inode_lru, sc);
148
149 if (!total_objects)
150 return SHRINK_EMPTY;
151
152 total_objects = vfs_pressure_ratio(total_objects);
153 return total_objects;
154}
155
156static void destroy_super_work(struct work_struct *work)
157{
158 struct super_block *s = container_of(work, struct super_block,
159 destroy_work);
160 int i;
161
162 for (i = 0; i < SB_FREEZE_LEVELS; i++)
163 percpu_free_rwsem(&s->s_writers.rw_sem[i]);
164 kfree(s);
165}
166
167static void destroy_super_rcu(struct rcu_head *head)
168{
169 struct super_block *s = container_of(head, struct super_block, rcu);
170 INIT_WORK(&s->destroy_work, destroy_super_work);
171 schedule_work(&s->destroy_work);
172}
173
174
175static void destroy_unused_super(struct super_block *s)
176{
177 if (!s)
178 return;
179 up_write(&s->s_umount);
180 list_lru_destroy(&s->s_dentry_lru);
181 list_lru_destroy(&s->s_inode_lru);
182 security_sb_free(s);
183 put_user_ns(s->s_user_ns);
184 kfree(s->s_subtype);
185 free_prealloced_shrinker(&s->s_shrink);
186
187 destroy_super_work(&s->destroy_work);
188}
189
190
191
192
193
194
195
196
197
198
199static struct super_block *alloc_super(struct file_system_type *type, int flags,
200 struct user_namespace *user_ns)
201{
202 struct super_block *s = kzalloc(sizeof(struct super_block), GFP_USER);
203 static const struct super_operations default_op;
204 int i;
205
206 if (!s)
207 return NULL;
208
209 INIT_LIST_HEAD(&s->s_mounts);
210 s->s_user_ns = get_user_ns(user_ns);
211 init_rwsem(&s->s_umount);
212 lockdep_set_class(&s->s_umount, &type->s_umount_key);
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228 down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING);
229
230 if (security_sb_alloc(s))
231 goto fail;
232
233 for (i = 0; i < SB_FREEZE_LEVELS; i++) {
234 if (__percpu_init_rwsem(&s->s_writers.rw_sem[i],
235 sb_writers_name[i],
236 &type->s_writers_key[i]))
237 goto fail;
238 }
239 init_waitqueue_head(&s->s_writers.wait_unfrozen);
240 s->s_bdi = &noop_backing_dev_info;
241 s->s_flags = flags;
242 if (s->s_user_ns != &init_user_ns)
243 s->s_iflags |= SB_I_NODEV;
244 INIT_HLIST_NODE(&s->s_instances);
245 INIT_HLIST_BL_HEAD(&s->s_roots);
246 mutex_init(&s->s_sync_lock);
247 INIT_LIST_HEAD(&s->s_inodes);
248 spin_lock_init(&s->s_inode_list_lock);
249 INIT_LIST_HEAD(&s->s_inodes_wb);
250 spin_lock_init(&s->s_inode_wblist_lock);
251
252 s->s_count = 1;
253 atomic_set(&s->s_active, 1);
254 mutex_init(&s->s_vfs_rename_mutex);
255 lockdep_set_class(&s->s_vfs_rename_mutex, &type->s_vfs_rename_key);
256 init_rwsem(&s->s_dquot.dqio_sem);
257 s->s_maxbytes = MAX_NON_LFS;
258 s->s_op = &default_op;
259 s->s_time_gran = 1000000000;
260 s->cleancache_poolid = CLEANCACHE_NO_POOL;
261
262 s->s_shrink.seeks = DEFAULT_SEEKS;
263 s->s_shrink.scan_objects = super_cache_scan;
264 s->s_shrink.count_objects = super_cache_count;
265 s->s_shrink.batch = 1024;
266 s->s_shrink.flags = SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE;
267 if (prealloc_shrinker(&s->s_shrink))
268 goto fail;
269 if (list_lru_init_memcg(&s->s_dentry_lru, &s->s_shrink))
270 goto fail;
271 if (list_lru_init_memcg(&s->s_inode_lru, &s->s_shrink))
272 goto fail;
273 return s;
274
275fail:
276 destroy_unused_super(s);
277 return NULL;
278}
279
280
281
282
283
284
285static void __put_super(struct super_block *s)
286{
287 if (!--s->s_count) {
288 list_del_init(&s->s_list);
289 WARN_ON(s->s_dentry_lru.node);
290 WARN_ON(s->s_inode_lru.node);
291 WARN_ON(!list_empty(&s->s_mounts));
292 security_sb_free(s);
293 put_user_ns(s->s_user_ns);
294 kfree(s->s_subtype);
295 call_rcu(&s->rcu, destroy_super_rcu);
296 }
297}
298
299
300
301
302
303
304
305
306static void put_super(struct super_block *sb)
307{
308 spin_lock(&sb_lock);
309 __put_super(sb);
310 spin_unlock(&sb_lock);
311}
312
313
314
315
316
317
318
319
320
321
322
323
324
325void deactivate_locked_super(struct super_block *s)
326{
327 struct file_system_type *fs = s->s_type;
328 if (atomic_dec_and_test(&s->s_active)) {
329 cleancache_invalidate_fs(s);
330 unregister_shrinker(&s->s_shrink);
331 fs->kill_sb(s);
332
333
334
335
336
337
338 list_lru_destroy(&s->s_dentry_lru);
339 list_lru_destroy(&s->s_inode_lru);
340
341 put_filesystem(fs);
342 put_super(s);
343 } else {
344 up_write(&s->s_umount);
345 }
346}
347
348EXPORT_SYMBOL(deactivate_locked_super);
349
350
351
352
353
354
355
356
357
358void deactivate_super(struct super_block *s)
359{
360 if (!atomic_add_unless(&s->s_active, -1, 1)) {
361 down_write(&s->s_umount);
362 deactivate_locked_super(s);
363 }
364}
365
366EXPORT_SYMBOL(deactivate_super);
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381static int grab_super(struct super_block *s) __releases(sb_lock)
382{
383 s->s_count++;
384 spin_unlock(&sb_lock);
385 down_write(&s->s_umount);
386 if ((s->s_flags & SB_BORN) && atomic_inc_not_zero(&s->s_active)) {
387 put_super(s);
388 return 1;
389 }
390 up_write(&s->s_umount);
391 put_super(s);
392 return 0;
393}
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412bool trylock_super(struct super_block *sb)
413{
414 if (down_read_trylock(&sb->s_umount)) {
415 if (!hlist_unhashed(&sb->s_instances) &&
416 sb->s_root && (sb->s_flags & SB_BORN))
417 return true;
418 up_read(&sb->s_umount);
419 }
420
421 return false;
422}
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438void generic_shutdown_super(struct super_block *sb)
439{
440 const struct super_operations *sop = sb->s_op;
441
442 if (sb->s_root) {
443 shrink_dcache_for_umount(sb);
444 sync_filesystem(sb);
445 sb->s_flags &= ~SB_ACTIVE;
446
447 fsnotify_sb_delete(sb);
448 cgroup_writeback_umount();
449
450 evict_inodes(sb);
451
452 if (sb->s_dio_done_wq) {
453 destroy_workqueue(sb->s_dio_done_wq);
454 sb->s_dio_done_wq = NULL;
455 }
456
457 if (sop->put_super)
458 sop->put_super(sb);
459
460 if (!list_empty(&sb->s_inodes)) {
461 printk("VFS: Busy inodes after unmount of %s. "
462 "Self-destruct in 5 seconds. Have a nice day...\n",
463 sb->s_id);
464 }
465 }
466 spin_lock(&sb_lock);
467
468 hlist_del_init(&sb->s_instances);
469 spin_unlock(&sb_lock);
470 up_write(&sb->s_umount);
471 if (sb->s_bdi != &noop_backing_dev_info) {
472 bdi_put(sb->s_bdi);
473 sb->s_bdi = &noop_backing_dev_info;
474 }
475}
476
477EXPORT_SYMBOL(generic_shutdown_super);
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497struct super_block *sget_fc(struct fs_context *fc,
498 int (*test)(struct super_block *, struct fs_context *),
499 int (*set)(struct super_block *, struct fs_context *))
500{
501 struct super_block *s = NULL;
502 struct super_block *old;
503 struct user_namespace *user_ns = fc->global ? &init_user_ns : fc->user_ns;
504 int err;
505
506 if (!(fc->sb_flags & SB_KERNMOUNT) &&
507 fc->purpose != FS_CONTEXT_FOR_SUBMOUNT) {
508
509
510
511 if (!(fc->fs_type->fs_flags & FS_USERNS_MOUNT)) {
512 if (!capable(CAP_SYS_ADMIN))
513 return ERR_PTR(-EPERM);
514 } else {
515 if (!ns_capable(fc->user_ns, CAP_SYS_ADMIN))
516 return ERR_PTR(-EPERM);
517 }
518 }
519
520retry:
521 spin_lock(&sb_lock);
522 if (test) {
523 hlist_for_each_entry(old, &fc->fs_type->fs_supers, s_instances) {
524 if (test(old, fc))
525 goto share_extant_sb;
526 }
527 }
528 if (!s) {
529 spin_unlock(&sb_lock);
530 s = alloc_super(fc->fs_type, fc->sb_flags, user_ns);
531 if (!s)
532 return ERR_PTR(-ENOMEM);
533 goto retry;
534 }
535
536 s->s_fs_info = fc->s_fs_info;
537 err = set(s, fc);
538 if (err) {
539 s->s_fs_info = NULL;
540 spin_unlock(&sb_lock);
541 destroy_unused_super(s);
542 return ERR_PTR(err);
543 }
544 fc->s_fs_info = NULL;
545 s->s_type = fc->fs_type;
546 strlcpy(s->s_id, s->s_type->name, sizeof(s->s_id));
547 list_add_tail(&s->s_list, &super_blocks);
548 hlist_add_head(&s->s_instances, &s->s_type->fs_supers);
549 spin_unlock(&sb_lock);
550 get_filesystem(s->s_type);
551 register_shrinker_prepared(&s->s_shrink);
552 return s;
553
554share_extant_sb:
555 if (user_ns != old->s_user_ns) {
556 spin_unlock(&sb_lock);
557 destroy_unused_super(s);
558 return ERR_PTR(-EBUSY);
559 }
560 if (!grab_super(old))
561 goto retry;
562 destroy_unused_super(s);
563 return old;
564}
565EXPORT_SYMBOL(sget_fc);
566
567
568
569
570
571
572
573
574
575
576struct super_block *sget_userns(struct file_system_type *type,
577 int (*test)(struct super_block *,void *),
578 int (*set)(struct super_block *,void *),
579 int flags, struct user_namespace *user_ns,
580 void *data)
581{
582 struct super_block *s = NULL;
583 struct super_block *old;
584 int err;
585
586 if (!(flags & (SB_KERNMOUNT|SB_SUBMOUNT)) &&
587 !(type->fs_flags & FS_USERNS_MOUNT) &&
588 !capable(CAP_SYS_ADMIN))
589 return ERR_PTR(-EPERM);
590retry:
591 spin_lock(&sb_lock);
592 if (test) {
593 hlist_for_each_entry(old, &type->fs_supers, s_instances) {
594 if (!test(old, data))
595 continue;
596 if (user_ns != old->s_user_ns) {
597 spin_unlock(&sb_lock);
598 destroy_unused_super(s);
599 return ERR_PTR(-EBUSY);
600 }
601 if (!grab_super(old))
602 goto retry;
603 destroy_unused_super(s);
604 return old;
605 }
606 }
607 if (!s) {
608 spin_unlock(&sb_lock);
609 s = alloc_super(type, (flags & ~SB_SUBMOUNT), user_ns);
610 if (!s)
611 return ERR_PTR(-ENOMEM);
612 goto retry;
613 }
614
615 err = set(s, data);
616 if (err) {
617 spin_unlock(&sb_lock);
618 destroy_unused_super(s);
619 return ERR_PTR(err);
620 }
621 s->s_type = type;
622 strlcpy(s->s_id, type->name, sizeof(s->s_id));
623 list_add_tail(&s->s_list, &super_blocks);
624 hlist_add_head(&s->s_instances, &type->fs_supers);
625 spin_unlock(&sb_lock);
626 get_filesystem(type);
627 register_shrinker_prepared(&s->s_shrink);
628 return s;
629}
630
631EXPORT_SYMBOL(sget_userns);
632
633
634
635
636
637
638
639
640
641struct super_block *sget(struct file_system_type *type,
642 int (*test)(struct super_block *,void *),
643 int (*set)(struct super_block *,void *),
644 int flags,
645 void *data)
646{
647 struct user_namespace *user_ns = current_user_ns();
648
649
650
651
652
653 if (flags & SB_SUBMOUNT)
654 user_ns = &init_user_ns;
655
656
657 if (!(flags & (SB_KERNMOUNT|SB_SUBMOUNT)) && !ns_capable(user_ns, CAP_SYS_ADMIN))
658 return ERR_PTR(-EPERM);
659
660 return sget_userns(type, test, set, flags, user_ns, data);
661}
662
663EXPORT_SYMBOL(sget);
664
665void drop_super(struct super_block *sb)
666{
667 up_read(&sb->s_umount);
668 put_super(sb);
669}
670
671EXPORT_SYMBOL(drop_super);
672
673void drop_super_exclusive(struct super_block *sb)
674{
675 up_write(&sb->s_umount);
676 put_super(sb);
677}
678EXPORT_SYMBOL(drop_super_exclusive);
679
680static void __iterate_supers(void (*f)(struct super_block *))
681{
682 struct super_block *sb, *p = NULL;
683
684 spin_lock(&sb_lock);
685 list_for_each_entry(sb, &super_blocks, s_list) {
686 if (hlist_unhashed(&sb->s_instances))
687 continue;
688 sb->s_count++;
689 spin_unlock(&sb_lock);
690
691 f(sb);
692
693 spin_lock(&sb_lock);
694 if (p)
695 __put_super(p);
696 p = sb;
697 }
698 if (p)
699 __put_super(p);
700 spin_unlock(&sb_lock);
701}
702
703
704
705
706
707
708
709
710void iterate_supers(void (*f)(struct super_block *, void *), void *arg)
711{
712 struct super_block *sb, *p = NULL;
713
714 spin_lock(&sb_lock);
715 list_for_each_entry(sb, &super_blocks, s_list) {
716 if (hlist_unhashed(&sb->s_instances))
717 continue;
718 sb->s_count++;
719 spin_unlock(&sb_lock);
720
721 down_read(&sb->s_umount);
722 if (sb->s_root && (sb->s_flags & SB_BORN))
723 f(sb, arg);
724 up_read(&sb->s_umount);
725
726 spin_lock(&sb_lock);
727 if (p)
728 __put_super(p);
729 p = sb;
730 }
731 if (p)
732 __put_super(p);
733 spin_unlock(&sb_lock);
734}
735
736
737
738
739
740
741
742
743
744
745void iterate_supers_type(struct file_system_type *type,
746 void (*f)(struct super_block *, void *), void *arg)
747{
748 struct super_block *sb, *p = NULL;
749
750 spin_lock(&sb_lock);
751 hlist_for_each_entry(sb, &type->fs_supers, s_instances) {
752 sb->s_count++;
753 spin_unlock(&sb_lock);
754
755 down_read(&sb->s_umount);
756 if (sb->s_root && (sb->s_flags & SB_BORN))
757 f(sb, arg);
758 up_read(&sb->s_umount);
759
760 spin_lock(&sb_lock);
761 if (p)
762 __put_super(p);
763 p = sb;
764 }
765 if (p)
766 __put_super(p);
767 spin_unlock(&sb_lock);
768}
769
770EXPORT_SYMBOL(iterate_supers_type);
771
772static struct super_block *__get_super(struct block_device *bdev, bool excl)
773{
774 struct super_block *sb;
775
776 if (!bdev)
777 return NULL;
778
779 spin_lock(&sb_lock);
780rescan:
781 list_for_each_entry(sb, &super_blocks, s_list) {
782 if (hlist_unhashed(&sb->s_instances))
783 continue;
784 if (sb->s_bdev == bdev) {
785 sb->s_count++;
786 spin_unlock(&sb_lock);
787 if (!excl)
788 down_read(&sb->s_umount);
789 else
790 down_write(&sb->s_umount);
791
792 if (sb->s_root && (sb->s_flags & SB_BORN))
793 return sb;
794 if (!excl)
795 up_read(&sb->s_umount);
796 else
797 up_write(&sb->s_umount);
798
799 spin_lock(&sb_lock);
800 __put_super(sb);
801 goto rescan;
802 }
803 }
804 spin_unlock(&sb_lock);
805 return NULL;
806}
807
808
809
810
811
812
813
814
815struct super_block *get_super(struct block_device *bdev)
816{
817 return __get_super(bdev, false);
818}
819EXPORT_SYMBOL(get_super);
820
821static struct super_block *__get_super_thawed(struct block_device *bdev,
822 bool excl)
823{
824 while (1) {
825 struct super_block *s = __get_super(bdev, excl);
826 if (!s || s->s_writers.frozen == SB_UNFROZEN)
827 return s;
828 if (!excl)
829 up_read(&s->s_umount);
830 else
831 up_write(&s->s_umount);
832 wait_event(s->s_writers.wait_unfrozen,
833 s->s_writers.frozen == SB_UNFROZEN);
834 put_super(s);
835 }
836}
837
838
839
840
841
842
843
844
845
846
847struct super_block *get_super_thawed(struct block_device *bdev)
848{
849 return __get_super_thawed(bdev, false);
850}
851EXPORT_SYMBOL(get_super_thawed);
852
853
854
855
856
857
858
859
860
861
862struct super_block *get_super_exclusive_thawed(struct block_device *bdev)
863{
864 return __get_super_thawed(bdev, true);
865}
866EXPORT_SYMBOL(get_super_exclusive_thawed);
867
868
869
870
871
872
873
874
875
876struct super_block *get_active_super(struct block_device *bdev)
877{
878 struct super_block *sb;
879
880 if (!bdev)
881 return NULL;
882
883restart:
884 spin_lock(&sb_lock);
885 list_for_each_entry(sb, &super_blocks, s_list) {
886 if (hlist_unhashed(&sb->s_instances))
887 continue;
888 if (sb->s_bdev == bdev) {
889 if (!grab_super(sb))
890 goto restart;
891 up_write(&sb->s_umount);
892 return sb;
893 }
894 }
895 spin_unlock(&sb_lock);
896 return NULL;
897}
898
899struct super_block *user_get_super(dev_t dev)
900{
901 struct super_block *sb;
902
903 spin_lock(&sb_lock);
904rescan:
905 list_for_each_entry(sb, &super_blocks, s_list) {
906 if (hlist_unhashed(&sb->s_instances))
907 continue;
908 if (sb->s_dev == dev) {
909 sb->s_count++;
910 spin_unlock(&sb_lock);
911 down_read(&sb->s_umount);
912
913 if (sb->s_root && (sb->s_flags & SB_BORN))
914 return sb;
915 up_read(&sb->s_umount);
916
917 spin_lock(&sb_lock);
918 __put_super(sb);
919 goto rescan;
920 }
921 }
922 spin_unlock(&sb_lock);
923 return NULL;
924}
925
926
927
928
929
930
931
932int reconfigure_super(struct fs_context *fc)
933{
934 struct super_block *sb = fc->root->d_sb;
935 int retval;
936 bool remount_ro = false;
937 bool force = fc->sb_flags & SB_FORCE;
938
939 if (fc->sb_flags_mask & ~MS_RMT_MASK)
940 return -EINVAL;
941 if (sb->s_writers.frozen != SB_UNFROZEN)
942 return -EBUSY;
943
944 retval = security_sb_remount(sb, fc->security);
945 if (retval)
946 return retval;
947
948 if (fc->sb_flags_mask & SB_RDONLY) {
949#ifdef CONFIG_BLOCK
950 if (!(fc->sb_flags & SB_RDONLY) && bdev_read_only(sb->s_bdev))
951 return -EACCES;
952#endif
953
954 remount_ro = (fc->sb_flags & SB_RDONLY) && !sb_rdonly(sb);
955 }
956
957 if (remount_ro) {
958 if (!hlist_empty(&sb->s_pins)) {
959 up_write(&sb->s_umount);
960 group_pin_kill(&sb->s_pins);
961 down_write(&sb->s_umount);
962 if (!sb->s_root)
963 return 0;
964 if (sb->s_writers.frozen != SB_UNFROZEN)
965 return -EBUSY;
966 remount_ro = !sb_rdonly(sb);
967 }
968 }
969 shrink_dcache_sb(sb);
970
971
972
973
974 if (remount_ro) {
975 if (force) {
976 sb->s_readonly_remount = 1;
977 smp_wmb();
978 } else {
979 retval = sb_prepare_remount_readonly(sb);
980 if (retval)
981 return retval;
982 }
983 }
984
985 if (fc->ops->reconfigure) {
986 retval = fc->ops->reconfigure(fc);
987 if (retval) {
988 if (!force)
989 goto cancel_readonly;
990
991 WARN(1, "forced remount of a %s fs returned %i\n",
992 sb->s_type->name, retval);
993 }
994 }
995
996 WRITE_ONCE(sb->s_flags, ((sb->s_flags & ~fc->sb_flags_mask) |
997 (fc->sb_flags & fc->sb_flags_mask)));
998
999 smp_wmb();
1000 sb->s_readonly_remount = 0;
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010 if (remount_ro && sb->s_bdev)
1011 invalidate_bdev(sb->s_bdev);
1012 return 0;
1013
1014cancel_readonly:
1015 sb->s_readonly_remount = 0;
1016 return retval;
1017}
1018
1019static void do_emergency_remount_callback(struct super_block *sb)
1020{
1021 down_write(&sb->s_umount);
1022 if (sb->s_root && sb->s_bdev && (sb->s_flags & SB_BORN) &&
1023 !sb_rdonly(sb)) {
1024 struct fs_context *fc;
1025
1026 fc = fs_context_for_reconfigure(sb->s_root,
1027 SB_RDONLY | SB_FORCE, SB_RDONLY);
1028 if (!IS_ERR(fc)) {
1029 if (parse_monolithic_mount_data(fc, NULL) == 0)
1030 (void)reconfigure_super(fc);
1031 put_fs_context(fc);
1032 }
1033 }
1034 up_write(&sb->s_umount);
1035}
1036
1037static void do_emergency_remount(struct work_struct *work)
1038{
1039 __iterate_supers(do_emergency_remount_callback);
1040 kfree(work);
1041 printk("Emergency Remount complete\n");
1042}
1043
1044void emergency_remount(void)
1045{
1046 struct work_struct *work;
1047
1048 work = kmalloc(sizeof(*work), GFP_ATOMIC);
1049 if (work) {
1050 INIT_WORK(work, do_emergency_remount);
1051 schedule_work(work);
1052 }
1053}
1054
1055static void do_thaw_all_callback(struct super_block *sb)
1056{
1057 down_write(&sb->s_umount);
1058 if (sb->s_root && sb->s_flags & SB_BORN) {
1059 emergency_thaw_bdev(sb);
1060 thaw_super_locked(sb);
1061 } else {
1062 up_write(&sb->s_umount);
1063 }
1064}
1065
1066static void do_thaw_all(struct work_struct *work)
1067{
1068 __iterate_supers(do_thaw_all_callback);
1069 kfree(work);
1070 printk(KERN_WARNING "Emergency Thaw complete\n");
1071}
1072
1073
1074
1075
1076
1077
1078void emergency_thaw_all(void)
1079{
1080 struct work_struct *work;
1081
1082 work = kmalloc(sizeof(*work), GFP_ATOMIC);
1083 if (work) {
1084 INIT_WORK(work, do_thaw_all);
1085 schedule_work(work);
1086 }
1087}
1088
1089static DEFINE_IDA(unnamed_dev_ida);
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102int get_anon_bdev(dev_t *p)
1103{
1104 int dev;
1105
1106
1107
1108
1109
1110 dev = ida_alloc_range(&unnamed_dev_ida, 1, (1 << MINORBITS) - 1,
1111 GFP_ATOMIC);
1112 if (dev == -ENOSPC)
1113 dev = -EMFILE;
1114 if (dev < 0)
1115 return dev;
1116
1117 *p = MKDEV(0, dev);
1118 return 0;
1119}
1120EXPORT_SYMBOL(get_anon_bdev);
1121
1122void free_anon_bdev(dev_t dev)
1123{
1124 ida_free(&unnamed_dev_ida, MINOR(dev));
1125}
1126EXPORT_SYMBOL(free_anon_bdev);
1127
1128int set_anon_super(struct super_block *s, void *data)
1129{
1130 return get_anon_bdev(&s->s_dev);
1131}
1132EXPORT_SYMBOL(set_anon_super);
1133
1134void kill_anon_super(struct super_block *sb)
1135{
1136 dev_t dev = sb->s_dev;
1137 generic_shutdown_super(sb);
1138 free_anon_bdev(dev);
1139}
1140EXPORT_SYMBOL(kill_anon_super);
1141
1142void kill_litter_super(struct super_block *sb)
1143{
1144 if (sb->s_root)
1145 d_genocide(sb->s_root);
1146 kill_anon_super(sb);
1147}
1148EXPORT_SYMBOL(kill_litter_super);
1149
1150static int ns_test_super(struct super_block *sb, void *data)
1151{
1152 return sb->s_fs_info == data;
1153}
1154
1155static int ns_set_super(struct super_block *sb, void *data)
1156{
1157 sb->s_fs_info = data;
1158 return set_anon_super(sb, NULL);
1159}
1160
1161struct dentry *mount_ns(struct file_system_type *fs_type,
1162 int flags, void *data, void *ns, struct user_namespace *user_ns,
1163 int (*fill_super)(struct super_block *, void *, int))
1164{
1165 struct super_block *sb;
1166
1167
1168
1169
1170 if (!(flags & SB_KERNMOUNT) && !ns_capable(user_ns, CAP_SYS_ADMIN))
1171 return ERR_PTR(-EPERM);
1172
1173 sb = sget_userns(fs_type, ns_test_super, ns_set_super, flags,
1174 user_ns, ns);
1175 if (IS_ERR(sb))
1176 return ERR_CAST(sb);
1177
1178 if (!sb->s_root) {
1179 int err;
1180 err = fill_super(sb, data, flags & SB_SILENT ? 1 : 0);
1181 if (err) {
1182 deactivate_locked_super(sb);
1183 return ERR_PTR(err);
1184 }
1185
1186 sb->s_flags |= SB_ACTIVE;
1187 }
1188
1189 return dget(sb->s_root);
1190}
1191
1192EXPORT_SYMBOL(mount_ns);
1193
1194int set_anon_super_fc(struct super_block *sb, struct fs_context *fc)
1195{
1196 return set_anon_super(sb, NULL);
1197}
1198EXPORT_SYMBOL(set_anon_super_fc);
1199
1200static int test_keyed_super(struct super_block *sb, struct fs_context *fc)
1201{
1202 return sb->s_fs_info == fc->s_fs_info;
1203}
1204
/*
 * sget_fc() "test" callback for single-instance filesystems: every
 * existing superblock of the type matches, so both arguments are ignored.
 */
static int test_single_super(struct super_block *s, struct fs_context *fc)
{
	enum { ALWAYS_MATCH = 1 };

	return ALWAYS_MATCH;
}
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235int vfs_get_super(struct fs_context *fc,
1236 enum vfs_get_super_keying keying,
1237 int (*fill_super)(struct super_block *sb,
1238 struct fs_context *fc))
1239{
1240 int (*test)(struct super_block *, struct fs_context *);
1241 struct super_block *sb;
1242
1243 switch (keying) {
1244 case vfs_get_single_super:
1245 test = test_single_super;
1246 break;
1247 case vfs_get_keyed_super:
1248 test = test_keyed_super;
1249 break;
1250 case vfs_get_independent_super:
1251 test = NULL;
1252 break;
1253 default:
1254 BUG();
1255 }
1256
1257 sb = sget_fc(fc, test, set_anon_super_fc);
1258 if (IS_ERR(sb))
1259 return PTR_ERR(sb);
1260
1261 if (!sb->s_root) {
1262 int err = fill_super(sb, fc);
1263 if (err) {
1264 deactivate_locked_super(sb);
1265 return err;
1266 }
1267
1268 sb->s_flags |= SB_ACTIVE;
1269 }
1270
1271 BUG_ON(fc->root);
1272 fc->root = dget(sb->s_root);
1273 return 0;
1274}
1275EXPORT_SYMBOL(vfs_get_super);
1276
1277#ifdef CONFIG_BLOCK
1278static int set_bdev_super(struct super_block *s, void *data)
1279{
1280 s->s_bdev = data;
1281 s->s_dev = s->s_bdev->bd_dev;
1282 s->s_bdi = bdi_get(s->s_bdev->bd_bdi);
1283
1284 return 0;
1285}
1286
1287static int test_bdev_super(struct super_block *s, void *data)
1288{
1289 return (void *)s->s_bdev == data;
1290}
1291
1292struct dentry *mount_bdev(struct file_system_type *fs_type,
1293 int flags, const char *dev_name, void *data,
1294 int (*fill_super)(struct super_block *, void *, int))
1295{
1296 struct block_device *bdev;
1297 struct super_block *s;
1298 fmode_t mode = FMODE_READ | FMODE_EXCL;
1299 int error = 0;
1300
1301 if (!(flags & SB_RDONLY))
1302 mode |= FMODE_WRITE;
1303
1304 bdev = blkdev_get_by_path(dev_name, mode, fs_type);
1305 if (IS_ERR(bdev))
1306 return ERR_CAST(bdev);
1307
1308
1309
1310
1311
1312
1313 mutex_lock(&bdev->bd_fsfreeze_mutex);
1314 if (bdev->bd_fsfreeze_count > 0) {
1315 mutex_unlock(&bdev->bd_fsfreeze_mutex);
1316 error = -EBUSY;
1317 goto error_bdev;
1318 }
1319 s = sget(fs_type, test_bdev_super, set_bdev_super, flags | SB_NOSEC,
1320 bdev);
1321 mutex_unlock(&bdev->bd_fsfreeze_mutex);
1322 if (IS_ERR(s))
1323 goto error_s;
1324
1325 if (s->s_root) {
1326 if ((flags ^ s->s_flags) & SB_RDONLY) {
1327 deactivate_locked_super(s);
1328 error = -EBUSY;
1329 goto error_bdev;
1330 }
1331
1332
1333
1334
1335
1336
1337
1338
1339 up_write(&s->s_umount);
1340 blkdev_put(bdev, mode);
1341 down_write(&s->s_umount);
1342 } else {
1343 s->s_mode = mode;
1344 snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
1345 sb_set_blocksize(s, block_size(bdev));
1346 error = fill_super(s, data, flags & SB_SILENT ? 1 : 0);
1347 if (error) {
1348 deactivate_locked_super(s);
1349 goto error;
1350 }
1351
1352 s->s_flags |= SB_ACTIVE;
1353 bdev->bd_super = s;
1354 }
1355
1356 return dget(s->s_root);
1357
1358error_s:
1359 error = PTR_ERR(s);
1360error_bdev:
1361 blkdev_put(bdev, mode);
1362error:
1363 return ERR_PTR(error);
1364}
1365EXPORT_SYMBOL(mount_bdev);
1366
1367void kill_block_super(struct super_block *sb)
1368{
1369 struct block_device *bdev = sb->s_bdev;
1370 fmode_t mode = sb->s_mode;
1371
1372 bdev->bd_super = NULL;
1373 generic_shutdown_super(sb);
1374 sync_blockdev(bdev);
1375 WARN_ON_ONCE(!(mode & FMODE_EXCL));
1376 blkdev_put(bdev, mode | FMODE_EXCL);
1377}
1378
1379EXPORT_SYMBOL(kill_block_super);
1380#endif
1381
1382struct dentry *mount_nodev(struct file_system_type *fs_type,
1383 int flags, void *data,
1384 int (*fill_super)(struct super_block *, void *, int))
1385{
1386 int error;
1387 struct super_block *s = sget(fs_type, NULL, set_anon_super, flags, NULL);
1388
1389 if (IS_ERR(s))
1390 return ERR_CAST(s);
1391
1392 error = fill_super(s, data, flags & SB_SILENT ? 1 : 0);
1393 if (error) {
1394 deactivate_locked_super(s);
1395 return ERR_PTR(error);
1396 }
1397 s->s_flags |= SB_ACTIVE;
1398 return dget(s->s_root);
1399}
1400EXPORT_SYMBOL(mount_nodev);
1401
1402static int reconfigure_single(struct super_block *s,
1403 int flags, void *data)
1404{
1405 struct fs_context *fc;
1406 int ret;
1407
1408
1409
1410
1411
1412
1413 fc = fs_context_for_reconfigure(s->s_root, flags, MS_RMT_MASK);
1414 if (IS_ERR(fc))
1415 return PTR_ERR(fc);
1416
1417 ret = parse_monolithic_mount_data(fc, data);
1418 if (ret < 0)
1419 goto out;
1420
1421 ret = reconfigure_super(fc);
1422out:
1423 put_fs_context(fc);
1424 return ret;
1425}
1426
/*
 * sget() "test" callback for mount_single(): every existing superblock of
 * the type matches, so both arguments are ignored.
 */
static int compare_single(struct super_block *s, void *p)
{
	enum { ALWAYS_MATCH = 1 };

	return ALWAYS_MATCH;
}
1431
1432struct dentry *mount_single(struct file_system_type *fs_type,
1433 int flags, void *data,
1434 int (*fill_super)(struct super_block *, void *, int))
1435{
1436 struct super_block *s;
1437 int error;
1438
1439 s = sget(fs_type, compare_single, set_anon_super, flags, NULL);
1440 if (IS_ERR(s))
1441 return ERR_CAST(s);
1442 if (!s->s_root) {
1443 error = fill_super(s, data, flags & SB_SILENT ? 1 : 0);
1444 if (!error)
1445 s->s_flags |= SB_ACTIVE;
1446 } else {
1447 error = reconfigure_single(s, flags, data);
1448 }
1449 if (unlikely(error)) {
1450 deactivate_locked_super(s);
1451 return ERR_PTR(error);
1452 }
1453 return dget(s->s_root);
1454}
1455EXPORT_SYMBOL(mount_single);
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465int vfs_get_tree(struct fs_context *fc)
1466{
1467 struct super_block *sb;
1468 int error;
1469
1470 if (fc->root)
1471 return -EBUSY;
1472
1473
1474
1475
1476 error = fc->ops->get_tree(fc);
1477 if (error < 0)
1478 return error;
1479
1480 if (!fc->root) {
1481 pr_err("Filesystem %s get_tree() didn't set fc->root\n",
1482 fc->fs_type->name);
1483
1484
1485
1486 BUG();
1487 }
1488
1489 sb = fc->root->d_sb;
1490 WARN_ON(!sb->s_bdi);
1491
1492 if (fc->subtype && !sb->s_subtype) {
1493 sb->s_subtype = fc->subtype;
1494 fc->subtype = NULL;
1495 }
1496
1497
1498
1499
1500
1501
1502
1503 smp_wmb();
1504 sb->s_flags |= SB_BORN;
1505
1506 error = security_sb_set_mnt_opts(sb, fc->security, 0, NULL);
1507 if (unlikely(error)) {
1508 fc_drop_locked(fc);
1509 return error;
1510 }
1511
1512
1513
1514
1515
1516
1517
1518 WARN((sb->s_maxbytes < 0), "%s set sb->s_maxbytes to "
1519 "negative value (%lld)\n", fc->fs_type->name, sb->s_maxbytes);
1520
1521 return 0;
1522}
1523EXPORT_SYMBOL(vfs_get_tree);
1524
1525
1526
1527
1528
1529int super_setup_bdi_name(struct super_block *sb, char *fmt, ...)
1530{
1531 struct backing_dev_info *bdi;
1532 int err;
1533 va_list args;
1534
1535 bdi = bdi_alloc(GFP_KERNEL);
1536 if (!bdi)
1537 return -ENOMEM;
1538
1539 bdi->name = sb->s_type->name;
1540
1541 va_start(args, fmt);
1542 err = bdi_register_va(bdi, fmt, args);
1543 va_end(args);
1544 if (err) {
1545 bdi_put(bdi);
1546 return err;
1547 }
1548 WARN_ON(sb->s_bdi != &noop_backing_dev_info);
1549 sb->s_bdi = bdi;
1550
1551 return 0;
1552}
1553EXPORT_SYMBOL(super_setup_bdi_name);
1554
1555
1556
1557
1558
1559int super_setup_bdi(struct super_block *sb)
1560{
1561 static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
1562
1563 return super_setup_bdi_name(sb, "%.28s-%ld", sb->s_type->name,
1564 atomic_long_inc_return(&bdi_seq));
1565}
1566EXPORT_SYMBOL(super_setup_bdi);
1567
1568
1569
1570
1571
1572void __sb_end_write(struct super_block *sb, int level)
1573{
1574 percpu_up_read(sb->s_writers.rw_sem + level-1);
1575}
1576EXPORT_SYMBOL(__sb_end_write);
1577
1578
1579
1580
1581
1582int __sb_start_write(struct super_block *sb, int level, bool wait)
1583{
1584 bool force_trylock = false;
1585 int ret = 1;
1586
1587#ifdef CONFIG_LOCKDEP
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597 if (wait) {
1598 int i;
1599
1600 for (i = 0; i < level - 1; i++)
1601 if (percpu_rwsem_is_held(sb->s_writers.rw_sem + i)) {
1602 force_trylock = true;
1603 break;
1604 }
1605 }
1606#endif
1607 if (wait && !force_trylock)
1608 percpu_down_read(sb->s_writers.rw_sem + level-1);
1609 else
1610 ret = percpu_down_read_trylock(sb->s_writers.rw_sem + level-1);
1611
1612 WARN_ON(force_trylock && !ret);
1613 return ret;
1614}
1615EXPORT_SYMBOL(__sb_start_write);
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625static void sb_wait_write(struct super_block *sb, int level)
1626{
1627 percpu_down_write(sb->s_writers.rw_sem + level-1);
1628}
1629
1630
1631
1632
1633
1634static void lockdep_sb_freeze_release(struct super_block *sb)
1635{
1636 int level;
1637
1638 for (level = SB_FREEZE_LEVELS - 1; level >= 0; level--)
1639 percpu_rwsem_release(sb->s_writers.rw_sem + level, 0, _THIS_IP_);
1640}
1641
1642
1643
1644
1645static void lockdep_sb_freeze_acquire(struct super_block *sb)
1646{
1647 int level;
1648
1649 for (level = 0; level < SB_FREEZE_LEVELS; ++level)
1650 percpu_rwsem_acquire(sb->s_writers.rw_sem + level, 0, _THIS_IP_);
1651}
1652
1653static void sb_freeze_unlock(struct super_block *sb)
1654{
1655 int level;
1656
1657 for (level = SB_FREEZE_LEVELS - 1; level >= 0; level--)
1658 percpu_up_write(sb->s_writers.rw_sem + level);
1659}
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694int freeze_super(struct super_block *sb)
1695{
1696 int ret;
1697
1698 atomic_inc(&sb->s_active);
1699 down_write(&sb->s_umount);
1700 if (sb->s_writers.frozen != SB_UNFROZEN) {
1701 deactivate_locked_super(sb);
1702 return -EBUSY;
1703 }
1704
1705 if (!(sb->s_flags & SB_BORN)) {
1706 up_write(&sb->s_umount);
1707 return 0;
1708 }
1709
1710 if (sb_rdonly(sb)) {
1711
1712 sb->s_writers.frozen = SB_FREEZE_COMPLETE;
1713 up_write(&sb->s_umount);
1714 return 0;
1715 }
1716
1717 sb->s_writers.frozen = SB_FREEZE_WRITE;
1718
1719 up_write(&sb->s_umount);
1720 sb_wait_write(sb, SB_FREEZE_WRITE);
1721 down_write(&sb->s_umount);
1722
1723
1724 sb->s_writers.frozen = SB_FREEZE_PAGEFAULT;
1725 sb_wait_write(sb, SB_FREEZE_PAGEFAULT);
1726
1727
1728 sync_filesystem(sb);
1729
1730
1731 sb->s_writers.frozen = SB_FREEZE_FS;
1732 sb_wait_write(sb, SB_FREEZE_FS);
1733
1734 if (sb->s_op->freeze_fs) {
1735 ret = sb->s_op->freeze_fs(sb);
1736 if (ret) {
1737 printk(KERN_ERR
1738 "VFS:Filesystem freeze failed\n");
1739 sb->s_writers.frozen = SB_UNFROZEN;
1740 sb_freeze_unlock(sb);
1741 wake_up(&sb->s_writers.wait_unfrozen);
1742 deactivate_locked_super(sb);
1743 return ret;
1744 }
1745 }
1746
1747
1748
1749
1750 sb->s_writers.frozen = SB_FREEZE_COMPLETE;
1751 lockdep_sb_freeze_release(sb);
1752 up_write(&sb->s_umount);
1753 return 0;
1754}
1755EXPORT_SYMBOL(freeze_super);
1756
1757
1758
1759
1760
1761
1762
1763static int thaw_super_locked(struct super_block *sb)
1764{
1765 int error;
1766
1767 if (sb->s_writers.frozen != SB_FREEZE_COMPLETE) {
1768 up_write(&sb->s_umount);
1769 return -EINVAL;
1770 }
1771
1772 if (sb_rdonly(sb)) {
1773 sb->s_writers.frozen = SB_UNFROZEN;
1774 goto out;
1775 }
1776
1777 lockdep_sb_freeze_acquire(sb);
1778
1779 if (sb->s_op->unfreeze_fs) {
1780 error = sb->s_op->unfreeze_fs(sb);
1781 if (error) {
1782 printk(KERN_ERR
1783 "VFS:Filesystem thaw failed\n");
1784 lockdep_sb_freeze_release(sb);
1785 up_write(&sb->s_umount);
1786 return error;
1787 }
1788 }
1789
1790 sb->s_writers.frozen = SB_UNFROZEN;
1791 sb_freeze_unlock(sb);
1792out:
1793 wake_up(&sb->s_writers.wait_unfrozen);
1794 deactivate_locked_super(sb);
1795 return 0;
1796}
1797
1798int thaw_super(struct super_block *sb)
1799{
1800 down_write(&sb->s_umount);
1801 return thaw_super_locked(sb);
1802}
1803EXPORT_SYMBOL(thaw_super);
1804