/*
 *  linux/fs/super.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  super.c contains code to handle: - mount structures
 *                                   - super-block tables
 *                                   - filesystem drivers list
 *                                   - mount system call
 *                                   - umount system call
 *                                   - ustat system call
 *
 * GK 2/5/95  -  Changed to support mounting the root fs via NFS
 *
 *  Added kerneld support: Jacques Gelinas and Bjorn Ekwall
 *  Added change_root: Werner Almesberger & Hans Lermen, Feb '96
 *  Added options to /proc/mounts:
 *    Torbjörn Lindh (torbjorn.lindh@gopta.se), April 14, 1996.
 *  Added devfs support: Richard Gooch <rgooch@atnf.csiro.au>, 13-JAN-1998
 *  Heavily rewritten for 'one fs - one tree' dcache architecture. AL, Mar 2000
 */

#include <linux/export.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/mount.h>
#include <linux/security.h>
#include <linux/writeback.h>
#include <linux/idr.h>
#include <linux/mutex.h>
#include <linux/backing-dev.h>
#include <linux/rculist_bl.h>
#include <linux/cleancache.h>
#include <linux/fsnotify.h>
#include <linux/lockdep.h>
#ifndef __GENKSYMS__
#include <linux/user_namespace.h>
#endif
#include "internal.h"

static int thaw_super_locked(struct super_block *sb);

/* Version of the super_block wrapper allocated by alloc_super(). */
const unsigned super_block_wrapper_version = 0;

LIST_HEAD(super_blocks);
DEFINE_SPINLOCK(sb_lock);

static char *sb_writers_name[SB_FREEZE_LEVELS] = {
	"sb_writers",
	"sb_pagefaults",
	"sb_internal",
};

/*
 * One thing we have to be careful of with a per-sb shrinker is that we don't
 * drop the last active reference to the superblock from within the shrinker.
 * If that happens we could trigger unmount deadlock because the shrink scans
 * could be shooting down dentries and inodes that require the superblock to
 * exist while we are trying to tear it down.
 */
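/*
 * prune_super - shrink the dentry, inode and fs-private caches of a single
 * superblock.  Called with sc->nr_to_scan == 0 it merely reports the number
 * of cached objects (scaled by vfs_cache_pressure); otherwise it prunes each
 * cache in proportion to its share of the total.
 */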
static int prune_super(struct shrinker *shrink, struct shrink_control *sc)
{
	struct super_block *sb;
	int	fs_objects = 0;
	int	total_objects;

	sb = container_of(shrink, struct super_block, s_shrink);

	/*
	 * Don't touch the superblock until it is fully set up: MS_BORN is
	 * only set by mount_fs() once the root dentry and s_op are in place.
	 */
	if (!(sb->s_flags & MS_BORN))
		return -1;

	/* Pairs with the smp_wmb() in mount_fs() before MS_BORN is set. */
	smp_rmb();

	/*
	 * Deadlock avoidance.  We may hold various FS locks, and we don't
	 * want to recurse into the FS that called us in clear_inode() and
	 * friends.
	 */
	if (sc->nr_to_scan && !(sc->gfp_mask & __GFP_FS))
		return -1;

	if (sb->s_op && sb->s_op->nr_cached_objects)
		fs_objects = sb->s_op->nr_cached_objects(sb);

	total_objects = sb->s_nr_dentry_unused +
			sb->s_nr_inodes_unused + fs_objects + 1;

	if (sc->nr_to_scan) {
		int	dentries;
		int	inodes;

		/* proportion the scan between the caches */
		dentries = (sc->nr_to_scan * sb->s_nr_dentry_unused) /
							total_objects;
		inodes = (sc->nr_to_scan * sb->s_nr_inodes_unused) /
							total_objects;
		if (fs_objects)
			fs_objects = (sc->nr_to_scan * fs_objects) /
							total_objects;
		/*
		 * prune the dcache first as the icache is pinned by it, then
		 * prune the icache, followed by the filesystem specific caches
		 */
		prune_dcache_sb(sb, dentries);
		prune_icache_sb(sb, inodes);

		if (fs_objects && sb->s_op->free_cached_objects) {
			sb->s_op->free_cached_objects(sb, fs_objects);
			fs_objects = sb->s_op->nr_cached_objects(sb);
		}
		total_objects = sb->s_nr_dentry_unused +
				sb->s_nr_inodes_unused + fs_objects;
	}

	total_objects = (total_objects / 100) * sysctl_vfs_cache_pressure;
	return total_objects;
}

/**
 *	destroy_super	-	frees a superblock
 *	@s: superblock to free
 *
 *	Frees a superblock.
 */
static void destroy_super(struct super_block *s)
{
	int i;

	for (i = 0; i < SB_FREEZE_LEVELS; i++)
		percpu_counter_destroy(&s->s_writers.counter[i]);
	security_sb_free(s);
	WARN_ON(!list_empty(&s->s_mounts));
	put_user_ns(s->s_user_ns);
	kfree(s->s_subtype);
	kfree(s->s_options);
	kfree_rcu(s, rcu);
}

/**
 *	alloc_super	-	create new superblock
 *	@type:	filesystem type superblock should belong to
 *	@flags: the mount flags
 *	@user_ns: User namespace for the super_block
 *
 *	Allocates and initializes a new &struct super_block.  alloc_super()
 *	returns a pointer to a new superblock, or %NULL if allocation failed.
 */
static struct super_block *alloc_super(struct file_system_type *type, int flags,
				       struct user_namespace *user_ns)
{
	struct super_block *s = kzalloc(sizeof(struct super_block_wrapper),
					GFP_USER);
	static const struct super_operations default_op;
	int i;

	if (!s)
		return NULL;

	s->s_user_ns = get_user_ns(user_ns);

	if (security_sb_alloc(s))
		goto fail;
	for (i = 0; i < SB_FREEZE_LEVELS; i++) {
		if (percpu_counter_init(&s->s_writers.counter[i], 0,
					GFP_KERNEL) < 0)
			goto fail;
		lockdep_init_map(&s->s_writers.lock_map[i], sb_writers_name[i],
				 &type->s_writers_key[i], 0);
	}
	init_waitqueue_head(&s->s_writers.wait);
	init_waitqueue_head(&s->s_writers.wait_unfrozen);
	s->s_flags = flags;
	s->s_bdi = &default_backing_dev_info;
	if (s->s_user_ns != &init_user_ns)
		s->s_iflags |= SB_I_NODEV;
	INIT_HLIST_NODE(&s->s_instances);
	INIT_HLIST_BL_HEAD(&s->s_anon);
	mutex_init(&s->s_sync_lock);
	INIT_LIST_HEAD(&s->s_inodes);
	spin_lock_init(&s->s_inode_list_lock);
	INIT_LIST_HEAD(&s->s_inodes_wb);
	spin_lock_init(&s->s_inode_wblist_lock);
	INIT_LIST_HEAD(&s->s_dentry_lru);
	INIT_LIST_HEAD(&s->s_inode_lru);
	spin_lock_init(&s->s_inode_lru_lock);
	INIT_LIST_HEAD(&s->s_mounts);
	init_rwsem(&s->s_umount);
	lockdep_set_class(&s->s_umount, &type->s_umount_key);

	/*
	 * sget() can have s_umount recursion.
	 *
	 * When it cannot find a suitable sb, it allocates a new
	 * one (this one), and tries again to find a suitable old
	 * one.
	 *
	 * In case that succeeds, it will acquire the s_umount
	 * lock of the old one. Since these are clearly distinct
	 * locks, and this object isn't exposed yet, there's no
	 * risk of deadlocks.
	 *
	 * Annotate this by putting this lock in a different
	 * subclass.
	 */
	down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING);
	s->s_count = 1;
	atomic_set(&s->s_active, 1);
	mutex_init(&s->s_vfs_rename_mutex);
	lockdep_set_class(&s->s_vfs_rename_mutex, &type->s_vfs_rename_key);
	mutex_init(&s->s_dquot.dqio_mutex);
	mutex_init(&s->s_dquot.dqonoff_mutex);
	init_rwsem(&s->s_dquot.dqptr_sem);
	s->s_maxbytes = MAX_NON_LFS;
	s->s_op = &default_op;
	s->s_time_gran = 1000000000;
	s->cleancache_poolid = -1;

	s->s_shrink.seeks = DEFAULT_SEEKS;
	s->s_shrink.shrink = prune_super;
	s->s_shrink.batch = 1024;
	return s;

fail:
	destroy_super(s);
	return NULL;
}

/*
 * Drop a superblock's refcount.  The caller must hold sb_lock.
 */
static void __put_super(struct super_block *sb)
{
	if (!--sb->s_count) {
		list_del_init(&sb->s_list);
		destroy_super(sb);
	}
}

/**
 *	put_super	-	drop a temporary reference to superblock
 *	@sb: superblock in question
 *
 *	Drops a temporary reference, frees superblock if there's no
 *	references left.
 */
static void put_super(struct super_block *sb)
{
	spin_lock(&sb_lock);
	__put_super(sb);
	spin_unlock(&sb_lock);
}

/**
 *	deactivate_locked_super	-	drop an active reference to superblock
 *	@s: superblock to deactivate
 *
 *	Drops an active reference to superblock, converting it into a
 *	temporary one if there is no other active references left.  In that
 *	case we tell fs driver to shut it down and drop the temporary
 *	reference we had just acquired.
 *
 *	Caller holds exclusive lock on superblock; that lock is released.
 */
void deactivate_locked_super(struct super_block *s)
{
	struct file_system_type *fs = s->s_type;
	if (atomic_dec_and_test(&s->s_active)) {
		cleancache_invalidate_fs(s);
		unregister_shrinker(&s->s_shrink);
		fs->kill_sb(s);

		put_filesystem(fs);
		put_super(s);
	} else {
		up_write(&s->s_umount);
	}
}

EXPORT_SYMBOL(deactivate_locked_super);

/**
 *	deactivate_super	-	drop an active reference to superblock
 *	@s: superblock to deactivate
 *
 *	Variant of deactivate_locked_super(), except that superblock is *not*
 *	locked by caller.  If we are going to drop the final active reference,
 *	lock will be acquired prior to that.
 */
void deactivate_super(struct super_block *s)
{
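	/*
	 * Drop a reference unless it is the last active one: releasing
	 * the last active reference requires s_umount, which
	 * deactivate_locked_super() expects to be held on entry.
	 */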
	if (!atomic_add_unless(&s->s_active, -1, 1)) {
		down_write(&s->s_umount);
		deactivate_locked_super(s);
	}
}

EXPORT_SYMBOL(deactivate_super);

/**
 *	grab_super - acquire an active reference
 *	@s: reference we are trying to make active
 *
 *	Tries to acquire an active reference.  grab_super() is used when we
 *	had just found a superblock in super_blocks or fs_type->fs_supers
 *	and want to turn it into a full-blown active reference.  grab_super()
 *	is called with sb_lock held and drops it.  Returns 1 in case of
 *	success, 0 if we had failed (superblock contents was already dead or
 *	dying when grab_super() had been called).  Note that this is only
 *	called with sb_lock held.
 */
static int grab_super(struct super_block *s) __releases(sb_lock)
{
	s->s_count++;
	spin_unlock(&sb_lock);
	down_write(&s->s_umount);
	if ((s->s_flags & MS_BORN) && atomic_inc_not_zero(&s->s_active)) {
		put_super(s);
		return 1;
	}
	up_write(&s->s_umount);
	put_super(s);
	return 0;
}

/*
 *	grab_super_passive - acquire a passive reference
 *	@sb: reference we are trying to grab
 *
 *	Tries to acquire a passive reference. This is used in places where we
 *	cannot take an active reference but we need to ensure that the
 *	superblock does not go away while we are working on it. It returns
 *	false if a reference was not gained, and returns true with the s_umount
 *	lock held in read mode if a reference is gained. On successful return,
 *	the caller must drop the s_umount lock and the passive reference when
 *	done.
 */
bool grab_super_passive(struct super_block *sb)
{
	spin_lock(&sb_lock);
	if (hlist_unhashed(&sb->s_instances)) {
		spin_unlock(&sb_lock);
		return false;
	}

	sb->s_count++;
	spin_unlock(&sb_lock);

	if (down_read_trylock(&sb->s_umount)) {
		if (sb->s_root && (sb->s_flags & MS_BORN))
			return true;
		up_read(&sb->s_umount);
	}

	put_super(sb);
	return false;
}

/**
 *	generic_shutdown_super	-	common helper for ->kill_sb()
 *	@sb: superblock to kill
 *
 *	generic_shutdown_super() does all fs-independent work on superblock
 *	shutdown.  Typical ->kill_sb() should pick all fs-specific objects
 *	that need destruction out of superblock, call generic_shutdown_super()
 *	and release aforementioned objects.  Note: dentries and inodes _are_
 *	taken care of and do not need specific handling.
 */
void generic_shutdown_super(struct super_block *sb)
{
	const struct super_operations *sop = sb->s_op;

	if (sb->s_root) {
		shrink_dcache_for_umount(sb);
		sync_filesystem(sb);
		sb->s_flags &= ~MS_ACTIVE;

		fsnotify_unmount_inodes(sb);

		evict_inodes(sb);

		if (sb->s_dio_done_wq) {
			destroy_workqueue(sb->s_dio_done_wq);
			sb->s_dio_done_wq = NULL;
		}

		if (sop->put_super)
			sop->put_super(sb);

		if (!list_empty(&sb->s_inodes)) {
			printk("VFS: Busy inodes after unmount of %s. "
			       "Self-destruct in 5 seconds. Have a nice day...\n",
			       sb->s_id);
		}
	}
	spin_lock(&sb_lock);
	hlist_del_init(&sb->s_instances);
	spin_unlock(&sb_lock);
	up_write(&sb->s_umount);
}

EXPORT_SYMBOL(generic_shutdown_super);

/**
 *	sget_userns -	find or create a superblock
 *	@type:	filesystem type superblock should belong to
 *	@test:	comparison callback
 *	@set:	setup callback
 *	@flags:	mount flags
 *	@user_ns: User namespace for the super_block
 *	@data:	argument to each of them
 */
struct super_block *sget_userns(struct file_system_type *type,
			int (*test)(struct super_block *,void *),
			int (*set)(struct super_block *,void *),
			int flags, struct user_namespace *user_ns,
			void *data)
{
	struct super_block *s = NULL;
	struct super_block *old;
	int err;

	if (!(flags & (MS_KERNMOUNT|MS_SUBMOUNT)) &&
	    !(type->fs_flags & FS_USERNS_MOUNT) &&
	    !capable(CAP_SYS_ADMIN))
		return ERR_PTR(-EPERM);
retry:
	spin_lock(&sb_lock);
	if (test) {
		hlist_for_each_entry(old, &type->fs_supers, s_instances) {
			if (!test(old, data))
				continue;
			if (user_ns != old->s_user_ns) {
				spin_unlock(&sb_lock);
				if (s) {
					up_write(&s->s_umount);
					destroy_super(s);
				}
				return ERR_PTR(-EBUSY);
			}
			if (!grab_super(old))
				goto retry;
			if (s) {
				up_write(&s->s_umount);
				destroy_super(s);
				s = NULL;
			}
			return old;
		}
	}
	if (!s) {
		spin_unlock(&sb_lock);
		s = alloc_super(type, (flags & ~MS_SUBMOUNT), user_ns);
		if (!s)
			return ERR_PTR(-ENOMEM);
		goto retry;
	}

	err = set(s, data);
	if (err) {
		spin_unlock(&sb_lock);
		up_write(&s->s_umount);
		destroy_super(s);
		return ERR_PTR(err);
	}
	s->s_type = type;
	strlcpy(s->s_id, type->name, sizeof(s->s_id));
	list_add_tail(&s->s_list, &super_blocks);
	hlist_add_head(&s->s_instances, &type->fs_supers);
	spin_unlock(&sb_lock);
	get_filesystem(type);
	register_shrinker(&s->s_shrink);
	return s;
}

EXPORT_SYMBOL(sget_userns);

/**
 *	sget	-	find or create a superblock
 *	@type:	filesystem type superblock should belong to
 *	@test:	comparison callback
 *	@set:	setup callback
 *	@flags:	mount flags
 *	@data:	argument to each of them
 */
struct super_block *sget(struct file_system_type *type,
			int (*test)(struct super_block *,void *),
			int (*set)(struct super_block *,void *),
			int flags,
			void *data)
{
	struct user_namespace *user_ns = current_user_ns();

	/* We don't yet pass the user namespace of the parent
	 * mount to the submount, so use &init_user_ns until that
	 * changes.
	 */
	if (flags & MS_SUBMOUNT)
		user_ns = &init_user_ns;

	/* Ensure the requestor has permissions over the target filesystem */
	if (!(flags & (MS_KERNMOUNT|MS_SUBMOUNT)) && !ns_capable(user_ns, CAP_SYS_ADMIN))
		return ERR_PTR(-EPERM);

	return sget_userns(type, test, set, flags, user_ns, data);
}

EXPORT_SYMBOL(sget);

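/*
 * drop_super - release a superblock found by get_super() and friends:
 * drops the read lock on s_umount and the passive reference.
 */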
void drop_super(struct super_block *sb)
{
	up_read(&sb->s_umount);
	put_super(sb);
}

EXPORT_SYMBOL(drop_super);

/**
 *	iterate_supers - call function for all active superblocks
 *	@f: function to call
 *	@arg: argument to pass to it
 *
 *	Scans the superblock list and calls given function, passing it
 *	locked superblock and given argument.
 */
void iterate_supers(void (*f)(struct super_block *, void *), void *arg)
{
	struct super_block *sb, *p = NULL;

	spin_lock(&sb_lock);
	list_for_each_entry(sb, &super_blocks, s_list) {
		if (hlist_unhashed(&sb->s_instances))
			continue;
		sb->s_count++;
		spin_unlock(&sb_lock);

		down_read(&sb->s_umount);
		if (sb->s_root && (sb->s_flags & MS_BORN))
			f(sb, arg);
		up_read(&sb->s_umount);

		spin_lock(&sb_lock);
		if (p)
			__put_super(p);
		p = sb;
	}
	if (p)
		__put_super(p);
	spin_unlock(&sb_lock);
}

/**
 *	iterate_supers_type - call function for superblocks of given type
 *	@type: fs type
 *	@f: function to call
 *	@arg: argument to pass to it
 *
 *	Scans the superblock list and calls given function, passing it
 *	locked superblock and given argument.
 */
void iterate_supers_type(struct file_system_type *type,
	void (*f)(struct super_block *, void *), void *arg)
{
	struct super_block *sb, *p = NULL;

	spin_lock(&sb_lock);
	hlist_for_each_entry(sb, &type->fs_supers, s_instances) {
		sb->s_count++;
		spin_unlock(&sb_lock);

		down_read(&sb->s_umount);
		if (sb->s_root && (sb->s_flags & MS_BORN))
			f(sb, arg);
		up_read(&sb->s_umount);

		spin_lock(&sb_lock);
		if (p)
			__put_super(p);
		p = sb;
	}
	if (p)
		__put_super(p);
	spin_unlock(&sb_lock);
}

EXPORT_SYMBOL(iterate_supers_type);

/**
 *	get_super - get the superblock of a device
 *	@bdev: device to get the superblock for
 *
 *	Scans the superblock list and finds the superblock of the file system
 *	mounted on the device given. %NULL is returned if no match is found.
 */
struct super_block *get_super(struct block_device *bdev)
{
	struct super_block *sb;

	if (!bdev)
		return NULL;

	spin_lock(&sb_lock);
rescan:
	list_for_each_entry(sb, &super_blocks, s_list) {
		if (hlist_unhashed(&sb->s_instances))
			continue;
		if (sb->s_bdev == bdev) {
			sb->s_count++;
			spin_unlock(&sb_lock);
			down_read(&sb->s_umount);
			/* still alive? */
			if (sb->s_root && (sb->s_flags & MS_BORN))
				return sb;
			up_read(&sb->s_umount);
			/* nope, got unmounted */
			spin_lock(&sb_lock);
			__put_super(sb);
			goto rescan;
		}
	}
	spin_unlock(&sb_lock);
	return NULL;
}

EXPORT_SYMBOL(get_super);

/**
 *	get_super_thawed - get thawed superblock of a device
 *	@bdev: device to get the superblock for
 *
 *	Scans the superblock list and finds the superblock of the file system
 *	mounted on the device. The superblock is returned once it is thawed
 *	(or immediately if it was not frozen). %NULL is returned if no match
 *	is found.
 */
struct super_block *get_super_thawed(struct block_device *bdev)
{
	while (1) {
		struct super_block *s = get_super(bdev);
		if (!s || s->s_writers.frozen == SB_UNFROZEN)
			return s;
		up_read(&s->s_umount);
		wait_event(s->s_writers.wait_unfrozen,
			   s->s_writers.frozen == SB_UNFROZEN);
		put_super(s);
	}
}
EXPORT_SYMBOL(get_super_thawed);

/**
 * get_active_super - get an active reference to the superblock of a device
 * @bdev: device to get the superblock for
 *
 * Scans the superblock list and finds the superblock of the file system
 * mounted on the device given.  Returns the superblock with an active
 * reference or %NULL if none was found.
 */
struct super_block *get_active_super(struct block_device *bdev)
{
	struct super_block *sb;

	if (!bdev)
		return NULL;

restart:
	spin_lock(&sb_lock);
	list_for_each_entry(sb, &super_blocks, s_list) {
		if (hlist_unhashed(&sb->s_instances))
			continue;
		if (sb->s_bdev == bdev) {
			if (!grab_super(sb))
				goto restart;
			up_write(&sb->s_umount);
			return sb;
		}
	}
	spin_unlock(&sb_lock);
	return NULL;
}

struct super_block *user_get_super(dev_t dev)
{
	struct super_block *sb;

	spin_lock(&sb_lock);
rescan:
	list_for_each_entry(sb, &super_blocks, s_list) {
		if (hlist_unhashed(&sb->s_instances))
			continue;
		if (sb->s_dev == dev) {
			sb->s_count++;
			spin_unlock(&sb_lock);
			down_read(&sb->s_umount);
			/* still alive? */
			if (sb->s_root && (sb->s_flags & MS_BORN))
				return sb;
			up_read(&sb->s_umount);
			/* nope, got unmounted */
			spin_lock(&sb_lock);
			__put_super(sb);
			goto rescan;
		}
	}
	spin_unlock(&sb_lock);
	return NULL;
}

/**
 *	do_remount_sb - asks filesystem to change mount options.
 *	@sb:	superblock in question
 *	@flags:	numeric part of options
 *	@data:	the rest of options
 *	@force: whether or not to force the change
 *
 *	Alters the mount options of a mounted file system.
 */
int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
{
	int retval;
	int remount_ro;

	if (sb->s_writers.frozen != SB_UNFROZEN)
		return -EBUSY;

#ifdef CONFIG_BLOCK
	if (!(flags & MS_RDONLY) && bdev_read_only(sb->s_bdev))
		return -EACCES;
#endif

	remount_ro = (flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY);

	if (remount_ro) {
		if (!hlist_empty(&sb->s_pins)) {
			up_write(&sb->s_umount);
			group_pin_kill(&sb->s_pins);
			down_write(&sb->s_umount);
			if (!sb->s_root)
				return 0;
			if (sb->s_writers.frozen != SB_UNFROZEN)
				return -EBUSY;
			remount_ro = (flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY);
		}
	}
	shrink_dcache_sb(sb);
	sync_filesystem(sb);

	/* If we are remounting RDONLY and current sb is read/write,
	   make sure there are no rw files opened */
	if (remount_ro) {
		if (force) {
			sb->s_readonly_remount = 1;
			smp_wmb();
		} else {
			retval = sb_prepare_remount_readonly(sb);
			if (retval)
				return retval;
		}
	}

	if (sb->s_op->remount_fs) {
		retval = sb->s_op->remount_fs(sb, &flags, data);
		if (retval) {
			if (!force)
				goto cancel_readonly;
			/* If forced remount, go ahead despite any errors */
			WARN(1, "forced remount of a %s fs returned %i\n",
			     sb->s_type->name, retval);
		}
	}
	sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK);
	/* Needs to be ordered wrt mnt_is_readonly() */
	smp_wmb();
	sb->s_readonly_remount = 0;

	/*
	 * Some filesystems modify their metadata via some other path than the
	 * bdev buffer cache (eg. use a private mapping, or directories in
	 * pagecache, etc). Also file data modifications go via their own
	 * mappings. So if we will ever want to keep the buffer cache in sync
	 * with metadata devices, we will need a flag to force it.
	 */
	if (remount_ro && sb->s_bdev)
		invalidate_bdev(sb->s_bdev);
	return 0;

cancel_readonly:
	sb->s_readonly_remount = 0;
	return retval;
}

static void do_emergency_remount(struct work_struct *work)
{
	struct super_block *sb, *p = NULL;

	spin_lock(&sb_lock);
	list_for_each_entry(sb, &super_blocks, s_list) {
		if (hlist_unhashed(&sb->s_instances))
			continue;
		sb->s_count++;
		spin_unlock(&sb_lock);
		down_write(&sb->s_umount);
		if (sb->s_root && sb->s_bdev && (sb->s_flags & MS_BORN) &&
		    !(sb->s_flags & MS_RDONLY)) {
			/*
			 * What lock protects sb->s_flags??
			 */
			do_remount_sb(sb, MS_RDONLY, NULL, 1);
		}
		up_write(&sb->s_umount);
		spin_lock(&sb_lock);
		if (p)
			__put_super(p);
		p = sb;
	}
	if (p)
		__put_super(p);
	spin_unlock(&sb_lock);
	kfree(work);
	printk("Emergency Remount complete\n");
}

void emergency_remount(void)
{
	struct work_struct *work;

	work = kmalloc(sizeof(*work), GFP_ATOMIC);
	if (work) {
		INIT_WORK(work, do_emergency_remount);
		schedule_work(work);
	}
}

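/*
 * do_thaw_all - worker for emergency_thaw_all(): walks the superblock list
 * and forcibly thaws every born superblock it finds.
 */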
static void do_thaw_all(struct work_struct *work)
{
	struct super_block *sb, *p = NULL;

	spin_lock(&sb_lock);
	list_for_each_entry(sb, &super_blocks, s_list) {
		if (hlist_unhashed(&sb->s_instances))
			continue;
		sb->s_count++;
		spin_unlock(&sb_lock);
		down_write(&sb->s_umount);
		if (sb->s_root && sb->s_flags & MS_BORN) {
			emergency_thaw_bdev(sb);
			thaw_super_locked(sb);
		} else {
			up_write(&sb->s_umount);
		}
		spin_lock(&sb_lock);
		if (p)
			__put_super(p);
		p = sb;
	}
	if (p)
		__put_super(p);
	spin_unlock(&sb_lock);
	kfree(work);
	printk(KERN_WARNING "Emergency Thaw complete\n");
}

/**
 * emergency_thaw_all -- forcibly thaw every frozen filesystem
 *
 * Used for emergency unfreeze of all filesystems via SysRq
 */
void emergency_thaw_all(void)
{
	struct work_struct *work;

	work = kmalloc(sizeof(*work), GFP_ATOMIC);
	if (work) {
		INIT_WORK(work, do_thaw_all);
		schedule_work(work);
	}
}

/*
 * Unnamed block devices are dummy devices used by virtual
 * filesystems which don't use real block-devices.  -- jrs
 */

static DEFINE_IDA(unnamed_dev_ida);
static DEFINE_SPINLOCK(unnamed_dev_lock);/* protects the above */

/* Many userspace utilities consider an FSID of 0 invalid.
 * Always return at least 1 from get_anon_bdev.
 */
static int unnamed_dev_start = 1;

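/*
 * get_anon_bdev - allocate an anonymous dev_t (major 0) for filesystems
 * that are not backed by a real block device.
 */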
int get_anon_bdev(dev_t *p)
{
	int dev;
	int error;

 retry:
	if (ida_pre_get(&unnamed_dev_ida, GFP_ATOMIC) == 0)
		return -ENOMEM;
	spin_lock(&unnamed_dev_lock);
	error = ida_get_new_above(&unnamed_dev_ida, unnamed_dev_start, &dev);
	if (!error)
		unnamed_dev_start = dev + 1;
	spin_unlock(&unnamed_dev_lock);
	if (error == -EAGAIN)
		/* We raced and lost with another CPU. */
		goto retry;
	else if (error)
		return -EAGAIN;

	if (dev == (1 << MINORBITS)) {
		spin_lock(&unnamed_dev_lock);
		ida_remove(&unnamed_dev_ida, dev);
		if (unnamed_dev_start > dev)
			unnamed_dev_start = dev;
		spin_unlock(&unnamed_dev_lock);
		return -EMFILE;
	}
	*p = MKDEV(0, dev & MINORMASK);
	return 0;
}
EXPORT_SYMBOL(get_anon_bdev);

void free_anon_bdev(dev_t dev)
{
	int slot = MINOR(dev);
	spin_lock(&unnamed_dev_lock);
	ida_remove(&unnamed_dev_ida, slot);
	if (slot < unnamed_dev_start)
		unnamed_dev_start = slot;
	spin_unlock(&unnamed_dev_lock);
}
EXPORT_SYMBOL(free_anon_bdev);

int set_anon_super(struct super_block *s, void *data)
{
	int error = get_anon_bdev(&s->s_dev);
	if (!error)
		s->s_bdi = &noop_backing_dev_info;
	return error;
}

EXPORT_SYMBOL(set_anon_super);

void kill_anon_super(struct super_block *sb)
{
	dev_t dev = sb->s_dev;
	generic_shutdown_super(sb);
	free_anon_bdev(dev);
}

EXPORT_SYMBOL(kill_anon_super);

void kill_litter_super(struct super_block *sb)
{
	if (sb->s_root)
		d_genocide(sb->s_root);
	kill_anon_super(sb);
}

EXPORT_SYMBOL(kill_litter_super);

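/* Helpers for mount_ns(): superblocks are keyed by the namespace pointer
 * stashed in s_fs_info.
 */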
static int ns_test_super(struct super_block *sb, void *data)
{
	return sb->s_fs_info == data;
}

static int ns_set_super(struct super_block *sb, void *data)
{
	sb->s_fs_info = data;
	return set_anon_super(sb, NULL);
}

struct dentry *mount_ns(struct file_system_type *fs_type,
	int flags, void *data, void *ns, struct user_namespace *user_ns,
	int (*fill_super)(struct super_block *, void *, int))
{
	struct super_block *sb;

	/* Don't allow mounting unless the caller has CAP_SYS_ADMIN
	 * over the namespace.
	 */
	if (!(flags & MS_KERNMOUNT) && !ns_capable(user_ns, CAP_SYS_ADMIN))
		return ERR_PTR(-EPERM);

	sb = sget_userns(fs_type, ns_test_super, ns_set_super, flags,
			 user_ns, ns);
	if (IS_ERR(sb))
		return ERR_CAST(sb);

	if (!sb->s_root) {
		int err;

		err = fill_super(sb, data, flags & MS_SILENT ? 1 : 0);
		if (err) {
			deactivate_locked_super(sb);
			return ERR_PTR(err);
		}

		sb->s_flags |= MS_ACTIVE;
	}

	return dget(sb->s_root);
}

EXPORT_SYMBOL(mount_ns);

#ifdef CONFIG_BLOCK
static int set_bdev_super(struct super_block *s, void *data)
{
	s->s_bdev = data;
	s->s_dev = s->s_bdev->bd_dev;

	/*
	 * We set the bdi here to the queue backing, file systems can
	 * overwrite this in ->fill_super()
	 */
	s->s_bdi = &bdev_get_queue(s->s_bdev)->backing_dev_info;
	return 0;
}

static int test_bdev_super(struct super_block *s, void *data)
{
	return (void *)s->s_bdev == data;
}

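/*
 * mount_bdev - mount a filesystem residing on a block device.  The device is
 * opened exclusively; an existing superblock on the same bdev is only reused
 * if the read-only mode matches, otherwise -EBUSY is returned.
 */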
struct dentry *mount_bdev(struct file_system_type *fs_type,
	int flags, const char *dev_name, void *data,
	int (*fill_super)(struct super_block *, void *, int))
{
	struct block_device *bdev;
	struct super_block *s;
	fmode_t mode = FMODE_READ | FMODE_EXCL;
	int error = 0;

	if (!(flags & MS_RDONLY))
		mode |= FMODE_WRITE;

	bdev = blkdev_get_by_path(dev_name, mode, fs_type);
	if (IS_ERR(bdev))
		return ERR_CAST(bdev);

	/*
	 * once the super is inserted into the list by sget, s_umount
	 * will protect the lockfs code from trying to start a snapshot
	 * while we are mounting
	 */
	mutex_lock(&bdev->bd_fsfreeze_mutex);
	if (bdev->bd_fsfreeze_count > 0) {
		mutex_unlock(&bdev->bd_fsfreeze_mutex);
		error = -EBUSY;
		goto error_bdev;
	}
	s = sget(fs_type, test_bdev_super, set_bdev_super, flags | MS_NOSEC,
		 bdev);
	mutex_unlock(&bdev->bd_fsfreeze_mutex);
	if (IS_ERR(s))
		goto error_s;

	if (s->s_root) {
		if ((flags ^ s->s_flags) & MS_RDONLY) {
			deactivate_locked_super(s);
			error = -EBUSY;
			goto error_bdev;
		}

		/*
		 * s_umount nests inside bd_mutex during
		 * __invalidate_device().  blkdev_put() acquires
		 * bd_mutex and can't be called under s_umount.  Drop
		 * s_umount temporarily.  This is safe as we're
		 * holding an active reference.
		 */
		up_write(&s->s_umount);
		blkdev_put(bdev, mode);
		down_write(&s->s_umount);
	} else {
		char b[BDEVNAME_SIZE];

		s->s_mode = mode;
		strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id));
		sb_set_blocksize(s, block_size(bdev));
		error = fill_super(s, data, flags & MS_SILENT ? 1 : 0);
		if (error) {
			deactivate_locked_super(s);
			goto error;
		}

		s->s_flags |= MS_ACTIVE;
		bdev->bd_super = s;
	}

	return dget(s->s_root);

error_s:
	error = PTR_ERR(s);
error_bdev:
	blkdev_put(bdev, mode);
error:
	return ERR_PTR(error);
}
EXPORT_SYMBOL(mount_bdev);

void kill_block_super(struct super_block *sb)
{
	struct block_device *bdev = sb->s_bdev;
	fmode_t mode = sb->s_mode;

	bdev->bd_super = NULL;
	generic_shutdown_super(sb);
	sync_blockdev(bdev);
	WARN_ON_ONCE(!(mode & FMODE_EXCL));
	blkdev_put(bdev, mode | FMODE_EXCL);
}

EXPORT_SYMBOL(kill_block_super);
#endif

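/*
 * mount_nodev - mount a filesystem that is not backed by a device.  Every
 * call allocates a fresh superblock (sget() is given no ->test callback).
 */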
struct dentry *mount_nodev(struct file_system_type *fs_type,
	int flags, void *data,
	int (*fill_super)(struct super_block *, void *, int))
{
	int error;
	struct super_block *s = sget(fs_type, NULL, set_anon_super, flags, NULL);

	if (IS_ERR(s))
		return ERR_CAST(s);

	error = fill_super(s, data, flags & MS_SILENT ? 1 : 0);
	if (error) {
		deactivate_locked_super(s);
		return ERR_PTR(error);
	}
	s->s_flags |= MS_ACTIVE;
	return dget(s->s_root);
}
EXPORT_SYMBOL(mount_nodev);

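/* compare_single() always matches: such filesystems share one superblock. */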
static int compare_single(struct super_block *s, void *p)
{
	return 1;
}

struct dentry *mount_single(struct file_system_type *fs_type,
	int flags, void *data,
	int (*fill_super)(struct super_block *, void *, int))
{
	struct super_block *s;
	int error;

	s = sget(fs_type, compare_single, set_anon_super, flags, NULL);
	if (IS_ERR(s))
		return ERR_CAST(s);
	if (!s->s_root) {
		error = fill_super(s, data, flags & MS_SILENT ? 1 : 0);
		if (error) {
			deactivate_locked_super(s);
			return ERR_PTR(error);
		}
		s->s_flags |= MS_ACTIVE;
	} else {
		do_remount_sb(s, flags, data, 0);
	}
	return dget(s->s_root);
}
EXPORT_SYMBOL(mount_single);

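/*
 * mount_fs - call ->mount() of a filesystem type and do the common
 * post-mount work: LSM consultation, setting MS_BORN and sanity-checking
 * s_maxbytes.
 */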
struct dentry *
mount_fs(struct file_system_type *type, int flags, const char *name, void *data)
{
	struct dentry *root;
	struct super_block *sb;
	char *secdata = NULL;
	int error = -ENOMEM;

	if (data && !(type->fs_flags & FS_BINARY_MOUNTDATA)) {
		secdata = alloc_secdata();
		if (!secdata)
			goto out;

		error = security_sb_copy_data(data, secdata);
		if (error)
			goto out_free_secdata;
	}

	root = type->mount(type, flags, name, data);
	if (IS_ERR(root)) {
		error = PTR_ERR(root);
		goto out_free_secdata;
	}
	sb = root->d_sb;
	BUG_ON(!sb);
	WARN_ON(!sb->s_bdi);
	WARN_ON(sb->s_bdi == &default_backing_dev_info);

	/*
	 * Write barrier is for prune_super(). We place it before setting
	 * MS_BORN as the data dependency between the two functions is the
	 * superblock structure contents that we just set up, not the MS_BORN
	 * flag.
	 */
	smp_wmb();
	sb->s_flags |= MS_BORN;

	error = security_sb_kern_mount(sb, flags, secdata);
	if (error)
		goto out_sb;

	/*
	 * filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE
	 * but s_maxbytes was an unsigned long long for many releases. Throw
	 * this warning for a little while to try and catch filesystems that
	 * violate this rule.
	 */
	WARN((sb->s_maxbytes < 0), "%s set sb->s_maxbytes to "
		"negative value (%lld)\n", type->name, sb->s_maxbytes);

	up_write(&sb->s_umount);
	free_secdata(secdata);
	return root;
out_sb:
	dput(root);
	deactivate_locked_super(sb);
out_free_secdata:
	free_secdata(secdata);
out:
	return ERR_PTR(error);
}

/*
 * This is an internal function, please use sb_end_{write,pagefault,intwrite}
 * instead.
 */
void __sb_end_write(struct super_block *sb, int level)
{
	percpu_counter_dec(&sb->s_writers.counter[level-1]);
	/*
	 * Make sure s_writers are updated before we wake up waiters in
	 * freeze_super().
	 */
	smp_mb();
	if (waitqueue_active(&sb->s_writers.wait))
		wake_up(&sb->s_writers.wait);
	rwsem_release(&sb->s_writers.lock_map[level-1], 1, _RET_IP_);
}
EXPORT_SYMBOL(__sb_end_write);

#ifdef CONFIG_LOCKDEP
/*
 * We want lockdep to tell us about possible deadlocks with freezing, but
 * it's a bit tricky to properly instrument it. Getting a freeze protection
 * works as getting a read lock but there are subtle problems. XFS for example
 * gets freeze protection on internal level twice in some cases, which is OK
 * only because we already hold a freeze protection also on higher level. Due
 * to these cases we have to tell lockdep we are doing trylock when we
 * already hold a freeze protection for a higher freeze level.
 */
static void acquire_freeze_lock(struct super_block *sb, int level, bool trylock,
				unsigned long ip)
{
	int i;

	if (!trylock) {
		for (i = 0; i < level - 1; i++)
			if (lock_is_held(&sb->s_writers.lock_map[i])) {
				trylock = true;
				break;
			}
	}
	rwsem_acquire_read(&sb->s_writers.lock_map[level-1], 0, trylock, ip);
}
#endif

/*
 * This is an internal function, please use sb_start_{write,pagefault,intwrite}
 * instead.
 */
int __sb_start_write(struct super_block *sb, int level, bool wait)
{
retry:
	if (unlikely(sb->s_writers.frozen >= level)) {
		if (!wait)
			return 0;
		wait_event(sb->s_writers.wait_unfrozen,
			   sb->s_writers.frozen < level);
	}

#ifdef CONFIG_LOCKDEP
	acquire_freeze_lock(sb, level, !wait, _RET_IP_);
#endif
	percpu_counter_inc(&sb->s_writers.counter[level-1]);
	/*
	 * Make sure counter is updated before we check for frozen.
	 * freeze_super() first sets frozen and then checks the counter.
	 */
	smp_mb();
	if (unlikely(sb->s_writers.frozen >= level)) {
		__sb_end_write(sb, level);
		goto retry;
	}
	return 1;
}
EXPORT_SYMBOL(__sb_start_write);

/**
 * sb_wait_write - wait until all writers to given file system finish
 * @sb: the super for which we wait
 * @level: type of writers we wait for (normal vs page fault)
 *
 * This function waits until there are no writers of given type to given file
 * system. Caller of this function should make sure there can be no new
 * writers of type @level before calling this function. Otherwise this
 * function can livelock.
 */
static void sb_wait_write(struct super_block *sb, int level)
{
	s64 writers;

	/*
	 * We just cycle-through lockdep here so that it does not complain
	 * about returning with lock to userspace
	 */
	rwsem_acquire(&sb->s_writers.lock_map[level-1], 0, 0, _THIS_IP_);
	rwsem_release(&sb->s_writers.lock_map[level-1], 1, _THIS_IP_);

	do {
		DEFINE_WAIT(wait);

		/*
		 * We use a barrier in prepare_to_wait() to separate setting
		 * of frozen and checking of the counter
		 */
		prepare_to_wait(&sb->s_writers.wait, &wait,
				TASK_UNINTERRUPTIBLE);

		writers = percpu_counter_sum(&sb->s_writers.counter[level-1]);
		if (writers)
			schedule();

		finish_wait(&sb->s_writers.wait, &wait);
	} while (writers);
}

/**
 * freeze_super - lock the filesystem and force it into a consistent state
 * @sb: the super to lock
 *
 * Syncs the super to make sure the filesystem is consistent and calls the fs's
 * freeze_fs.  Subsequent calls to this without first thawing the fs will return
 * -EBUSY.
 *
 * During this function, sb->s_writers.frozen goes through these values:
 *
 * SB_UNFROZEN: File system is normal, all writes progress as usual.
 *
 * SB_FREEZE_WRITE: The file system is in the process of being frozen.  New
 * writes should be blocked, though page faults are still allowed. We wait for
 * all writes to complete and then proceed to the next stage.
 *
 * SB_FREEZE_PAGEFAULT: Freezing continues. Now also page faults are blocked
 * but internal fs threads can still modify the filesystem (although they
 * should not dirty new pages or inodes), writeback can run etc. After waiting
 * for all running page faults we sync the filesystem which will clean all
 * dirty pages and inodes (no new dirty pages or inodes can be created when
 * sync is running).
 *
 * SB_FREEZE_FS: The file system is frozen. Now all internal sources of fs
 * modification are blocked (e.g. XFS preallocation truncation on inode
 * reclaim). This is usually implemented by blocking new transactions for
 * filesystems that have them and need this additional guard. After all
 * internal writers are finished we call ->freeze_fs() to finish filesystem
 * freezing. Then we transition to SB_FREEZE_COMPLETE state. This state is
 * mostly auxiliary for filesystems to verify they do not modify frozen fs.
 *
 * sb->s_writers.frozen is protected by sb->s_umount.
 */
int freeze_super(struct super_block *sb)
{
	int ret;

	atomic_inc(&sb->s_active);
	down_write(&sb->s_umount);
	if (sb->s_writers.frozen != SB_UNFROZEN) {
		deactivate_locked_super(sb);
		return -EBUSY;
	}

	if (!(sb->s_flags & MS_BORN)) {
		up_write(&sb->s_umount);
		return 0;	/* sic - it's "nothing to do" */
	}

	if (sb->s_flags & MS_RDONLY) {
		/* Nothing to do really... */
		sb->s_writers.frozen = SB_FREEZE_COMPLETE;
		up_write(&sb->s_umount);
		return 0;
	}

	/* From now on, no new normal writers can start */
	sb->s_writers.frozen = SB_FREEZE_WRITE;
	smp_wmb();

	/* Release s_umount to preserve sb_start_write -> s_umount ordering */
	up_write(&sb->s_umount);

	sb_wait_write(sb, SB_FREEZE_WRITE);

	/* Now we go and block page faults... */
	down_write(&sb->s_umount);
	sb->s_writers.frozen = SB_FREEZE_PAGEFAULT;
	smp_wmb();

	sb_wait_write(sb, SB_FREEZE_PAGEFAULT);

	/* All writers are done so after syncing there won't be dirty data */
	sync_filesystem(sb);

	/* Now wait for internal filesystem counter */
	sb->s_writers.frozen = SB_FREEZE_FS;
	smp_wmb();
	sb_wait_write(sb, SB_FREEZE_FS);

	if (sb->s_op->freeze_fs) {
		ret = sb->s_op->freeze_fs(sb);
		if (ret) {
			printk(KERN_ERR
				"VFS: Filesystem freeze failed\n");
			sb->s_writers.frozen = SB_UNFROZEN;
			smp_wmb();
			wake_up(&sb->s_writers.wait_unfrozen);
			deactivate_locked_super(sb);
			return ret;
		}
	}
	/*
	 * This is just for debugging purposes so that fs can warn if it
	 * sees write activity when frozen is set to SB_FREEZE_COMPLETE.
	 */
	sb->s_writers.frozen = SB_FREEZE_COMPLETE;
	up_write(&sb->s_umount);
	return 0;
}
EXPORT_SYMBOL(freeze_super);

/*
 * Undoes a prior freeze_super().  Expects s_umount held for writing and
 * releases it on all return paths, via up_write() on failure or
 * deactivate_locked_super() on success.
 */
static int thaw_super_locked(struct super_block *sb)
{
	int error;

	if (sb->s_writers.frozen != SB_FREEZE_COMPLETE) {
		up_write(&sb->s_umount);
		return -EINVAL;
	}

	if (sb->s_flags & MS_RDONLY)
		goto out;

	if (sb->s_op->unfreeze_fs) {
		error = sb->s_op->unfreeze_fs(sb);
		if (error) {
			printk(KERN_ERR
				"VFS: Filesystem thaw failed\n");
			up_write(&sb->s_umount);
			return error;
		}
	}

out:
	sb->s_writers.frozen = SB_UNFROZEN;
	smp_wmb();
	wake_up(&sb->s_writers.wait_unfrozen);
	deactivate_locked_super(sb);

	return 0;
}

/**
 * thaw_super -- unlock filesystem
 * @sb: the super to thaw
 *
 * Unlocks the filesystem and marks it writeable again after freeze_super().
 */
int thaw_super(struct super_block *sb)
{
	down_write(&sb->s_umount);
	return thaw_super_locked(sb);
}
EXPORT_SYMBOL(thaw_super);