1
2
3
4
5
6
7
8
9
10
11#include <linux/syscalls.h>
12#include <linux/export.h>
13#include <linux/capability.h>
14#include <linux/mnt_namespace.h>
15#include <linux/user_namespace.h>
16#include <linux/namei.h>
17#include <linux/security.h>
18#include <linux/cred.h>
19#include <linux/idr.h>
20#include <linux/init.h>
21#include <linux/fs_struct.h>
22#include <linux/fsnotify.h>
23#include <linux/uaccess.h>
24#include <linux/proc_ns.h>
25#include <linux/magic.h>
26#include <linux/bootmem.h>
27#include <linux/task_work.h>
28#include <linux/sched/task.h>
29
30#include "pnode.h"
31#include "internal.h"
32
33
34unsigned int sysctl_mount_max __read_mostly = 100000;
35
36static unsigned int m_hash_mask __read_mostly;
37static unsigned int m_hash_shift __read_mostly;
38static unsigned int mp_hash_mask __read_mostly;
39static unsigned int mp_hash_shift __read_mostly;
40
41static __initdata unsigned long mhash_entries;
42static int __init set_mhash_entries(char *str)
43{
44 if (!str)
45 return 0;
46 mhash_entries = simple_strtoul(str, &str, 0);
47 return 1;
48}
49__setup("mhash_entries=", set_mhash_entries);
50
51static __initdata unsigned long mphash_entries;
52static int __init set_mphash_entries(char *str)
53{
54 if (!str)
55 return 0;
56 mphash_entries = simple_strtoul(str, &str, 0);
57 return 1;
58}
59__setup("mphash_entries=", set_mphash_entries);
60
61static u64 event;
62static DEFINE_IDA(mnt_id_ida);
63static DEFINE_IDA(mnt_group_ida);
64static DEFINE_SPINLOCK(mnt_id_lock);
65static int mnt_id_start = 0;
66static int mnt_group_start = 1;
67
68static struct hlist_head *mount_hashtable __read_mostly;
69static struct hlist_head *mountpoint_hashtable __read_mostly;
70static struct kmem_cache *mnt_cache __read_mostly;
71static DECLARE_RWSEM(namespace_sem);
72
73
74struct kobject *fs_kobj;
75EXPORT_SYMBOL_GPL(fs_kobj);
76
77
78
79
80
81
82
83
84
85__cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock);
86
87static inline struct hlist_head *m_hash(struct vfsmount *mnt, struct dentry *dentry)
88{
89 unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES);
90 tmp += ((unsigned long)dentry / L1_CACHE_BYTES);
91 tmp = tmp + (tmp >> m_hash_shift);
92 return &mount_hashtable[tmp & m_hash_mask];
93}
94
95static inline struct hlist_head *mp_hash(struct dentry *dentry)
96{
97 unsigned long tmp = ((unsigned long)dentry / L1_CACHE_BYTES);
98 tmp = tmp + (tmp >> mp_hash_shift);
99 return &mountpoint_hashtable[tmp & mp_hash_mask];
100}
101
102static int mnt_alloc_id(struct mount *mnt)
103{
104 int res;
105
106retry:
107 ida_pre_get(&mnt_id_ida, GFP_KERNEL);
108 spin_lock(&mnt_id_lock);
109 res = ida_get_new_above(&mnt_id_ida, mnt_id_start, &mnt->mnt_id);
110 if (!res)
111 mnt_id_start = mnt->mnt_id + 1;
112 spin_unlock(&mnt_id_lock);
113 if (res == -EAGAIN)
114 goto retry;
115
116 return res;
117}
118
119static void mnt_free_id(struct mount *mnt)
120{
121 int id = mnt->mnt_id;
122 spin_lock(&mnt_id_lock);
123 ida_remove(&mnt_id_ida, id);
124 if (mnt_id_start > id)
125 mnt_id_start = id;
126 spin_unlock(&mnt_id_lock);
127}
128
129
130
131
132
133
134static int mnt_alloc_group_id(struct mount *mnt)
135{
136 int res;
137
138 if (!ida_pre_get(&mnt_group_ida, GFP_KERNEL))
139 return -ENOMEM;
140
141 res = ida_get_new_above(&mnt_group_ida,
142 mnt_group_start,
143 &mnt->mnt_group_id);
144 if (!res)
145 mnt_group_start = mnt->mnt_group_id + 1;
146
147 return res;
148}
149
150
151
152
153void mnt_release_group_id(struct mount *mnt)
154{
155 int id = mnt->mnt_group_id;
156 ida_remove(&mnt_group_ida, id);
157 if (mnt_group_start > id)
158 mnt_group_start = id;
159 mnt->mnt_group_id = 0;
160}
161
162
163
164
165static inline void mnt_add_count(struct mount *mnt, int n)
166{
167#ifdef CONFIG_SMP
168 this_cpu_add(mnt->mnt_pcp->mnt_count, n);
169#else
170 preempt_disable();
171 mnt->mnt_count += n;
172 preempt_enable();
173#endif
174}
175
176
177
178
179unsigned int mnt_get_count(struct mount *mnt)
180{
181#ifdef CONFIG_SMP
182 unsigned int count = 0;
183 int cpu;
184
185 for_each_possible_cpu(cpu) {
186 count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_count;
187 }
188
189 return count;
190#else
191 return mnt->mnt_count;
192#endif
193}
194
195static void drop_mountpoint(struct fs_pin *p)
196{
197 struct mount *m = container_of(p, struct mount, mnt_umount);
198 dput(m->mnt_ex_mountpoint);
199 pin_remove(p);
200 mntput(&m->mnt);
201}
202
203static struct mount *alloc_vfsmnt(const char *name)
204{
205 struct mount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
206 if (mnt) {
207 int err;
208
209 err = mnt_alloc_id(mnt);
210 if (err)
211 goto out_free_cache;
212
213 if (name) {
214 mnt->mnt_devname = kstrdup_const(name, GFP_KERNEL);
215 if (!mnt->mnt_devname)
216 goto out_free_id;
217 }
218
219#ifdef CONFIG_SMP
220 mnt->mnt_pcp = alloc_percpu(struct mnt_pcp);
221 if (!mnt->mnt_pcp)
222 goto out_free_devname;
223
224 this_cpu_add(mnt->mnt_pcp->mnt_count, 1);
225#else
226 mnt->mnt_count = 1;
227 mnt->mnt_writers = 0;
228#endif
229
230 INIT_HLIST_NODE(&mnt->mnt_hash);
231 INIT_LIST_HEAD(&mnt->mnt_child);
232 INIT_LIST_HEAD(&mnt->mnt_mounts);
233 INIT_LIST_HEAD(&mnt->mnt_list);
234 INIT_LIST_HEAD(&mnt->mnt_expire);
235 INIT_LIST_HEAD(&mnt->mnt_share);
236 INIT_LIST_HEAD(&mnt->mnt_slave_list);
237 INIT_LIST_HEAD(&mnt->mnt_slave);
238 INIT_HLIST_NODE(&mnt->mnt_mp_list);
239 INIT_LIST_HEAD(&mnt->mnt_umounting);
240 init_fs_pin(&mnt->mnt_umount, drop_mountpoint);
241 }
242 return mnt;
243
244#ifdef CONFIG_SMP
245out_free_devname:
246 kfree_const(mnt->mnt_devname);
247#endif
248out_free_id:
249 mnt_free_id(mnt);
250out_free_cache:
251 kmem_cache_free(mnt_cache, mnt);
252 return NULL;
253}
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274int __mnt_is_readonly(struct vfsmount *mnt)
275{
276 if (mnt->mnt_flags & MNT_READONLY)
277 return 1;
278 if (sb_rdonly(mnt->mnt_sb))
279 return 1;
280 return 0;
281}
282EXPORT_SYMBOL_GPL(__mnt_is_readonly);
283
284static inline void mnt_inc_writers(struct mount *mnt)
285{
286#ifdef CONFIG_SMP
287 this_cpu_inc(mnt->mnt_pcp->mnt_writers);
288#else
289 mnt->mnt_writers++;
290#endif
291}
292
293static inline void mnt_dec_writers(struct mount *mnt)
294{
295#ifdef CONFIG_SMP
296 this_cpu_dec(mnt->mnt_pcp->mnt_writers);
297#else
298 mnt->mnt_writers--;
299#endif
300}
301
302static unsigned int mnt_get_writers(struct mount *mnt)
303{
304#ifdef CONFIG_SMP
305 unsigned int count = 0;
306 int cpu;
307
308 for_each_possible_cpu(cpu) {
309 count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_writers;
310 }
311
312 return count;
313#else
314 return mnt->mnt_writers;
315#endif
316}
317
318static int mnt_is_readonly(struct vfsmount *mnt)
319{
320 if (mnt->mnt_sb->s_readonly_remount)
321 return 1;
322
323 smp_rmb();
324 return __mnt_is_readonly(mnt);
325}
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343int __mnt_want_write(struct vfsmount *m)
344{
345 struct mount *mnt = real_mount(m);
346 int ret = 0;
347
348 preempt_disable();
349 mnt_inc_writers(mnt);
350
351
352
353
354
355 smp_mb();
356 while (READ_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD)
357 cpu_relax();
358
359
360
361
362
363 smp_rmb();
364 if (mnt_is_readonly(m)) {
365 mnt_dec_writers(mnt);
366 ret = -EROFS;
367 }
368 preempt_enable();
369
370 return ret;
371}
372
373
374
375
376
377
378
379
380
381
382int mnt_want_write(struct vfsmount *m)
383{
384 int ret;
385
386 sb_start_write(m->mnt_sb);
387 ret = __mnt_want_write(m);
388 if (ret)
389 sb_end_write(m->mnt_sb);
390 return ret;
391}
392EXPORT_SYMBOL_GPL(mnt_want_write);
393
394
395
396
397
398
399
400
401
402
403
404
405
406int mnt_clone_write(struct vfsmount *mnt)
407{
408
409 if (__mnt_is_readonly(mnt))
410 return -EROFS;
411 preempt_disable();
412 mnt_inc_writers(real_mount(mnt));
413 preempt_enable();
414 return 0;
415}
416EXPORT_SYMBOL_GPL(mnt_clone_write);
417
418
419
420
421
422
423
424
425int __mnt_want_write_file(struct file *file)
426{
427 if (!(file->f_mode & FMODE_WRITER))
428 return __mnt_want_write(file->f_path.mnt);
429 else
430 return mnt_clone_write(file->f_path.mnt);
431}
432
433
434
435
436
437
438
439
440
441
442
443
444
445int mnt_want_write_file_path(struct file *file)
446{
447 int ret;
448
449 sb_start_write(file->f_path.mnt->mnt_sb);
450 ret = __mnt_want_write_file(file);
451 if (ret)
452 sb_end_write(file->f_path.mnt->mnt_sb);
453 return ret;
454}
455
456static inline int may_write_real(struct file *file)
457{
458 struct dentry *dentry = file->f_path.dentry;
459 struct dentry *upperdentry;
460
461
462 if (file->f_mode & FMODE_WRITER)
463 return 0;
464
465
466 if (likely(!(dentry->d_flags & DCACHE_OP_REAL)))
467 return 0;
468
469
470 upperdentry = d_real(dentry, NULL, 0, D_REAL_UPPER);
471 if (upperdentry &&
472 (file_inode(file) == d_inode(upperdentry) ||
473 file_inode(file) == d_inode(dentry)))
474 return 0;
475
476
477 return -EPERM;
478}
479
480
481
482
483
484
485
486
487
488
489
490
491int mnt_want_write_file(struct file *file)
492{
493 int ret;
494
495 ret = may_write_real(file);
496 if (!ret) {
497 sb_start_write(file_inode(file)->i_sb);
498 ret = __mnt_want_write_file(file);
499 if (ret)
500 sb_end_write(file_inode(file)->i_sb);
501 }
502 return ret;
503}
504EXPORT_SYMBOL_GPL(mnt_want_write_file);
505
506
507
508
509
510
511
512
513
/*
 * Undo __mnt_want_write()/mnt_clone_write(): drop one writer count on @mnt
 * with preemption disabled.  Superblock write protection is not touched.
 */
void __mnt_drop_write(struct vfsmount *mnt)
{
	struct mount *m = real_mount(mnt);

	preempt_disable();
	mnt_dec_writers(m);
	preempt_enable();
}
520
521
522
523
524
525
526
527
528
529void mnt_drop_write(struct vfsmount *mnt)
530{
531 __mnt_drop_write(mnt);
532 sb_end_write(mnt->mnt_sb);
533}
534EXPORT_SYMBOL_GPL(mnt_drop_write);
535
536void __mnt_drop_write_file(struct file *file)
537{
538 __mnt_drop_write(file->f_path.mnt);
539}
540
541void mnt_drop_write_file_path(struct file *file)
542{
543 mnt_drop_write(file->f_path.mnt);
544}
545
546void mnt_drop_write_file(struct file *file)
547{
548 __mnt_drop_write(file->f_path.mnt);
549 sb_end_write(file_inode(file)->i_sb);
550}
551EXPORT_SYMBOL(mnt_drop_write_file);
552
553static int mnt_make_readonly(struct mount *mnt)
554{
555 int ret = 0;
556
557 lock_mount_hash();
558 mnt->mnt.mnt_flags |= MNT_WRITE_HOLD;
559
560
561
562
563 smp_mb();
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581 if (mnt_get_writers(mnt) > 0)
582 ret = -EBUSY;
583 else
584 mnt->mnt.mnt_flags |= MNT_READONLY;
585
586
587
588
589 smp_wmb();
590 mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
591 unlock_mount_hash();
592 return ret;
593}
594
595static void __mnt_unmake_readonly(struct mount *mnt)
596{
597 lock_mount_hash();
598 mnt->mnt.mnt_flags &= ~MNT_READONLY;
599 unlock_mount_hash();
600}
601
602int sb_prepare_remount_readonly(struct super_block *sb)
603{
604 struct mount *mnt;
605 int err = 0;
606
607
608 if (atomic_long_read(&sb->s_remove_count))
609 return -EBUSY;
610
611 lock_mount_hash();
612 list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) {
613 if (!(mnt->mnt.mnt_flags & MNT_READONLY)) {
614 mnt->mnt.mnt_flags |= MNT_WRITE_HOLD;
615 smp_mb();
616 if (mnt_get_writers(mnt) > 0) {
617 err = -EBUSY;
618 break;
619 }
620 }
621 }
622 if (!err && atomic_long_read(&sb->s_remove_count))
623 err = -EBUSY;
624
625 if (!err) {
626 sb->s_readonly_remount = 1;
627 smp_wmb();
628 }
629 list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) {
630 if (mnt->mnt.mnt_flags & MNT_WRITE_HOLD)
631 mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
632 }
633 unlock_mount_hash();
634
635 return err;
636}
637
638static void free_vfsmnt(struct mount *mnt)
639{
640 kfree_const(mnt->mnt_devname);
641#ifdef CONFIG_SMP
642 free_percpu(mnt->mnt_pcp);
643#endif
644 kmem_cache_free(mnt_cache, mnt);
645}
646
647static void delayed_free_vfsmnt(struct rcu_head *head)
648{
649 free_vfsmnt(container_of(head, struct mount, mnt_rcu));
650}
651
652
653int __legitimize_mnt(struct vfsmount *bastard, unsigned seq)
654{
655 struct mount *mnt;
656 if (read_seqretry(&mount_lock, seq))
657 return 1;
658 if (bastard == NULL)
659 return 0;
660 mnt = real_mount(bastard);
661 mnt_add_count(mnt, 1);
662 if (likely(!read_seqretry(&mount_lock, seq)))
663 return 0;
664 if (bastard->mnt_flags & MNT_SYNC_UMOUNT) {
665 mnt_add_count(mnt, -1);
666 return 1;
667 }
668 return -1;
669}
670
671
672bool legitimize_mnt(struct vfsmount *bastard, unsigned seq)
673{
674 int res = __legitimize_mnt(bastard, seq);
675 if (likely(!res))
676 return true;
677 if (unlikely(res < 0)) {
678 rcu_read_unlock();
679 mntput(bastard);
680 rcu_read_lock();
681 }
682 return false;
683}
684
685
686
687
688
689struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
690{
691 struct hlist_head *head = m_hash(mnt, dentry);
692 struct mount *p;
693
694 hlist_for_each_entry_rcu(p, head, mnt_hash)
695 if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry)
696 return p;
697 return NULL;
698}
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716struct vfsmount *lookup_mnt(const struct path *path)
717{
718 struct mount *child_mnt;
719 struct vfsmount *m;
720 unsigned seq;
721
722 rcu_read_lock();
723 do {
724 seq = read_seqbegin(&mount_lock);
725 child_mnt = __lookup_mnt(path->mnt, path->dentry);
726 m = child_mnt ? &child_mnt->mnt : NULL;
727 } while (!legitimize_mnt(m, seq));
728 rcu_read_unlock();
729 return m;
730}
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747bool __is_local_mountpoint(struct dentry *dentry)
748{
749 struct mnt_namespace *ns = current->nsproxy->mnt_ns;
750 struct mount *mnt;
751 bool is_covered = false;
752
753 if (!d_mountpoint(dentry))
754 goto out;
755
756 down_read(&namespace_sem);
757 list_for_each_entry(mnt, &ns->list, mnt_list) {
758 is_covered = (mnt->mnt_mountpoint == dentry);
759 if (is_covered)
760 break;
761 }
762 up_read(&namespace_sem);
763out:
764 return is_covered;
765}
766
767static struct mountpoint *lookup_mountpoint(struct dentry *dentry)
768{
769 struct hlist_head *chain = mp_hash(dentry);
770 struct mountpoint *mp;
771
772 hlist_for_each_entry(mp, chain, m_hash) {
773 if (mp->m_dentry == dentry) {
774
775 if (d_unlinked(dentry))
776 return ERR_PTR(-ENOENT);
777 mp->m_count++;
778 return mp;
779 }
780 }
781 return NULL;
782}
783
784static struct mountpoint *get_mountpoint(struct dentry *dentry)
785{
786 struct mountpoint *mp, *new = NULL;
787 int ret;
788
789 if (d_mountpoint(dentry)) {
790mountpoint:
791 read_seqlock_excl(&mount_lock);
792 mp = lookup_mountpoint(dentry);
793 read_sequnlock_excl(&mount_lock);
794 if (mp)
795 goto done;
796 }
797
798 if (!new)
799 new = kmalloc(sizeof(struct mountpoint), GFP_KERNEL);
800 if (!new)
801 return ERR_PTR(-ENOMEM);
802
803
804
805 ret = d_set_mounted(dentry);
806
807
808 if (ret == -EBUSY)
809 goto mountpoint;
810
811
812 mp = ERR_PTR(ret);
813 if (ret)
814 goto done;
815
816
817 read_seqlock_excl(&mount_lock);
818 new->m_dentry = dentry;
819 new->m_count = 1;
820 hlist_add_head(&new->m_hash, mp_hash(dentry));
821 INIT_HLIST_HEAD(&new->m_list);
822 read_sequnlock_excl(&mount_lock);
823
824 mp = new;
825 new = NULL;
826done:
827 kfree(new);
828 return mp;
829}
830
831static void put_mountpoint(struct mountpoint *mp)
832{
833 if (!--mp->m_count) {
834 struct dentry *dentry = mp->m_dentry;
835 BUG_ON(!hlist_empty(&mp->m_list));
836 spin_lock(&dentry->d_lock);
837 dentry->d_flags &= ~DCACHE_MOUNTED;
838 spin_unlock(&dentry->d_lock);
839 hlist_del(&mp->m_hash);
840 kfree(mp);
841 }
842}
843
844static inline int check_mnt(struct mount *mnt)
845{
846 return mnt->mnt_ns == current->nsproxy->mnt_ns;
847}
848
849
850
851
852static void touch_mnt_namespace(struct mnt_namespace *ns)
853{
854 if (ns) {
855 ns->event = ++event;
856 wake_up_interruptible(&ns->poll);
857 }
858}
859
860
861
862
863static void __touch_mnt_namespace(struct mnt_namespace *ns)
864{
865 if (ns && ns->event != event) {
866 ns->event = event;
867 wake_up_interruptible(&ns->poll);
868 }
869}
870
871
872
873
874static void unhash_mnt(struct mount *mnt)
875{
876 mnt->mnt_parent = mnt;
877 mnt->mnt_mountpoint = mnt->mnt.mnt_root;
878 list_del_init(&mnt->mnt_child);
879 hlist_del_init_rcu(&mnt->mnt_hash);
880 hlist_del_init(&mnt->mnt_mp_list);
881 put_mountpoint(mnt->mnt_mp);
882 mnt->mnt_mp = NULL;
883}
884
885
886
887
888static void detach_mnt(struct mount *mnt, struct path *old_path)
889{
890 old_path->dentry = mnt->mnt_mountpoint;
891 old_path->mnt = &mnt->mnt_parent->mnt;
892 unhash_mnt(mnt);
893}
894
895
896
897
898static void umount_mnt(struct mount *mnt)
899{
900
901 mnt->mnt_ex_mountpoint = mnt->mnt_mountpoint;
902 unhash_mnt(mnt);
903}
904
905
906
907
908void mnt_set_mountpoint(struct mount *mnt,
909 struct mountpoint *mp,
910 struct mount *child_mnt)
911{
912 mp->m_count++;
913 mnt_add_count(mnt, 1);
914 child_mnt->mnt_mountpoint = dget(mp->m_dentry);
915 child_mnt->mnt_parent = mnt;
916 child_mnt->mnt_mp = mp;
917 hlist_add_head(&child_mnt->mnt_mp_list, &mp->m_list);
918}
919
920static void __attach_mnt(struct mount *mnt, struct mount *parent)
921{
922 hlist_add_head_rcu(&mnt->mnt_hash,
923 m_hash(&parent->mnt, mnt->mnt_mountpoint));
924 list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
925}
926
927
928
929
/*
 * Attach @mnt on @mp under @parent: set up the mountpoint linkage and the
 * references with mnt_set_mountpoint(), then hash it with __attach_mnt().
 */
static void attach_mnt(struct mount *mnt,
			struct mount *parent,
			struct mountpoint *mp)
{
	/* Note the argument order: the parent comes first here. */
	mnt_set_mountpoint(parent, mp, mnt);
	__attach_mnt(mnt, parent);
}
937
938void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp, struct mount *mnt)
939{
940 struct mountpoint *old_mp = mnt->mnt_mp;
941 struct dentry *old_mountpoint = mnt->mnt_mountpoint;
942 struct mount *old_parent = mnt->mnt_parent;
943
944 list_del_init(&mnt->mnt_child);
945 hlist_del_init(&mnt->mnt_mp_list);
946 hlist_del_init_rcu(&mnt->mnt_hash);
947
948 attach_mnt(mnt, parent, mp);
949
950 put_mountpoint(old_mp);
951
952
953
954
955
956
957
958
959
960
961
962
963 spin_lock(&old_mountpoint->d_lock);
964 old_mountpoint->d_lockref.count--;
965 spin_unlock(&old_mountpoint->d_lock);
966
967 mnt_add_count(old_parent, -1);
968}
969
970
971
972
973static void commit_tree(struct mount *mnt)
974{
975 struct mount *parent = mnt->mnt_parent;
976 struct mount *m;
977 LIST_HEAD(head);
978 struct mnt_namespace *n = parent->mnt_ns;
979
980 BUG_ON(parent == mnt);
981
982 list_add_tail(&head, &mnt->mnt_list);
983 list_for_each_entry(m, &head, mnt_list)
984 m->mnt_ns = n;
985
986 list_splice(&head, n->list.prev);
987
988 n->mounts += n->pending_mounts;
989 n->pending_mounts = 0;
990
991 __attach_mnt(mnt, parent);
992 touch_mnt_namespace(n);
993}
994
995static struct mount *next_mnt(struct mount *p, struct mount *root)
996{
997 struct list_head *next = p->mnt_mounts.next;
998 if (next == &p->mnt_mounts) {
999 while (1) {
1000 if (p == root)
1001 return NULL;
1002 next = p->mnt_child.next;
1003 if (next != &p->mnt_parent->mnt_mounts)
1004 break;
1005 p = p->mnt_parent;
1006 }
1007 }
1008 return list_entry(next, struct mount, mnt_child);
1009}
1010
1011static struct mount *skip_mnt_tree(struct mount *p)
1012{
1013 struct list_head *prev = p->mnt_mounts.prev;
1014 while (prev != &p->mnt_mounts) {
1015 p = list_entry(prev, struct mount, mnt_child);
1016 prev = p->mnt_mounts.prev;
1017 }
1018 return p;
1019}
1020
1021struct vfsmount *
1022vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data)
1023{
1024 struct mount *mnt;
1025 struct dentry *root;
1026
1027 if (!type)
1028 return ERR_PTR(-ENODEV);
1029
1030 mnt = alloc_vfsmnt(name);
1031 if (!mnt)
1032 return ERR_PTR(-ENOMEM);
1033
1034 if (flags & SB_KERNMOUNT)
1035 mnt->mnt.mnt_flags = MNT_INTERNAL;
1036
1037 root = mount_fs(type, flags, name, data);
1038 if (IS_ERR(root)) {
1039 mnt_free_id(mnt);
1040 free_vfsmnt(mnt);
1041 return ERR_CAST(root);
1042 }
1043
1044 mnt->mnt.mnt_root = root;
1045 mnt->mnt.mnt_sb = root->d_sb;
1046 mnt->mnt_mountpoint = mnt->mnt.mnt_root;
1047 mnt->mnt_parent = mnt;
1048 lock_mount_hash();
1049 list_add_tail(&mnt->mnt_instance, &root->d_sb->s_mounts);
1050 unlock_mount_hash();
1051 return &mnt->mnt;
1052}
1053EXPORT_SYMBOL_GPL(vfs_kern_mount);
1054
1055struct vfsmount *
1056vfs_submount(const struct dentry *mountpoint, struct file_system_type *type,
1057 const char *name, void *data)
1058{
1059
1060
1061
1062
1063 if (mountpoint->d_sb->s_user_ns != &init_user_ns)
1064 return ERR_PTR(-EPERM);
1065
1066 return vfs_kern_mount(type, SB_SUBMOUNT, name, data);
1067}
1068EXPORT_SYMBOL_GPL(vfs_submount);
1069
1070static struct mount *clone_mnt(struct mount *old, struct dentry *root,
1071 int flag)
1072{
1073 struct super_block *sb = old->mnt.mnt_sb;
1074 struct mount *mnt;
1075 int err;
1076
1077 mnt = alloc_vfsmnt(old->mnt_devname);
1078 if (!mnt)
1079 return ERR_PTR(-ENOMEM);
1080
1081 if (flag & (CL_SLAVE | CL_PRIVATE | CL_SHARED_TO_SLAVE))
1082 mnt->mnt_group_id = 0;
1083 else
1084 mnt->mnt_group_id = old->mnt_group_id;
1085
1086 if ((flag & CL_MAKE_SHARED) && !mnt->mnt_group_id) {
1087 err = mnt_alloc_group_id(mnt);
1088 if (err)
1089 goto out_free;
1090 }
1091
1092 mnt->mnt.mnt_flags = old->mnt.mnt_flags;
1093 mnt->mnt.mnt_flags &= ~(MNT_WRITE_HOLD|MNT_MARKED|MNT_INTERNAL);
1094
1095 if (flag & CL_UNPRIVILEGED) {
1096 mnt->mnt.mnt_flags |= MNT_LOCK_ATIME;
1097
1098 if (mnt->mnt.mnt_flags & MNT_READONLY)
1099 mnt->mnt.mnt_flags |= MNT_LOCK_READONLY;
1100
1101 if (mnt->mnt.mnt_flags & MNT_NODEV)
1102 mnt->mnt.mnt_flags |= MNT_LOCK_NODEV;
1103
1104 if (mnt->mnt.mnt_flags & MNT_NOSUID)
1105 mnt->mnt.mnt_flags |= MNT_LOCK_NOSUID;
1106
1107 if (mnt->mnt.mnt_flags & MNT_NOEXEC)
1108 mnt->mnt.mnt_flags |= MNT_LOCK_NOEXEC;
1109 }
1110
1111
1112 if ((flag & CL_UNPRIVILEGED) &&
1113 (!(flag & CL_EXPIRE) || list_empty(&old->mnt_expire)))
1114 mnt->mnt.mnt_flags |= MNT_LOCKED;
1115
1116 atomic_inc(&sb->s_active);
1117 mnt->mnt.mnt_sb = sb;
1118 mnt->mnt.mnt_root = dget(root);
1119 mnt->mnt_mountpoint = mnt->mnt.mnt_root;
1120 mnt->mnt_parent = mnt;
1121 lock_mount_hash();
1122 list_add_tail(&mnt->mnt_instance, &sb->s_mounts);
1123 unlock_mount_hash();
1124
1125 if ((flag & CL_SLAVE) ||
1126 ((flag & CL_SHARED_TO_SLAVE) && IS_MNT_SHARED(old))) {
1127 list_add(&mnt->mnt_slave, &old->mnt_slave_list);
1128 mnt->mnt_master = old;
1129 CLEAR_MNT_SHARED(mnt);
1130 } else if (!(flag & CL_PRIVATE)) {
1131 if ((flag & CL_MAKE_SHARED) || IS_MNT_SHARED(old))
1132 list_add(&mnt->mnt_share, &old->mnt_share);
1133 if (IS_MNT_SLAVE(old))
1134 list_add(&mnt->mnt_slave, &old->mnt_slave);
1135 mnt->mnt_master = old->mnt_master;
1136 } else {
1137 CLEAR_MNT_SHARED(mnt);
1138 }
1139 if (flag & CL_MAKE_SHARED)
1140 set_mnt_shared(mnt);
1141
1142
1143
1144 if (flag & CL_EXPIRE) {
1145 if (!list_empty(&old->mnt_expire))
1146 list_add(&mnt->mnt_expire, &old->mnt_expire);
1147 }
1148
1149 return mnt;
1150
1151 out_free:
1152 mnt_free_id(mnt);
1153 free_vfsmnt(mnt);
1154 return ERR_PTR(err);
1155}
1156
1157static void cleanup_mnt(struct mount *mnt)
1158{
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169 WARN_ON(mnt_get_writers(mnt));
1170 if (unlikely(mnt->mnt_pins.first))
1171 mnt_pin_kill(mnt);
1172 fsnotify_vfsmount_delete(&mnt->mnt);
1173 dput(mnt->mnt.mnt_root);
1174 deactivate_super(mnt->mnt.mnt_sb);
1175 mnt_free_id(mnt);
1176 call_rcu(&mnt->mnt_rcu, delayed_free_vfsmnt);
1177}
1178
1179static void __cleanup_mnt(struct rcu_head *head)
1180{
1181 cleanup_mnt(container_of(head, struct mount, mnt_rcu));
1182}
1183
1184static LLIST_HEAD(delayed_mntput_list);
1185static void delayed_mntput(struct work_struct *unused)
1186{
1187 struct llist_node *node = llist_del_all(&delayed_mntput_list);
1188 struct mount *m, *t;
1189
1190 llist_for_each_entry_safe(m, t, node, mnt_llist)
1191 cleanup_mnt(m);
1192}
1193static DECLARE_DELAYED_WORK(delayed_mntput_work, delayed_mntput);
1194
1195static void mntput_no_expire(struct mount *mnt)
1196{
1197 rcu_read_lock();
1198 mnt_add_count(mnt, -1);
1199 if (likely(mnt->mnt_ns)) {
1200 rcu_read_unlock();
1201 return;
1202 }
1203 lock_mount_hash();
1204 if (mnt_get_count(mnt)) {
1205 rcu_read_unlock();
1206 unlock_mount_hash();
1207 return;
1208 }
1209 if (unlikely(mnt->mnt.mnt_flags & MNT_DOOMED)) {
1210 rcu_read_unlock();
1211 unlock_mount_hash();
1212 return;
1213 }
1214 mnt->mnt.mnt_flags |= MNT_DOOMED;
1215 rcu_read_unlock();
1216
1217 list_del(&mnt->mnt_instance);
1218
1219 if (unlikely(!list_empty(&mnt->mnt_mounts))) {
1220 struct mount *p, *tmp;
1221 list_for_each_entry_safe(p, tmp, &mnt->mnt_mounts, mnt_child) {
1222 umount_mnt(p);
1223 }
1224 }
1225 unlock_mount_hash();
1226
1227 if (likely(!(mnt->mnt.mnt_flags & MNT_INTERNAL))) {
1228 struct task_struct *task = current;
1229 if (likely(!(task->flags & PF_KTHREAD))) {
1230 init_task_work(&mnt->mnt_rcu, __cleanup_mnt);
1231 if (!task_work_add(task, &mnt->mnt_rcu, true))
1232 return;
1233 }
1234 if (llist_add(&mnt->mnt_llist, &delayed_mntput_list))
1235 schedule_delayed_work(&delayed_mntput_work, 1);
1236 return;
1237 }
1238 cleanup_mnt(mnt);
1239}
1240
1241void mntput(struct vfsmount *mnt)
1242{
1243 if (mnt) {
1244 struct mount *m = real_mount(mnt);
1245
1246 if (unlikely(m->mnt_expiry_mark))
1247 m->mnt_expiry_mark = 0;
1248 mntput_no_expire(m);
1249 }
1250}
1251EXPORT_SYMBOL(mntput);
1252
/*
 * Take a reference on @mnt and return it.  NULL is passed through untouched.
 */
struct vfsmount *mntget(struct vfsmount *mnt)
{
	if (!mnt)
		return mnt;

	mnt_add_count(real_mount(mnt), 1);
	return mnt;
}
EXPORT_SYMBOL(mntget);
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271bool path_is_mountpoint(const struct path *path)
1272{
1273 unsigned seq;
1274 bool res;
1275
1276 if (!d_mountpoint(path->dentry))
1277 return false;
1278
1279 rcu_read_lock();
1280 do {
1281 seq = read_seqbegin(&mount_lock);
1282 res = __path_is_mountpoint(path);
1283 } while (read_seqretry(&mount_lock, seq));
1284 rcu_read_unlock();
1285
1286 return res;
1287}
1288EXPORT_SYMBOL(path_is_mountpoint);
1289
1290struct vfsmount *mnt_clone_internal(const struct path *path)
1291{
1292 struct mount *p;
1293 p = clone_mnt(real_mount(path->mnt), path->dentry, CL_PRIVATE);
1294 if (IS_ERR(p))
1295 return ERR_CAST(p);
1296 p->mnt.mnt_flags |= MNT_INTERNAL;
1297 return &p->mnt;
1298}
1299
1300#ifdef CONFIG_PROC_FS
1301
1302static void *m_start(struct seq_file *m, loff_t *pos)
1303{
1304 struct proc_mounts *p = m->private;
1305
1306 down_read(&namespace_sem);
1307 if (p->cached_event == p->ns->event) {
1308 void *v = p->cached_mount;
1309 if (*pos == p->cached_index)
1310 return v;
1311 if (*pos == p->cached_index + 1) {
1312 v = seq_list_next(v, &p->ns->list, &p->cached_index);
1313 return p->cached_mount = v;
1314 }
1315 }
1316
1317 p->cached_event = p->ns->event;
1318 p->cached_mount = seq_list_start(&p->ns->list, *pos);
1319 p->cached_index = *pos;
1320 return p->cached_mount;
1321}
1322
1323static void *m_next(struct seq_file *m, void *v, loff_t *pos)
1324{
1325 struct proc_mounts *p = m->private;
1326
1327 p->cached_mount = seq_list_next(v, &p->ns->list, pos);
1328 p->cached_index = *pos;
1329 return p->cached_mount;
1330}
1331
1332static void m_stop(struct seq_file *m, void *v)
1333{
1334 up_read(&namespace_sem);
1335}
1336
1337static int m_show(struct seq_file *m, void *v)
1338{
1339 struct proc_mounts *p = m->private;
1340 struct mount *r = list_entry(v, struct mount, mnt_list);
1341 return p->show(m, &r->mnt);
1342}
1343
1344const struct seq_operations mounts_op = {
1345 .start = m_start,
1346 .next = m_next,
1347 .stop = m_stop,
1348 .show = m_show,
1349};
1350#endif
1351
1352
1353
1354
1355
1356
1357
1358
1359
/*
 * Check whether the tree rooted at @m looks idle.
 *
 * Sums the reference counts of every mount in the tree under
 * lock_mount_hash() and compares the total against two references per
 * mount.  Returns 1 if the total does not exceed that, 0 otherwise.
 */
int may_umount_tree(struct vfsmount *m)
{
	struct mount *mnt = real_mount(m);
	struct mount *cur;
	int seen_refs = 0;
	int expected_refs = 0;

	BUG_ON(!m);

	lock_mount_hash();
	for (cur = mnt; cur; cur = next_mnt(cur, mnt)) {
		seen_refs += mnt_get_count(cur);
		expected_refs += 2;
	}
	unlock_mount_hash();

	return seen_refs > expected_refs ? 0 : 1;
}

EXPORT_SYMBOL(may_umount_tree);
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397int may_umount(struct vfsmount *mnt)
1398{
1399 int ret = 1;
1400 down_read(&namespace_sem);
1401 lock_mount_hash();
1402 if (propagate_mount_busy(real_mount(mnt), 2))
1403 ret = 0;
1404 unlock_mount_hash();
1405 up_read(&namespace_sem);
1406 return ret;
1407}
1408
1409EXPORT_SYMBOL(may_umount);
1410
1411static HLIST_HEAD(unmounted);
1412
1413static void namespace_unlock(void)
1414{
1415 struct hlist_head head;
1416
1417 hlist_move_list(&unmounted, &head);
1418
1419 up_write(&namespace_sem);
1420
1421 if (likely(hlist_empty(&head)))
1422 return;
1423
1424 synchronize_rcu();
1425
1426 group_pin_kill(&head);
1427}
1428
1429static inline void namespace_lock(void)
1430{
1431 down_write(&namespace_sem);
1432}
1433
/* Bit flags for the @how argument of umount_tree()/disconnect_mount(). */
enum umount_tree_flags {
	UMOUNT_SYNC	 = 1 << 0,	/* mark mounts MNT_SYNC_UMOUNT, always disconnect */
	UMOUNT_PROPAGATE = 1 << 1,	/* run the propagate_*() umount hooks */
	UMOUNT_CONNECTED = 1 << 2,	/* keep children of unmounted parents attached */
};
1439
1440static bool disconnect_mount(struct mount *mnt, enum umount_tree_flags how)
1441{
1442
1443 if (how & UMOUNT_SYNC)
1444 return true;
1445
1446
1447 if (!mnt_has_parent(mnt))
1448 return true;
1449
1450
1451
1452
1453
1454 if (!(mnt->mnt_parent->mnt.mnt_flags & MNT_UMOUNT))
1455 return true;
1456
1457
1458 if (how & UMOUNT_CONNECTED)
1459 return false;
1460
1461
1462 if (IS_MNT_LOCKED(mnt))
1463 return false;
1464
1465
1466 return true;
1467}
1468
1469
1470
1471
1472
1473static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
1474{
1475 LIST_HEAD(tmp_list);
1476 struct mount *p;
1477
1478 if (how & UMOUNT_PROPAGATE)
1479 propagate_mount_unlock(mnt);
1480
1481
1482 for (p = mnt; p; p = next_mnt(p, mnt)) {
1483 p->mnt.mnt_flags |= MNT_UMOUNT;
1484 list_move(&p->mnt_list, &tmp_list);
1485 }
1486
1487
1488 list_for_each_entry(p, &tmp_list, mnt_list) {
1489 list_del_init(&p->mnt_child);
1490 }
1491
1492
1493 if (how & UMOUNT_PROPAGATE)
1494 propagate_umount(&tmp_list);
1495
1496 while (!list_empty(&tmp_list)) {
1497 struct mnt_namespace *ns;
1498 bool disconnect;
1499 p = list_first_entry(&tmp_list, struct mount, mnt_list);
1500 list_del_init(&p->mnt_expire);
1501 list_del_init(&p->mnt_list);
1502 ns = p->mnt_ns;
1503 if (ns) {
1504 ns->mounts--;
1505 __touch_mnt_namespace(ns);
1506 }
1507 p->mnt_ns = NULL;
1508 if (how & UMOUNT_SYNC)
1509 p->mnt.mnt_flags |= MNT_SYNC_UMOUNT;
1510
1511 disconnect = disconnect_mount(p, how);
1512
1513 pin_insert_group(&p->mnt_umount, &p->mnt_parent->mnt,
1514 disconnect ? &unmounted : NULL);
1515 if (mnt_has_parent(p)) {
1516 mnt_add_count(p->mnt_parent, -1);
1517 if (!disconnect) {
1518
1519 list_add_tail(&p->mnt_child, &p->mnt_parent->mnt_mounts);
1520 } else {
1521 umount_mnt(p);
1522 }
1523 }
1524 change_mnt_propagation(p, MS_PRIVATE);
1525 }
1526}
1527
1528static void shrink_submounts(struct mount *mnt);
1529
1530static int do_umount(struct mount *mnt, int flags)
1531{
1532 struct super_block *sb = mnt->mnt.mnt_sb;
1533 int retval;
1534
1535 retval = security_sb_umount(&mnt->mnt, flags);
1536 if (retval)
1537 return retval;
1538
1539
1540
1541
1542
1543
1544
1545 if (flags & MNT_EXPIRE) {
1546 if (&mnt->mnt == current->fs->root.mnt ||
1547 flags & (MNT_FORCE | MNT_DETACH))
1548 return -EINVAL;
1549
1550
1551
1552
1553
1554 lock_mount_hash();
1555 if (mnt_get_count(mnt) != 2) {
1556 unlock_mount_hash();
1557 return -EBUSY;
1558 }
1559 unlock_mount_hash();
1560
1561 if (!xchg(&mnt->mnt_expiry_mark, 1))
1562 return -EAGAIN;
1563 }
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575 if (flags & MNT_FORCE && sb->s_op->umount_begin) {
1576 sb->s_op->umount_begin(sb);
1577 }
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588 if (&mnt->mnt == current->fs->root.mnt && !(flags & MNT_DETACH)) {
1589
1590
1591
1592
1593 if (!capable(CAP_SYS_ADMIN))
1594 return -EPERM;
1595 down_write(&sb->s_umount);
1596 if (!sb_rdonly(sb))
1597 retval = do_remount_sb(sb, SB_RDONLY, NULL, 0);
1598 up_write(&sb->s_umount);
1599 return retval;
1600 }
1601
1602 namespace_lock();
1603 lock_mount_hash();
1604 event++;
1605
1606 if (flags & MNT_DETACH) {
1607 if (!list_empty(&mnt->mnt_list))
1608 umount_tree(mnt, UMOUNT_PROPAGATE);
1609 retval = 0;
1610 } else {
1611 shrink_submounts(mnt);
1612 retval = -EBUSY;
1613 if (!propagate_mount_busy(mnt, 2)) {
1614 if (!list_empty(&mnt->mnt_list))
1615 umount_tree(mnt, UMOUNT_PROPAGATE|UMOUNT_SYNC);
1616 retval = 0;
1617 }
1618 }
1619 unlock_mount_hash();
1620 namespace_unlock();
1621 return retval;
1622}
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634void __detach_mounts(struct dentry *dentry)
1635{
1636 struct mountpoint *mp;
1637 struct mount *mnt;
1638
1639 namespace_lock();
1640 lock_mount_hash();
1641 mp = lookup_mountpoint(dentry);
1642 if (IS_ERR_OR_NULL(mp))
1643 goto out_unlock;
1644
1645 event++;
1646 while (!hlist_empty(&mp->m_list)) {
1647 mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list);
1648 if (mnt->mnt.mnt_flags & MNT_UMOUNT) {
1649 hlist_add_head(&mnt->mnt_umount.s_list, &unmounted);
1650 umount_mnt(mnt);
1651 }
1652 else umount_tree(mnt, UMOUNT_CONNECTED);
1653 }
1654 put_mountpoint(mp);
1655out_unlock:
1656 unlock_mount_hash();
1657 namespace_unlock();
1658}
1659
1660
1661
1662
1663static inline bool may_mount(void)
1664{
1665 return ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN);
1666}
1667
1668static inline bool may_mandlock(void)
1669{
1670#ifndef CONFIG_MANDATORY_FILE_LOCKING
1671 return false;
1672#endif
1673 return capable(CAP_SYS_ADMIN);
1674}
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684int ksys_umount(char __user *name, int flags)
1685{
1686 struct path path;
1687 struct mount *mnt;
1688 int retval;
1689 int lookup_flags = 0;
1690
1691 if (flags & ~(MNT_FORCE | MNT_DETACH | MNT_EXPIRE | UMOUNT_NOFOLLOW))
1692 return -EINVAL;
1693
1694 if (!may_mount())
1695 return -EPERM;
1696
1697 if (!(flags & UMOUNT_NOFOLLOW))
1698 lookup_flags |= LOOKUP_FOLLOW;
1699
1700 retval = user_path_mountpoint_at(AT_FDCWD, name, lookup_flags, &path);
1701 if (retval)
1702 goto out;
1703 mnt = real_mount(path.mnt);
1704 retval = -EINVAL;
1705 if (path.dentry != path.mnt->mnt_root)
1706 goto dput_and_out;
1707 if (!check_mnt(mnt))
1708 goto dput_and_out;
1709 if (mnt->mnt.mnt_flags & MNT_LOCKED)
1710 goto dput_and_out;
1711 retval = -EPERM;
1712 if (flags & MNT_FORCE && !capable(CAP_SYS_ADMIN))
1713 goto dput_and_out;
1714
1715 retval = do_umount(mnt, flags);
1716dput_and_out:
1717
1718 dput(path.dentry);
1719 mntput_no_expire(mnt);
1720out:
1721 return retval;
1722}
1723
/* umount(2) system call entry point: forwards straight to ksys_umount(). */
SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
{
	return ksys_umount(name, flags);
}
1728
#ifdef __ARCH_WANT_SYS_OLDUMOUNT

/*
 * One-argument umount system call, built only for architectures that
 * define __ARCH_WANT_SYS_OLDUMOUNT.  Equivalent to umount with flags == 0.
 */
SYSCALL_DEFINE1(oldumount, char __user *, name)
{
	return ksys_umount(name, 0);
}

#endif
1740
1741static bool is_mnt_ns_file(struct dentry *dentry)
1742{
1743
1744 return dentry->d_op == &ns_dentry_operations &&
1745 dentry->d_fsdata == &mntns_operations;
1746}
1747
/* Map an embedded ns_common back to the mnt_namespace that contains it. */
struct mnt_namespace *to_mnt_ns(struct ns_common *ns)
{
	return container_of(ns, struct mnt_namespace, ns);
}
1752
1753static bool mnt_ns_loop(struct dentry *dentry)
1754{
1755
1756
1757
1758 struct mnt_namespace *mnt_ns;
1759 if (!is_mnt_ns_file(dentry))
1760 return false;
1761
1762 mnt_ns = to_mnt_ns(get_proc_ns(dentry->d_inode));
1763 return current->nsproxy->mnt_ns->seq >= mnt_ns->seq;
1764}
1765
/*
 * copy_tree - clone the mount tree rooted at @mnt, restricted to @dentry.
 * @mnt:    root of the source tree
 * @dentry: dentry inside @mnt that becomes the root of the copy
 * @flag:   CL_* flags handed to clone_mnt(); CL_COPY_UNBINDABLE and
 *          CL_COPY_MNT_NS_FILE additionally control what may be copied
 *
 * Returns the root of the new tree, or an ERR_PTR.  If cloning a
 * descendant fails, the partially built copy is unmounted before the
 * error is returned.
 */
struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
					int flag)
{
	struct mount *res, *p, *q, *r, *parent;

	/* An unbindable root is only copied when explicitly allowed. */
	if (!(flag & CL_COPY_UNBINDABLE) && IS_MNT_UNBINDABLE(mnt))
		return ERR_PTR(-EINVAL);

	/* Same for a mount-namespace file used as the root dentry. */
	if (!(flag & CL_COPY_MNT_NS_FILE) && is_mnt_ns_file(dentry))
		return ERR_PTR(-EINVAL);

	res = q = clone_mnt(mnt, dentry, flag);
	if (IS_ERR(q))
		return q;

	q->mnt_mountpoint = mnt->mnt_mountpoint;

	/* p tracks a position in the source tree, q the matching clone. */
	p = mnt;
	list_for_each_entry(r, &mnt->mnt_mounts, mnt_child) {
		struct mount *s;
		/* Only children mounted at or below @dentry are copied. */
		if (!is_subdir(r->mnt_mountpoint, dentry))
			continue;

		for (s = r; s; s = next_mnt(s, r)) {
			/* Skip whole subtrees that must not be copied. */
			if (!(flag & CL_COPY_UNBINDABLE) &&
			    IS_MNT_UNBINDABLE(s)) {
				s = skip_mnt_tree(s);
				continue;
			}
			if (!(flag & CL_COPY_MNT_NS_FILE) &&
			    is_mnt_ns_file(s->mnt.mnt_root)) {
				s = skip_mnt_tree(s);
				continue;
			}
			/* Climb both trees in step until p is s's parent. */
			while (p != s->mnt_parent) {
				p = p->mnt_parent;
				q = q->mnt_parent;
			}
			p = s;
			parent = q;
			q = clone_mnt(p, p->mnt.mnt_root, flag);
			if (IS_ERR(q))
				goto out;
			lock_mount_hash();
			list_add_tail(&q->mnt_list, &res->mnt_list);
			attach_mnt(q, parent, p->mnt_mp);
			unlock_mount_hash();
		}
	}
	return res;
out:
	/* q holds the ERR_PTR from clone_mnt(); tear down what was built. */
	if (res) {
		lock_mount_hash();
		umount_tree(res, UMOUNT_SYNC);
		unlock_mount_hash();
	}
	return q;
}
1824
1825
1826
1827struct vfsmount *collect_mounts(const struct path *path)
1828{
1829 struct mount *tree;
1830 namespace_lock();
1831 if (!check_mnt(real_mount(path->mnt)))
1832 tree = ERR_PTR(-EINVAL);
1833 else
1834 tree = copy_tree(real_mount(path->mnt), path->dentry,
1835 CL_COPY_ALL | CL_PRIVATE);
1836 namespace_unlock();
1837 if (IS_ERR(tree))
1838 return ERR_CAST(tree);
1839 return &tree->mnt;
1840}
1841
1842void drop_collected_mounts(struct vfsmount *mnt)
1843{
1844 namespace_lock();
1845 lock_mount_hash();
1846 umount_tree(real_mount(mnt), UMOUNT_SYNC);
1847 unlock_mount_hash();
1848 namespace_unlock();
1849}
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860struct vfsmount *clone_private_mount(const struct path *path)
1861{
1862 struct mount *old_mnt = real_mount(path->mnt);
1863 struct mount *new_mnt;
1864
1865 if (IS_MNT_UNBINDABLE(old_mnt))
1866 return ERR_PTR(-EINVAL);
1867
1868 new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE);
1869 if (IS_ERR(new_mnt))
1870 return ERR_CAST(new_mnt);
1871
1872 return &new_mnt->mnt;
1873}
1874EXPORT_SYMBOL_GPL(clone_private_mount);
1875
1876int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg,
1877 struct vfsmount *root)
1878{
1879 struct mount *mnt;
1880 int res = f(root, arg);
1881 if (res)
1882 return res;
1883 list_for_each_entry(mnt, &real_mount(root)->mnt_list, mnt_list) {
1884 res = f(&mnt->mnt, arg);
1885 if (res)
1886 return res;
1887 }
1888 return 0;
1889}
1890
1891static void cleanup_group_ids(struct mount *mnt, struct mount *end)
1892{
1893 struct mount *p;
1894
1895 for (p = mnt; p != end; p = next_mnt(p, mnt)) {
1896 if (p->mnt_group_id && !IS_MNT_SHARED(p))
1897 mnt_release_group_id(p);
1898 }
1899}
1900
1901static int invent_group_ids(struct mount *mnt, bool recurse)
1902{
1903 struct mount *p;
1904
1905 for (p = mnt; p; p = recurse ? next_mnt(p, mnt) : NULL) {
1906 if (!p->mnt_group_id && !IS_MNT_SHARED(p)) {
1907 int err = mnt_alloc_group_id(p);
1908 if (err) {
1909 cleanup_group_ids(mnt, p);
1910 return err;
1911 }
1912 }
1913 }
1914
1915 return 0;
1916}
1917
1918int count_mounts(struct mnt_namespace *ns, struct mount *mnt)
1919{
1920 unsigned int max = READ_ONCE(sysctl_mount_max);
1921 unsigned int mounts = 0, old, pending, sum;
1922 struct mount *p;
1923
1924 for (p = mnt; p; p = next_mnt(p, mnt))
1925 mounts++;
1926
1927 old = ns->mounts;
1928 pending = ns->pending_mounts;
1929 sum = old + pending;
1930 if ((old > sum) ||
1931 (pending > sum) ||
1932 (max < sum) ||
1933 (mounts > (max - sum)))
1934 return -ENOSPC;
1935
1936 ns->pending_mounts = pending + mounts;
1937 return 0;
1938}
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
/*
 * attach_recursive_mnt - attach @source_mnt (and its tree) at @dest_mp.
 * @source_mnt:  root of the tree being attached
 * @dest_mnt:    mount that will become the parent
 * @dest_mp:     mountpoint within @dest_mnt
 * @parent_path: NULL for a new attachment; otherwise the tree is being
 *               moved and this receives the old location from detach_mnt()
 *
 * When @dest_mnt is shared, group ids are allocated for the source tree,
 * propagate_mnt() builds the propagated copies on a local list, and every
 * mount in the source tree is marked shared.
 *
 * Returns 0 on success or a negative errno; on failure the propagated
 * copies are unmounted, group ids released and pending_mounts reset.
 */
static int attach_recursive_mnt(struct mount *source_mnt,
			struct mount *dest_mnt,
			struct mountpoint *dest_mp,
			struct path *parent_path)
{
	HLIST_HEAD(tree_list);
	struct mnt_namespace *ns = dest_mnt->mnt_ns;
	struct mountpoint *smp;
	struct mount *child, *p;
	struct hlist_node *n;
	int err;

	/*
	 * Get a mountpoint for the source root up front; it is used below
	 * by mnt_change_mountpoint() when a propagated copy lands on top of
	 * an existing mount.
	 */
	smp = get_mountpoint(source_mnt->mnt.mnt_root);
	if (IS_ERR(smp))
		return PTR_ERR(smp);

	/* Only a new attachment (not a move) is charged against the limit. */
	if (!parent_path) {
		err = count_mounts(ns, source_mnt);
		if (err)
			goto out;
	}

	if (IS_MNT_SHARED(dest_mnt)) {
		err = invent_group_ids(source_mnt, true);
		if (err)
			goto out;
		err = propagate_mnt(dest_mnt, dest_mp, source_mnt, &tree_list);
		/* Taken before the error check: out_cleanup_ids expects it held. */
		lock_mount_hash();
		if (err)
			goto out_cleanup_ids;
		for (p = source_mnt; p; p = next_mnt(p, source_mnt))
			set_mnt_shared(p);
	} else {
		lock_mount_hash();
	}
	if (parent_path) {
		/* Move: unhook from the old parent, hook into the new one. */
		detach_mnt(source_mnt, parent_path);
		attach_mnt(source_mnt, dest_mnt, dest_mp);
		touch_mnt_namespace(source_mnt->mnt_ns);
	} else {
		mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt);
		commit_tree(source_mnt);
	}

	/* Commit every propagated copy collected on tree_list. */
	hlist_for_each_entry_safe(child, n, &tree_list, mnt_hash) {
		struct mount *q;
		hlist_del_init(&child->mnt_hash);
		q = __lookup_mnt(&child->mnt_parent->mnt,
				 child->mnt_mountpoint);
		/* Something is already mounted there: re-home it onto smp. */
		if (q)
			mnt_change_mountpoint(child, smp, q);
		commit_tree(child);
	}
	put_mountpoint(smp);
	unlock_mount_hash();

	return 0;

 out_cleanup_ids:
	/* Entered with the mount hash lock held. */
	while (!hlist_empty(&tree_list)) {
		child = hlist_entry(tree_list.first, struct mount, mnt_hash);
		child->mnt_parent->mnt_ns->pending_mounts = 0;
		umount_tree(child, UMOUNT_SYNC);
	}
	unlock_mount_hash();
	cleanup_group_ids(source_mnt, NULL);
 out:
	ns->pending_mounts = 0;

	/* The mount hash lock is not held here, so take mount_lock exclusively. */
	read_seqlock_excl(&mount_lock);
	put_mountpoint(smp);
	read_sequnlock_excl(&mount_lock);

	return err;
}
2082
2083static struct mountpoint *lock_mount(struct path *path)
2084{
2085 struct vfsmount *mnt;
2086 struct dentry *dentry = path->dentry;
2087retry:
2088 inode_lock(dentry->d_inode);
2089 if (unlikely(cant_mount(dentry))) {
2090 inode_unlock(dentry->d_inode);
2091 return ERR_PTR(-ENOENT);
2092 }
2093 namespace_lock();
2094 mnt = lookup_mnt(path);
2095 if (likely(!mnt)) {
2096 struct mountpoint *mp = get_mountpoint(dentry);
2097 if (IS_ERR(mp)) {
2098 namespace_unlock();
2099 inode_unlock(dentry->d_inode);
2100 return mp;
2101 }
2102 return mp;
2103 }
2104 namespace_unlock();
2105 inode_unlock(path->dentry->d_inode);
2106 path_put(path);
2107 path->mnt = mnt;
2108 dentry = path->dentry = dget(mnt->mnt_root);
2109 goto retry;
2110}
2111
/*
 * unlock_mount - undo a successful lock_mount().
 *
 * Drops the mountpoint reference under mount_lock, then releases the
 * namespace lock and the inode lock of the mountpoint's dentry.
 */
static void unlock_mount(struct mountpoint *where)
{
	/* Saved first: @where must not be touched after put_mountpoint(). */
	struct dentry *dentry = where->m_dentry;

	read_seqlock_excl(&mount_lock);
	put_mountpoint(where);
	read_sequnlock_excl(&mount_lock);

	namespace_unlock();
	inode_unlock(dentry->d_inode);
}
2123
2124static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp)
2125{
2126 if (mnt->mnt.mnt_sb->s_flags & SB_NOUSER)
2127 return -EINVAL;
2128
2129 if (d_is_dir(mp->m_dentry) !=
2130 d_is_dir(mnt->mnt.mnt_root))
2131 return -ENOTDIR;
2132
2133 return attach_recursive_mnt(mnt, p, mp, NULL);
2134}
2135
2136
2137
2138
2139
2140static int flags_to_propagation_type(int ms_flags)
2141{
2142 int type = ms_flags & ~(MS_REC | MS_SILENT);
2143
2144
2145 if (type & ~(MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
2146 return 0;
2147
2148 if (!is_power_of_2(type))
2149 return 0;
2150 return type;
2151}
2152
2153
2154
2155
2156static int do_change_type(struct path *path, int ms_flags)
2157{
2158 struct mount *m;
2159 struct mount *mnt = real_mount(path->mnt);
2160 int recurse = ms_flags & MS_REC;
2161 int type;
2162 int err = 0;
2163
2164 if (path->dentry != path->mnt->mnt_root)
2165 return -EINVAL;
2166
2167 type = flags_to_propagation_type(ms_flags);
2168 if (!type)
2169 return -EINVAL;
2170
2171 namespace_lock();
2172 if (type == MS_SHARED) {
2173 err = invent_group_ids(mnt, recurse);
2174 if (err)
2175 goto out_unlock;
2176 }
2177
2178 lock_mount_hash();
2179 for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL))
2180 change_mnt_propagation(m, type);
2181 unlock_mount_hash();
2182
2183 out_unlock:
2184 namespace_unlock();
2185 return err;
2186}
2187
2188static bool has_locked_children(struct mount *mnt, struct dentry *dentry)
2189{
2190 struct mount *child;
2191 list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
2192 if (!is_subdir(child->mnt_mountpoint, dentry))
2193 continue;
2194
2195 if (child->mnt.mnt_flags & MNT_LOCKED)
2196 return true;
2197 }
2198 return false;
2199}
2200
2201
2202
2203
/*
 * do_loopback - bind mount @old_name onto @path.
 * @path:     destination
 * @old_name: kernel-space path of the source
 * @recurse:  non-zero to copy the whole tree, zero for the single mount
 *
 * Returns 0 on success or a negative errno.
 */
static int do_loopback(struct path *path, const char *old_name,
				int recurse)
{
	struct path old_path;
	struct mount *mnt = NULL, *old, *parent;
	struct mountpoint *mp;
	int err;
	if (!old_name || !*old_name)
		return -EINVAL;
	err = kern_path(old_name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &old_path);
	if (err)
		return err;

	/* Refuse a source that could create a mount-namespace loop. */
	err = -EINVAL;
	if (mnt_ns_loop(old_path.dentry))
		goto out;

	mp = lock_mount(path);
	err = PTR_ERR(mp);
	if (IS_ERR(mp))
		goto out;

	old = real_mount(old_path.mnt);
	parent = real_mount(path->mnt);

	err = -EINVAL;
	if (IS_MNT_UNBINDABLE(old))
		goto out2;

	if (!check_mnt(parent))
		goto out2;

	/* The source must pass check_mnt() unless it uses ns_dentry_operations. */
	if (!check_mnt(old) && old_path.dentry->d_op != &ns_dentry_operations)
		goto out2;

	/* A non-recursive bind is refused when locked children sit below the source. */
	if (!recurse && has_locked_children(old, old_path.dentry))
		goto out2;

	if (recurse)
		mnt = copy_tree(old, old_path.dentry, CL_COPY_MNT_NS_FILE);
	else
		mnt = clone_mnt(old, old_path.dentry, 0);

	if (IS_ERR(mnt)) {
		err = PTR_ERR(mnt);
		goto out2;
	}

	/* The root of the new copy is never left locked. */
	mnt->mnt.mnt_flags &= ~MNT_LOCKED;

	err = graft_tree(mnt, parent, mp);
	if (err) {
		/* Attaching failed: discard the copy. */
		lock_mount_hash();
		umount_tree(mnt, UMOUNT_SYNC);
		unlock_mount_hash();
	}
out2:
	unlock_mount(mp);
out:
	path_put(&old_path);
	return err;
}
2266
2267static int change_mount_flags(struct vfsmount *mnt, int ms_flags)
2268{
2269 int error = 0;
2270 int readonly_request = 0;
2271
2272 if (ms_flags & MS_RDONLY)
2273 readonly_request = 1;
2274 if (readonly_request == __mnt_is_readonly(mnt))
2275 return 0;
2276
2277 if (readonly_request)
2278 error = mnt_make_readonly(real_mount(mnt));
2279 else
2280 __mnt_unmake_readonly(real_mount(mnt));
2281 return error;
2282}
2283
2284
2285
2286
2287
2288
2289static int do_remount(struct path *path, int ms_flags, int sb_flags,
2290 int mnt_flags, void *data)
2291{
2292 int err;
2293 struct super_block *sb = path->mnt->mnt_sb;
2294 struct mount *mnt = real_mount(path->mnt);
2295
2296 if (!check_mnt(mnt))
2297 return -EINVAL;
2298
2299 if (path->dentry != path->mnt->mnt_root)
2300 return -EINVAL;
2301
2302
2303
2304
2305
2306
2307
2308 if ((mnt->mnt.mnt_flags & MNT_LOCK_READONLY) &&
2309 !(mnt_flags & MNT_READONLY)) {
2310 return -EPERM;
2311 }
2312 if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) &&
2313 !(mnt_flags & MNT_NODEV)) {
2314 return -EPERM;
2315 }
2316 if ((mnt->mnt.mnt_flags & MNT_LOCK_NOSUID) &&
2317 !(mnt_flags & MNT_NOSUID)) {
2318 return -EPERM;
2319 }
2320 if ((mnt->mnt.mnt_flags & MNT_LOCK_NOEXEC) &&
2321 !(mnt_flags & MNT_NOEXEC)) {
2322 return -EPERM;
2323 }
2324 if ((mnt->mnt.mnt_flags & MNT_LOCK_ATIME) &&
2325 ((mnt->mnt.mnt_flags & MNT_ATIME_MASK) != (mnt_flags & MNT_ATIME_MASK))) {
2326 return -EPERM;
2327 }
2328
2329 err = security_sb_remount(sb, data);
2330 if (err)
2331 return err;
2332
2333 down_write(&sb->s_umount);
2334 if (ms_flags & MS_BIND)
2335 err = change_mount_flags(path->mnt, ms_flags);
2336 else if (!capable(CAP_SYS_ADMIN))
2337 err = -EPERM;
2338 else
2339 err = do_remount_sb(sb, sb_flags, data, 0);
2340 if (!err) {
2341 lock_mount_hash();
2342 mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK;
2343 mnt->mnt.mnt_flags = mnt_flags;
2344 touch_mnt_namespace(mnt->mnt_ns);
2345 unlock_mount_hash();
2346 }
2347 up_write(&sb->s_umount);
2348 return err;
2349}
2350
/*
 * tree_contains_unbindable - returns 1 if @mnt or any mount in the tree
 * below it is unbindable, 0 otherwise.
 */
static inline int tree_contains_unbindable(struct mount *mnt)
{
	struct mount *cur = mnt;

	while (cur) {
		if (IS_MNT_UNBINDABLE(cur))
			return 1;
		cur = next_mnt(cur, mnt);
	}

	return 0;
}
2360
/*
 * do_move_mount - move the mount at @old_name to @path.
 * @path:     destination
 * @old_name: kernel-space path of the mount to move
 *
 * Returns 0 on success or a negative errno: -EINVAL for the validity
 * checks below, -ELOOP if the destination lies inside the tree being
 * moved, or the error from path lookup, lock_mount() or
 * attach_recursive_mnt().
 */
static int do_move_mount(struct path *path, const char *old_name)
{
	struct path old_path, parent_path;
	struct mount *p;
	struct mount *old;
	struct mountpoint *mp;
	int err;
	if (!old_name || !*old_name)
		return -EINVAL;
	err = kern_path(old_name, LOOKUP_FOLLOW, &old_path);
	if (err)
		return err;

	mp = lock_mount(path);
	err = PTR_ERR(mp);
	if (IS_ERR(mp))
		goto out;

	old = real_mount(old_path.mnt);
	p = real_mount(path->mnt);

	err = -EINVAL;
	if (!check_mnt(p) || !check_mnt(old))
		goto out1;

	if (old->mnt.mnt_flags & MNT_LOCKED)
		goto out1;

	/* The source path must be the root of a mount that has a parent. */
	err = -EINVAL;
	if (old_path.dentry != old_path.mnt->mnt_root)
		goto out1;

	if (!mnt_has_parent(old))
		goto out1;

	/* Directory onto directory, non-directory onto non-directory. */
	if (d_is_dir(path->dentry) !=
	      d_is_dir(old_path.dentry))
		goto out1;
	/* A mount whose parent is shared cannot be moved. */
	if (IS_MNT_SHARED(old->mnt_parent))
		goto out1;
	/* A tree with unbindable mounts cannot move under a shared mount. */
	if (IS_MNT_SHARED(p) && tree_contains_unbindable(old))
		goto out1;
	/* Walk up from the destination: it must not be inside @old. */
	err = -ELOOP;
	for (; mnt_has_parent(p); p = p->mnt_parent)
		if (p == old)
			goto out1;

	err = attach_recursive_mnt(old, real_mount(path->mnt), mp, &parent_path);
	if (err)
		goto out1;

	/* Take the moved mount off whatever expiry list it was on. */
	list_del_init(&old->mnt_expire);
out1:
	unlock_mount(mp);
out:
	/* parent_path is only filled in when the move succeeded. */
	if (!err)
		path_put(&parent_path);
	path_put(&old_path);
	return err;
}
2430
2431static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype)
2432{
2433 int err;
2434 const char *subtype = strchr(fstype, '.');
2435 if (subtype) {
2436 subtype++;
2437 err = -EINVAL;
2438 if (!subtype[0])
2439 goto err;
2440 } else
2441 subtype = "";
2442
2443 mnt->mnt_sb->s_subtype = kstrdup(subtype, GFP_KERNEL);
2444 err = -ENOMEM;
2445 if (!mnt->mnt_sb->s_subtype)
2446 goto err;
2447 return mnt;
2448
2449 err:
2450 mntput(mnt);
2451 return ERR_PTR(err);
2452}
2453
2454
2455
2456
2457static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags)
2458{
2459 struct mountpoint *mp;
2460 struct mount *parent;
2461 int err;
2462
2463 mnt_flags &= ~MNT_INTERNAL_FLAGS;
2464
2465 mp = lock_mount(path);
2466 if (IS_ERR(mp))
2467 return PTR_ERR(mp);
2468
2469 parent = real_mount(path->mnt);
2470 err = -EINVAL;
2471 if (unlikely(!check_mnt(parent))) {
2472
2473 if (!(mnt_flags & MNT_SHRINKABLE))
2474 goto unlock;
2475
2476 if (!parent->mnt_ns)
2477 goto unlock;
2478 }
2479
2480
2481 err = -EBUSY;
2482 if (path->mnt->mnt_sb == newmnt->mnt.mnt_sb &&
2483 path->mnt->mnt_root == path->dentry)
2484 goto unlock;
2485
2486 err = -EINVAL;
2487 if (d_is_symlink(newmnt->mnt.mnt_root))
2488 goto unlock;
2489
2490 newmnt->mnt.mnt_flags = mnt_flags;
2491 err = graft_tree(newmnt, parent, mp);
2492
2493unlock:
2494 unlock_mount(mp);
2495 return err;
2496}
2497
/* Forward declaration: do_new_mount() below calls this before it is defined. */
static bool mount_too_revealing(struct vfsmount *mnt, int *new_mnt_flags);
2499
2500
2501
2502
2503
2504static int do_new_mount(struct path *path, const char *fstype, int sb_flags,
2505 int mnt_flags, const char *name, void *data)
2506{
2507 struct file_system_type *type;
2508 struct vfsmount *mnt;
2509 int err;
2510
2511 if (!fstype)
2512 return -EINVAL;
2513
2514 type = get_fs_type(fstype);
2515 if (!type)
2516 return -ENODEV;
2517
2518 mnt = vfs_kern_mount(type, sb_flags, name, data);
2519 if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) &&
2520 !mnt->mnt_sb->s_subtype)
2521 mnt = fs_set_subtype(mnt, fstype);
2522
2523 put_filesystem(type);
2524 if (IS_ERR(mnt))
2525 return PTR_ERR(mnt);
2526
2527 if (mount_too_revealing(mnt, &mnt_flags)) {
2528 mntput(mnt);
2529 return -EPERM;
2530 }
2531
2532 err = do_add_mount(real_mount(mnt), path, mnt_flags);
2533 if (err)
2534 mntput(mnt);
2535 return err;
2536}
2537
/*
 * finish_automount - attach the automounted mount @m at @path.
 *
 * Returns 0 on success.  On failure the mount is taken off any expiry
 * list, two references on @m are dropped and a negative errno is
 * returned: -ELOOP if @m is the same superblock and root as @path, or
 * the error from do_add_mount().
 */
int finish_automount(struct vfsmount *m, struct path *path)
{
	struct mount *mnt = real_mount(m);
	int err;

	/* The caller must hand over a mount with at least two references. */
	BUG_ON(mnt_get_count(mnt) < 2);

	if (m->mnt_sb == path->mnt->mnt_sb &&
	    m->mnt_root == path->dentry) {
		err = -ELOOP;
		goto fail;
	}

	/* Inherit the parent's mount flags; mark the mount as shrinkable. */
	err = do_add_mount(mnt, path, path->mnt->mnt_flags | MNT_SHRINKABLE);
	if (!err)
		return 0;
fail:
	/* Remove @m from any expiry list it may be on. */
	if (!list_empty(&mnt->mnt_expire)) {
		namespace_lock();
		list_del_init(&mnt->mnt_expire);
		namespace_unlock();
	}
	/* Two puts, matching the two references asserted above. */
	mntput(m);
	mntput(m);
	return err;
}
2567
2568
2569
2570
2571
2572
2573void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list)
2574{
2575 namespace_lock();
2576
2577 list_add_tail(&real_mount(mnt)->mnt_expire, expiry_list);
2578
2579 namespace_unlock();
2580}
2581EXPORT_SYMBOL(mnt_set_expiry);
2582
2583
2584
2585
2586
2587
/*
 * mark_mounts_for_expiry - expire unused mounts on the list @mounts.
 *
 * Each call sets the expiry mark on every mount on the list.  A mount
 * whose mark was already set and that propagate_mount_busy() reports as
 * not busy is unmounted.
 */
void mark_mounts_for_expiry(struct list_head *mounts)
{
	struct mount *mnt, *next;
	LIST_HEAD(graveyard);

	if (list_empty(mounts))
		return;

	namespace_lock();
	lock_mount_hash();

	/*
	 * xchg() returns the previous mark: 0 means the mount was only
	 * marked by this call, so it is left alone this time round.
	 */
	list_for_each_entry_safe(mnt, next, mounts, mnt_expire) {
		if (!xchg(&mnt->mnt_expiry_mark, 1) ||
			propagate_mount_busy(mnt, 1))
			continue;
		list_move(&mnt->mnt_expire, &graveyard);
	}
	/*
	 * Loop ends once the graveyard is empty - presumably umount_tree()
	 * unlinks mnt_expire; confirm against its definition.
	 */
	while (!list_empty(&graveyard)) {
		mnt = list_first_entry(&graveyard, struct mount, mnt_expire);
		touch_mnt_namespace(mnt->mnt_ns);
		umount_tree(mnt, UMOUNT_PROPAGATE|UMOUNT_SYNC);
	}
	unlock_mount_hash();
	namespace_unlock();
}

EXPORT_SYMBOL_GPL(mark_mounts_for_expiry);
2621
2622
2623
2624
2625
2626
2627
/*
 * select_submounts - collect idle shrinkable submounts of @parent.
 *
 * Iteratively walks the tree of MNT_SHRINKABLE mounts below @parent and
 * moves every leaf that propagate_mount_busy() reports as not busy onto
 * @graveyard.  Non-shrinkable mounts are skipped together with everything
 * below them.  Returns the number of mounts moved.
 */
static int select_submounts(struct mount *parent, struct list_head *graveyard)
{
	struct mount *this_parent = parent;
	struct list_head *next;
	int found = 0;

repeat:
	next = this_parent->mnt_mounts.next;
resume:
	while (next != &this_parent->mnt_mounts) {
		struct list_head *tmp = next;
		struct mount *mnt = list_entry(tmp, struct mount, mnt_child);

		/* Advance first: mnt may be moved off to the graveyard. */
		next = tmp->next;
		if (!(mnt->mnt.mnt_flags & MNT_SHRINKABLE))
			continue;
		/* Descend a level if this mount has children of its own. */
		if (!list_empty(&mnt->mnt_mounts)) {
			this_parent = mnt;
			goto repeat;
		}

		if (!propagate_mount_busy(mnt, 1)) {
			list_move_tail(&mnt->mnt_expire, graveyard);
			found++;
		}
	}
	/* Done at this level: go back up and continue with the next sibling. */
	if (this_parent != parent) {
		next = this_parent->mnt_child.next;
		this_parent = this_parent->mnt_parent;
		goto resume;
	}
	return found;
}
2667
2668
2669
2670
2671
2672
2673
2674static void shrink_submounts(struct mount *mnt)
2675{
2676 LIST_HEAD(graveyard);
2677 struct mount *m;
2678
2679
2680 while (select_submounts(mnt, &graveyard)) {
2681 while (!list_empty(&graveyard)) {
2682 m = list_first_entry(&graveyard, struct mount,
2683 mnt_expire);
2684 touch_mnt_namespace(m->mnt_ns);
2685 umount_tree(m, UMOUNT_PROPAGATE|UMOUNT_SYNC);
2686 }
2687 }
2688}
2689
2690
2691
2692
2693
2694
2695
2696static long exact_copy_from_user(void *to, const void __user * from,
2697 unsigned long n)
2698{
2699 char *t = to;
2700 const char __user *f = from;
2701 char c;
2702
2703 if (!access_ok(VERIFY_READ, from, n))
2704 return n;
2705
2706 while (n) {
2707 if (__get_user(c, f)) {
2708 memset(t, 0, n);
2709 break;
2710 }
2711 *t++ = c;
2712 f++;
2713 n--;
2714 }
2715 return n;
2716}
2717
2718void *copy_mount_options(const void __user * data)
2719{
2720 int i;
2721 unsigned long size;
2722 char *copy;
2723
2724 if (!data)
2725 return NULL;
2726
2727 copy = kmalloc(PAGE_SIZE, GFP_KERNEL);
2728 if (!copy)
2729 return ERR_PTR(-ENOMEM);
2730
2731
2732
2733
2734
2735
2736 size = TASK_SIZE - (unsigned long)data;
2737 if (size > PAGE_SIZE)
2738 size = PAGE_SIZE;
2739
2740 i = size - exact_copy_from_user(copy, data, size);
2741 if (!i) {
2742 kfree(copy);
2743 return ERR_PTR(-EFAULT);
2744 }
2745 if (i != PAGE_SIZE)
2746 memset(copy + i, 0, PAGE_SIZE - i);
2747 return copy;
2748}
2749
2750char *copy_mount_string(const void __user *data)
2751{
2752 return data ? strndup_user(data, PAGE_SIZE) : NULL;
2753}
2754
2755
2756
2757
2758
2759
2760
2761
2762
2763
2764
2765
2766
2767
2768
2769long do_mount(const char *dev_name, const char __user *dir_name,
2770 const char *type_page, unsigned long flags, void *data_page)
2771{
2772 struct path path;
2773 unsigned int mnt_flags = 0, sb_flags;
2774 int retval = 0;
2775
2776
2777 if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
2778 flags &= ~MS_MGC_MSK;
2779
2780
2781 if (data_page)
2782 ((char *)data_page)[PAGE_SIZE - 1] = 0;
2783
2784 if (flags & MS_NOUSER)
2785 return -EINVAL;
2786
2787
2788 retval = user_path(dir_name, &path);
2789 if (retval)
2790 return retval;
2791
2792 retval = security_sb_mount(dev_name, &path,
2793 type_page, flags, data_page);
2794 if (!retval && !may_mount())
2795 retval = -EPERM;
2796 if (!retval && (flags & SB_MANDLOCK) && !may_mandlock())
2797 retval = -EPERM;
2798 if (retval)
2799 goto dput_out;
2800
2801
2802 if (!(flags & MS_NOATIME))
2803 mnt_flags |= MNT_RELATIME;
2804
2805
2806 if (flags & MS_NOSUID)
2807 mnt_flags |= MNT_NOSUID;
2808 if (flags & MS_NODEV)
2809 mnt_flags |= MNT_NODEV;
2810 if (flags & MS_NOEXEC)
2811 mnt_flags |= MNT_NOEXEC;
2812 if (flags & MS_NOATIME)
2813 mnt_flags |= MNT_NOATIME;
2814 if (flags & MS_NODIRATIME)
2815 mnt_flags |= MNT_NODIRATIME;
2816 if (flags & MS_STRICTATIME)
2817 mnt_flags &= ~(MNT_RELATIME | MNT_NOATIME);
2818 if (flags & MS_RDONLY)
2819 mnt_flags |= MNT_READONLY;
2820
2821
2822 if ((flags & MS_REMOUNT) &&
2823 ((flags & (MS_NOATIME | MS_NODIRATIME | MS_RELATIME |
2824 MS_STRICTATIME)) == 0)) {
2825 mnt_flags &= ~MNT_ATIME_MASK;
2826 mnt_flags |= path.mnt->mnt_flags & MNT_ATIME_MASK;
2827 }
2828
2829 sb_flags = flags & (SB_RDONLY |
2830 SB_SYNCHRONOUS |
2831 SB_MANDLOCK |
2832 SB_DIRSYNC |
2833 SB_SILENT |
2834 SB_POSIXACL |
2835 SB_LAZYTIME |
2836 SB_I_VERSION);
2837
2838 if (flags & MS_REMOUNT)
2839 retval = do_remount(&path, flags, sb_flags, mnt_flags,
2840 data_page);
2841 else if (flags & MS_BIND)
2842 retval = do_loopback(&path, dev_name, flags & MS_REC);
2843 else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
2844 retval = do_change_type(&path, flags);
2845 else if (flags & MS_MOVE)
2846 retval = do_move_mount(&path, dev_name);
2847 else
2848 retval = do_new_mount(&path, type_page, sb_flags, mnt_flags,
2849 dev_name, data_page);
2850dput_out:
2851 path_put(&path);
2852 return retval;
2853}
2854
/*
 * Charge one mount namespace to the caller's effective uid in @ns.
 * Returns whatever inc_ucount() returns; alloc_mnt_ns() treats NULL as
 * "limit reached".
 */
static struct ucounts *inc_mnt_namespaces(struct user_namespace *ns)
{
	return inc_ucount(ns, current_euid(), UCOUNT_MNT_NAMESPACES);
}
2859
/* Undo inc_mnt_namespaces(): give back one mount-namespace count. */
static void dec_mnt_namespaces(struct ucounts *ucounts)
{
	dec_ucount(ucounts, UCOUNT_MNT_NAMESPACES);
}
2864
/*
 * free_mnt_ns - release everything alloc_mnt_ns() acquired for @ns: the
 * inode number, the ucount charge, the user namespace reference, and the
 * structure itself.
 */
static void free_mnt_ns(struct mnt_namespace *ns)
{
	ns_free_inum(&ns->ns);
	dec_mnt_namespaces(ns->ucounts);
	put_user_ns(ns->user_ns);
	kfree(ns);
}
2872
2873
2874
2875
2876
2877
2878
2879
/*
 * Source of per-namespace sequence numbers: alloc_mnt_ns() gives each new
 * mount namespace the next value, and mnt_ns_loop() compares these values.
 */
static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1);
2881
2882static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
2883{
2884 struct mnt_namespace *new_ns;
2885 struct ucounts *ucounts;
2886 int ret;
2887
2888 ucounts = inc_mnt_namespaces(user_ns);
2889 if (!ucounts)
2890 return ERR_PTR(-ENOSPC);
2891
2892 new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
2893 if (!new_ns) {
2894 dec_mnt_namespaces(ucounts);
2895 return ERR_PTR(-ENOMEM);
2896 }
2897 ret = ns_alloc_inum(&new_ns->ns);
2898 if (ret) {
2899 kfree(new_ns);
2900 dec_mnt_namespaces(ucounts);
2901 return ERR_PTR(ret);
2902 }
2903 new_ns->ns.ops = &mntns_operations;
2904 new_ns->seq = atomic64_add_return(1, &mnt_ns_seq);
2905 atomic_set(&new_ns->count, 1);
2906 new_ns->root = NULL;
2907 INIT_LIST_HEAD(&new_ns->list);
2908 init_waitqueue_head(&new_ns->poll);
2909 new_ns->event = 0;
2910 new_ns->user_ns = get_user_ns(user_ns);
2911 new_ns->ucounts = ucounts;
2912 new_ns->mounts = 0;
2913 new_ns->pending_mounts = 0;
2914 return new_ns;
2915}
2916
/*
 * copy_mnt_ns - get the mount namespace for a new task.
 * @flags:   clone flags; only CLONE_NEWNS is examined
 * @ns:      the current mount namespace (must not be NULL)
 * @user_ns: user namespace that will own a newly created namespace
 * @new_fs:  fs_struct whose root/pwd mounts are switched to the copy,
 *           may be NULL
 *
 * Without CLONE_NEWNS, takes a reference on @ns and returns it.  With it,
 * returns a new namespace holding a copy of @ns's mount tree, or an
 * ERR_PTR on failure.
 */
__latent_entropy
struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
		struct user_namespace *user_ns, struct fs_struct *new_fs)
{
	struct mnt_namespace *new_ns;
	struct vfsmount *rootmnt = NULL, *pwdmnt = NULL;
	struct mount *p, *q;
	struct mount *old;
	struct mount *new;
	int copy_flags;

	BUG_ON(!ns);

	if (likely(!(flags & CLONE_NEWNS))) {
		get_mnt_ns(ns);
		return ns;
	}

	old = ns->root;

	new_ns = alloc_mnt_ns(user_ns);
	if (IS_ERR(new_ns))
		return new_ns;

	namespace_lock();
	/* First pass: copy the tree topology. */
	copy_flags = CL_COPY_UNBINDABLE | CL_EXPIRE;
	/* Crossing into a different user namespace adds two more flags. */
	if (user_ns != ns->user_ns)
		copy_flags |= CL_SHARED_TO_SLAVE | CL_UNPRIVILEGED;
	new = copy_tree(old, old->mnt.mnt_root, copy_flags);
	if (IS_ERR(new)) {
		namespace_unlock();
		free_mnt_ns(new_ns);
		return ERR_CAST(new);
	}
	new_ns->root = new;
	list_add_tail(&new_ns->list, &new->mnt_list);

	/*
	 * Second pass: walk old (p) and new (q) trees in step, assign every
	 * copied mount to the new namespace, and retarget @new_fs's root and
	 * pwd mounts at the copies.
	 */
	p = old;
	q = new;
	while (p) {
		q->mnt_ns = new_ns;
		new_ns->mounts++;
		if (new_fs) {
			if (&p->mnt == new_fs->root.mnt) {
				new_fs->root.mnt = mntget(&q->mnt);
				rootmnt = &p->mnt;
			}
			if (&p->mnt == new_fs->pwd.mnt) {
				new_fs->pwd.mnt = mntget(&q->mnt);
				pwdmnt = &p->mnt;
			}
		}
		p = next_mnt(p, old);
		q = next_mnt(q, new);
		if (!q)
			break;
		/* Skip source mounts that have no counterpart in the copy. */
		while (p->mnt.mnt_root != q->mnt.mnt_root)
			p = next_mnt(p, old);
	}
	namespace_unlock();

	/* Drop the references @new_fs held on the old mounts. */
	if (rootmnt)
		mntput(rootmnt);
	if (pwdmnt)
		mntput(pwdmnt);

	return new_ns;
}
2991
2992
2993
2994
2995
2996static struct mnt_namespace *create_mnt_ns(struct vfsmount *m)
2997{
2998 struct mnt_namespace *new_ns = alloc_mnt_ns(&init_user_ns);
2999 if (!IS_ERR(new_ns)) {
3000 struct mount *mnt = real_mount(m);
3001 mnt->mnt_ns = new_ns;
3002 new_ns->root = mnt;
3003 new_ns->mounts++;
3004 list_add(&mnt->mnt_list, &new_ns->list);
3005 } else {
3006 mntput(m);
3007 }
3008 return new_ns;
3009}
3010
/*
 * mount_subtree - look up @name inside @mnt and return its dentry.
 *
 * A temporary mount namespace is created around @mnt for the lookup and
 * dropped again afterwards.  On success the dentry found is returned with
 * an extra active reference on its superblock and with s_umount held for
 * writing.  On failure an ERR_PTR is returned.
 */
struct dentry *mount_subtree(struct vfsmount *mnt, const char *name)
{
	struct mnt_namespace *ns;
	struct super_block *s;
	struct path path;
	int err;

	/* create_mnt_ns() drops the reference on @mnt itself if it fails. */
	ns = create_mnt_ns(mnt);
	if (IS_ERR(ns))
		return ERR_CAST(ns);

	err = vfs_path_lookup(mnt->mnt_root, mnt,
			name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &path);

	put_mnt_ns(ns);

	if (err)
		return ERR_PTR(err);

	/* Trade the vfsmount reference for an active superblock reference. */
	s = path.mnt->mnt_sb;
	atomic_inc(&s->s_active);
	mntput(path.mnt);
	/* Hand the superblock back locked. */
	down_write(&s->s_umount);
	/* path.dentry keeps the reference obtained by the lookup. */
	return path.dentry;
}
EXPORT_SYMBOL(mount_subtree);
3040
3041int ksys_mount(char __user *dev_name, char __user *dir_name, char __user *type,
3042 unsigned long flags, void __user *data)
3043{
3044 int ret;
3045 char *kernel_type;
3046 char *kernel_dev;
3047 void *options;
3048
3049 kernel_type = copy_mount_string(type);
3050 ret = PTR_ERR(kernel_type);
3051 if (IS_ERR(kernel_type))
3052 goto out_type;
3053
3054 kernel_dev = copy_mount_string(dev_name);
3055 ret = PTR_ERR(kernel_dev);
3056 if (IS_ERR(kernel_dev))
3057 goto out_dev;
3058
3059 options = copy_mount_options(data);
3060 ret = PTR_ERR(options);
3061 if (IS_ERR(options))
3062 goto out_data;
3063
3064 ret = do_mount(kernel_dev, dir_name, kernel_type, flags, options);
3065
3066 kfree(options);
3067out_data:
3068 kfree(kernel_dev);
3069out_dev:
3070 kfree(kernel_type);
3071out_type:
3072 return ret;
3073}
3074
/* mount(2) system call entry point: forwards straight to ksys_mount(). */
SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
		char __user *, type, unsigned long, flags, void __user *, data)
{
	return ksys_mount(dev_name, dir_name, type, flags, data);
}
3080
3081
3082
3083
3084
3085
3086bool is_path_reachable(struct mount *mnt, struct dentry *dentry,
3087 const struct path *root)
3088{
3089 while (&mnt->mnt != root->mnt && mnt_has_parent(mnt)) {
3090 dentry = mnt->mnt_mountpoint;
3091 mnt = mnt->mnt_parent;
3092 }
3093 return &mnt->mnt == root->mnt && is_subdir(dentry, root->dentry);
3094}
3095
3096bool path_is_under(const struct path *path1, const struct path *path2)
3097{
3098 bool res;
3099 read_seqlock_excl(&mount_lock);
3100 res = is_path_reachable(real_mount(path1->mnt), path1->dentry, path2);
3101 read_sequnlock_excl(&mount_lock);
3102 return res;
3103}
3104EXPORT_SYMBOL(path_is_under);
3105
3106
3107
3108
3109
3110
3111
3112
3113
3114
3115
3116
3117
3118
3119
3120
3121
3122
3123
3124
3125
3126
3127
3128
3129
3130
3131SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
3132 const char __user *, put_old)
3133{
3134 struct path new, old, parent_path, root_parent, root;
3135 struct mount *new_mnt, *root_mnt, *old_mnt;
3136 struct mountpoint *old_mp, *root_mp;
3137 int error;
3138
3139 if (!may_mount())
3140 return -EPERM;
3141
3142 error = user_path_dir(new_root, &new);
3143 if (error)
3144 goto out0;
3145
3146 error = user_path_dir(put_old, &old);
3147 if (error)
3148 goto out1;
3149
3150 error = security_sb_pivotroot(&old, &new);
3151 if (error)
3152 goto out2;
3153
3154 get_fs_root(current->fs, &root);
3155 old_mp = lock_mount(&old);
3156 error = PTR_ERR(old_mp);
3157 if (IS_ERR(old_mp))
3158 goto out3;
3159
3160 error = -EINVAL;
3161 new_mnt = real_mount(new.mnt);
3162 root_mnt = real_mount(root.mnt);
3163 old_mnt = real_mount(old.mnt);
3164 if (IS_MNT_SHARED(old_mnt) ||
3165 IS_MNT_SHARED(new_mnt->mnt_parent) ||
3166 IS_MNT_SHARED(root_mnt->mnt_parent))
3167 goto out4;
3168 if (!check_mnt(root_mnt) || !check_mnt(new_mnt))
3169 goto out4;
3170 if (new_mnt->mnt.mnt_flags & MNT_LOCKED)
3171 goto out4;
3172 error = -ENOENT;
3173 if (d_unlinked(new.dentry))
3174 goto out4;
3175 error = -EBUSY;
3176 if (new_mnt == root_mnt || old_mnt == root_mnt)
3177 goto out4;
3178 error = -EINVAL;
3179 if (root.mnt->mnt_root != root.dentry)
3180 goto out4;
3181 if (!mnt_has_parent(root_mnt))
3182 goto out4;
3183 root_mp = root_mnt->mnt_mp;
3184 if (new.mnt->mnt_root != new.dentry)
3185 goto out4;
3186 if (!mnt_has_parent(new_mnt))
3187 goto out4;
3188
3189 if (!is_path_reachable(old_mnt, old.dentry, &new))
3190 goto out4;
3191
3192 if (!is_path_reachable(new_mnt, new.dentry, &root))
3193 goto out4;
3194 root_mp->m_count++;
3195 lock_mount_hash();
3196 detach_mnt(new_mnt, &parent_path);
3197 detach_mnt(root_mnt, &root_parent);
3198 if (root_mnt->mnt.mnt_flags & MNT_LOCKED) {
3199 new_mnt->mnt.mnt_flags |= MNT_LOCKED;
3200 root_mnt->mnt.mnt_flags &= ~MNT_LOCKED;
3201 }
3202
3203 attach_mnt(root_mnt, old_mnt, old_mp);
3204
3205 attach_mnt(new_mnt, real_mount(root_parent.mnt), root_mp);
3206 touch_mnt_namespace(current->nsproxy->mnt_ns);
3207
3208 list_del_init(&new_mnt->mnt_expire);
3209 put_mountpoint(root_mp);
3210 unlock_mount_hash();
3211 chroot_fs_refs(&root, &new);
3212 error = 0;
3213out4:
3214 unlock_mount(old_mp);
3215 if (!error) {
3216 path_put(&root_parent);
3217 path_put(&parent_path);
3218 }
3219out3:
3220 path_put(&root);
3221out2:
3222 path_put(&old);
3223out1:
3224 path_put(&new);
3225out0:
3226 return error;
3227}
3228
3229static void __init init_mount_tree(void)
3230{
3231 struct vfsmount *mnt;
3232 struct mnt_namespace *ns;
3233 struct path root;
3234 struct file_system_type *type;
3235
3236 type = get_fs_type("rootfs");
3237 if (!type)
3238 panic("Can't find rootfs type");
3239 mnt = vfs_kern_mount(type, 0, "rootfs", NULL);
3240 put_filesystem(type);
3241 if (IS_ERR(mnt))
3242 panic("Can't create rootfs");
3243
3244 ns = create_mnt_ns(mnt);
3245 if (IS_ERR(ns))
3246 panic("Can't allocate initial namespace");
3247
3248 init_task.nsproxy->mnt_ns = ns;
3249 get_mnt_ns(ns);
3250
3251 root.mnt = mnt;
3252 root.dentry = mnt->mnt_root;
3253 mnt->mnt_flags |= MNT_LOCKED;
3254
3255 set_fs_pwd(current->fs, &root);
3256 set_fs_root(current->fs, &root);
3257}
3258
3259void __init mnt_init(void)
3260{
3261 int err;
3262
3263 mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct mount),
3264 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
3265
3266 mount_hashtable = alloc_large_system_hash("Mount-cache",
3267 sizeof(struct hlist_head),
3268 mhash_entries, 19,
3269 HASH_ZERO,
3270 &m_hash_shift, &m_hash_mask, 0, 0);
3271 mountpoint_hashtable = alloc_large_system_hash("Mountpoint-cache",
3272 sizeof(struct hlist_head),
3273 mphash_entries, 19,
3274 HASH_ZERO,
3275 &mp_hash_shift, &mp_hash_mask, 0, 0);
3276
3277 if (!mount_hashtable || !mountpoint_hashtable)
3278 panic("Failed to allocate mount hash table\n");
3279
3280 kernfs_init();
3281
3282 err = sysfs_init();
3283 if (err)
3284 printk(KERN_WARNING "%s: sysfs_init error: %d\n",
3285 __func__, err);
3286 fs_kobj = kobject_create_and_add("fs", NULL);
3287 if (!fs_kobj)
3288 printk(KERN_WARNING "%s: kobj create error\n", __func__);
3289 init_rootfs();
3290 init_mount_tree();
3291}
3292
3293void put_mnt_ns(struct mnt_namespace *ns)
3294{
3295 if (!atomic_dec_and_test(&ns->count))
3296 return;
3297 drop_collected_mounts(&ns->root->mnt);
3298 free_mnt_ns(ns);
3299}
3300
3301struct vfsmount *kern_mount_data(struct file_system_type *type, void *data)
3302{
3303 struct vfsmount *mnt;
3304 mnt = vfs_kern_mount(type, SB_KERNMOUNT, type->name, data);
3305 if (!IS_ERR(mnt)) {
3306
3307
3308
3309
3310 real_mount(mnt)->mnt_ns = MNT_NS_INTERNAL;
3311 }
3312 return mnt;
3313}
3314EXPORT_SYMBOL_GPL(kern_mount_data);
3315
3316void kern_unmount(struct vfsmount *mnt)
3317{
3318
3319 if (!IS_ERR_OR_NULL(mnt)) {
3320 real_mount(mnt)->mnt_ns = NULL;
3321 synchronize_rcu();
3322 mntput(mnt);
3323 }
3324}
3325EXPORT_SYMBOL(kern_unmount);
3326
3327bool our_mnt(struct vfsmount *mnt)
3328{
3329 return check_mnt(real_mount(mnt));
3330}
3331
3332bool current_chrooted(void)
3333{
3334
3335 struct path ns_root;
3336 struct path fs_root;
3337 bool chrooted;
3338
3339
3340 ns_root.mnt = ¤t->nsproxy->mnt_ns->root->mnt;
3341 ns_root.dentry = ns_root.mnt->mnt_root;
3342 path_get(&ns_root);
3343 while (d_mountpoint(ns_root.dentry) && follow_down_one(&ns_root))
3344 ;
3345
3346 get_fs_root(current->fs, &fs_root);
3347
3348 chrooted = !path_equal(&fs_root, &ns_root);
3349
3350 path_put(&fs_root);
3351 path_put(&ns_root);
3352
3353 return chrooted;
3354}
3355
3356static bool mnt_already_visible(struct mnt_namespace *ns, struct vfsmount *new,
3357 int *new_mnt_flags)
3358{
3359 int new_flags = *new_mnt_flags;
3360 struct mount *mnt;
3361 bool visible = false;
3362
3363 down_read(&namespace_sem);
3364 list_for_each_entry(mnt, &ns->list, mnt_list) {
3365 struct mount *child;
3366 int mnt_flags;
3367
3368 if (mnt->mnt.mnt_sb->s_type != new->mnt_sb->s_type)
3369 continue;
3370
3371
3372
3373
3374 if (mnt->mnt.mnt_root != mnt->mnt.mnt_sb->s_root)
3375 continue;
3376
3377
3378 mnt_flags = mnt->mnt.mnt_flags;
3379
3380
3381 if (sb_rdonly(mnt->mnt.mnt_sb))
3382 mnt_flags |= MNT_LOCK_READONLY;
3383
3384
3385
3386
3387 if ((mnt_flags & MNT_LOCK_READONLY) &&
3388 !(new_flags & MNT_READONLY))
3389 continue;
3390 if ((mnt_flags & MNT_LOCK_ATIME) &&
3391 ((mnt_flags & MNT_ATIME_MASK) != (new_flags & MNT_ATIME_MASK)))
3392 continue;
3393
3394
3395
3396
3397
3398 list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
3399 struct inode *inode = child->mnt_mountpoint->d_inode;
3400
3401 if (!(child->mnt.mnt_flags & MNT_LOCKED))
3402 continue;
3403
3404 if (!is_empty_dir_inode(inode))
3405 goto next;
3406 }
3407
3408 *new_mnt_flags |= mnt_flags & (MNT_LOCK_READONLY | \
3409 MNT_LOCK_ATIME);
3410 visible = true;
3411 goto found;
3412 next: ;
3413 }
3414found:
3415 up_read(&namespace_sem);
3416 return visible;
3417}
3418
3419static bool mount_too_revealing(struct vfsmount *mnt, int *new_mnt_flags)
3420{
3421 const unsigned long required_iflags = SB_I_NOEXEC | SB_I_NODEV;
3422 struct mnt_namespace *ns = current->nsproxy->mnt_ns;
3423 unsigned long s_iflags;
3424
3425 if (ns->user_ns == &init_user_ns)
3426 return false;
3427
3428
3429 s_iflags = mnt->mnt_sb->s_iflags;
3430 if (!(s_iflags & SB_I_USERNS_VISIBLE))
3431 return false;
3432
3433 if ((s_iflags & required_iflags) != required_iflags) {
3434 WARN_ONCE(1, "Expected s_iflags to contain 0x%lx\n",
3435 required_iflags);
3436 return true;
3437 }
3438
3439 return !mnt_already_visible(ns, mnt, new_mnt_flags);
3440}
3441
3442bool mnt_may_suid(struct vfsmount *mnt)
3443{
3444
3445
3446
3447
3448
3449
3450
3451 return !(mnt->mnt_flags & MNT_NOSUID) && check_mnt(real_mount(mnt)) &&
3452 current_in_userns(mnt->mnt_sb->s_user_ns);
3453}
3454
3455static struct ns_common *mntns_get(struct task_struct *task)
3456{
3457 struct ns_common *ns = NULL;
3458 struct nsproxy *nsproxy;
3459
3460 task_lock(task);
3461 nsproxy = task->nsproxy;
3462 if (nsproxy) {
3463 ns = &nsproxy->mnt_ns->ns;
3464 get_mnt_ns(to_mnt_ns(ns));
3465 }
3466 task_unlock(task);
3467
3468 return ns;
3469}
3470
/*
 * mntns_put - proc_ns_operations ->put: drop a reference on the mount
 * namespace that contains @ns.
 */
static void mntns_put(struct ns_common *ns)
{
	struct mnt_namespace *mnt_ns = to_mnt_ns(ns);

	put_mnt_ns(mnt_ns);
}
3475
3476static int mntns_install(struct nsproxy *nsproxy, struct ns_common *ns)
3477{
3478 struct fs_struct *fs = current->fs;
3479 struct mnt_namespace *mnt_ns = to_mnt_ns(ns), *old_mnt_ns;
3480 struct path root;
3481 int err;
3482
3483 if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) ||
3484 !ns_capable(current_user_ns(), CAP_SYS_CHROOT) ||
3485 !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
3486 return -EPERM;
3487
3488 if (fs->users != 1)
3489 return -EINVAL;
3490
3491 get_mnt_ns(mnt_ns);
3492 old_mnt_ns = nsproxy->mnt_ns;
3493 nsproxy->mnt_ns = mnt_ns;
3494
3495
3496 err = vfs_path_lookup(mnt_ns->root->mnt.mnt_root, &mnt_ns->root->mnt,
3497 "/", LOOKUP_DOWN, &root);
3498 if (err) {
3499
3500 nsproxy->mnt_ns = old_mnt_ns;
3501 put_mnt_ns(mnt_ns);
3502 return err;
3503 }
3504
3505 put_mnt_ns(old_mnt_ns);
3506
3507
3508 set_fs_pwd(fs, &root);
3509 set_fs_root(fs, &root);
3510
3511 path_put(&root);
3512 return 0;
3513}
3514
3515static struct user_namespace *mntns_owner(struct ns_common *ns)
3516{
3517 return to_mnt_ns(ns)->user_ns;
3518}
3519
3520const struct proc_ns_operations mntns_operations = {
3521 .name = "mnt",
3522 .type = CLONE_NEWNS,
3523 .get = mntns_get,
3524 .put = mntns_put,
3525 .install = mntns_install,
3526 .owner = mntns_owner,
3527};
3528