/*
 *  linux/fs/namespace.c
 *
 *  Mount tree and mount namespace handling.
 */
#include <linux/syscalls.h>
#include <linux/export.h>
#include <linux/capability.h>
#include <linux/mnt_namespace.h>
#include <linux/user_namespace.h>
#include <linux/namei.h>
#include <linux/security.h>
#include <linux/cred.h>
#include <linux/idr.h>
#include <linux/init.h>
#include <linux/fs_struct.h>
#include <linux/fsnotify.h>
#include <linux/uaccess.h>
#include <linux/proc_ns.h>
#include <linux/magic.h>
#include <linux/bootmem.h>
#include <linux/task_work.h>
#include <linux/sched/task.h>

#include "pnode.h"
#include "internal.h"
32
33
34unsigned int sysctl_mount_max __read_mostly = 100000;
35
36static unsigned int m_hash_mask __read_mostly;
37static unsigned int m_hash_shift __read_mostly;
38static unsigned int mp_hash_mask __read_mostly;
39static unsigned int mp_hash_shift __read_mostly;
40
41static __initdata unsigned long mhash_entries;
42static int __init set_mhash_entries(char *str)
43{
44 if (!str)
45 return 0;
46 mhash_entries = simple_strtoul(str, &str, 0);
47 return 1;
48}
49__setup("mhash_entries=", set_mhash_entries);
50
51static __initdata unsigned long mphash_entries;
52static int __init set_mphash_entries(char *str)
53{
54 if (!str)
55 return 0;
56 mphash_entries = simple_strtoul(str, &str, 0);
57 return 1;
58}
59__setup("mphash_entries=", set_mphash_entries);
60
static u64 event;
static DEFINE_IDA(mnt_id_ida);
static DEFINE_IDA(mnt_group_ida);

static struct hlist_head *mount_hashtable __read_mostly;
static struct hlist_head *mountpoint_hashtable __read_mostly;
static struct kmem_cache *mnt_cache __read_mostly;
static DECLARE_RWSEM(namespace_sem);


struct kobject *fs_kobj;
EXPORT_SYMBOL_GPL(fs_kobj);
73
/*
 * mount_lock is a global seqlock that protects the mount hash tables and
 * the topology of the mount tree.  It is taken for write (and the sequence
 * count bumped) whenever the tree is changed; lockless readers such as
 * lookup_mnt() sample the sequence count and retry, or legitimize their
 * reference, if it changed underneath them.
 */
__cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock);

static inline struct hlist_head *m_hash(struct vfsmount *mnt, struct dentry *dentry)
{
	unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES);
	tmp += ((unsigned long)dentry / L1_CACHE_BYTES);
	tmp = tmp + (tmp >> m_hash_shift);
	return &mount_hashtable[tmp & m_hash_mask];
}

static inline struct hlist_head *mp_hash(struct dentry *dentry)
{
	unsigned long tmp = ((unsigned long)dentry / L1_CACHE_BYTES);
	tmp = tmp + (tmp >> mp_hash_shift);
	return &mountpoint_hashtable[tmp & mp_hash_mask];
}
98
99static int mnt_alloc_id(struct mount *mnt)
100{
101 int res = ida_alloc(&mnt_id_ida, GFP_KERNEL);
102
103 if (res < 0)
104 return res;
105 mnt->mnt_id = res;
106 return 0;
107}
108
109static void mnt_free_id(struct mount *mnt)
110{
111 ida_free(&mnt_id_ida, mnt->mnt_id);
112}
113
/*
 * Allocate a new peer group ID.
 */
117static int mnt_alloc_group_id(struct mount *mnt)
118{
119 int res = ida_alloc_min(&mnt_group_ida, 1, GFP_KERNEL);
120
121 if (res < 0)
122 return res;
123 mnt->mnt_group_id = res;
124 return 0;
125}
126
/*
 * Release a peer group ID.
 */
130void mnt_release_group_id(struct mount *mnt)
131{
132 ida_free(&mnt_group_ida, mnt->mnt_group_id);
133 mnt->mnt_group_id = 0;
134}
135
/*
 * vfsmount lock must be held for read
 */
139static inline void mnt_add_count(struct mount *mnt, int n)
140{
141#ifdef CONFIG_SMP
142 this_cpu_add(mnt->mnt_pcp->mnt_count, n);
143#else
144 preempt_disable();
145 mnt->mnt_count += n;
146 preempt_enable();
147#endif
148}
149
150
151
152
153unsigned int mnt_get_count(struct mount *mnt)
154{
155#ifdef CONFIG_SMP
156 unsigned int count = 0;
157 int cpu;
158
159 for_each_possible_cpu(cpu) {
160 count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_count;
161 }
162
163 return count;
164#else
165 return mnt->mnt_count;
166#endif
167}
168
169static void drop_mountpoint(struct fs_pin *p)
170{
171 struct mount *m = container_of(p, struct mount, mnt_umount);
172 dput(m->mnt_ex_mountpoint);
173 pin_remove(p);
174 mntput(&m->mnt);
175}
176
177static struct mount *alloc_vfsmnt(const char *name)
178{
179 struct mount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
180 if (mnt) {
181 int err;
182
183 err = mnt_alloc_id(mnt);
184 if (err)
185 goto out_free_cache;
186
187 if (name) {
188 mnt->mnt_devname = kstrdup_const(name, GFP_KERNEL);
189 if (!mnt->mnt_devname)
190 goto out_free_id;
191 }
192
193#ifdef CONFIG_SMP
194 mnt->mnt_pcp = alloc_percpu(struct mnt_pcp);
195 if (!mnt->mnt_pcp)
196 goto out_free_devname;
197
198 this_cpu_add(mnt->mnt_pcp->mnt_count, 1);
199#else
200 mnt->mnt_count = 1;
201 mnt->mnt_writers = 0;
202#endif
203
204 INIT_HLIST_NODE(&mnt->mnt_hash);
205 INIT_LIST_HEAD(&mnt->mnt_child);
206 INIT_LIST_HEAD(&mnt->mnt_mounts);
207 INIT_LIST_HEAD(&mnt->mnt_list);
208 INIT_LIST_HEAD(&mnt->mnt_expire);
209 INIT_LIST_HEAD(&mnt->mnt_share);
210 INIT_LIST_HEAD(&mnt->mnt_slave_list);
211 INIT_LIST_HEAD(&mnt->mnt_slave);
212 INIT_HLIST_NODE(&mnt->mnt_mp_list);
213 INIT_LIST_HEAD(&mnt->mnt_umounting);
214 init_fs_pin(&mnt->mnt_umount, drop_mountpoint);
215 }
216 return mnt;
217
218#ifdef CONFIG_SMP
219out_free_devname:
220 kfree_const(mnt->mnt_devname);
221#endif
222out_free_id:
223 mnt_free_id(mnt);
224out_free_cache:
225 kmem_cache_free(mnt_cache, mnt);
226 return NULL;
227}
228
/*
 * A mount can be read-only either because its superblock is read-only or
 * because the mount itself was created or remounted with MNT_READONLY.
 * To let a remount to read-only fail while writes are in flight, every
 * writer takes a per-mount write count (mnt_writers) via mnt_want_write()
 * and drops it with mnt_drop_write(); mnt_make_readonly() below refuses
 * the transition while that count is non-zero.
 */
248int __mnt_is_readonly(struct vfsmount *mnt)
249{
250 if (mnt->mnt_flags & MNT_READONLY)
251 return 1;
252 if (sb_rdonly(mnt->mnt_sb))
253 return 1;
254 return 0;
255}
256EXPORT_SYMBOL_GPL(__mnt_is_readonly);
257
258static inline void mnt_inc_writers(struct mount *mnt)
259{
260#ifdef CONFIG_SMP
261 this_cpu_inc(mnt->mnt_pcp->mnt_writers);
262#else
263 mnt->mnt_writers++;
264#endif
265}
266
267static inline void mnt_dec_writers(struct mount *mnt)
268{
269#ifdef CONFIG_SMP
270 this_cpu_dec(mnt->mnt_pcp->mnt_writers);
271#else
272 mnt->mnt_writers--;
273#endif
274}
275
276static unsigned int mnt_get_writers(struct mount *mnt)
277{
278#ifdef CONFIG_SMP
279 unsigned int count = 0;
280 int cpu;
281
282 for_each_possible_cpu(cpu) {
283 count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_writers;
284 }
285
286 return count;
287#else
288 return mnt->mnt_writers;
289#endif
290}
291
292static int mnt_is_readonly(struct vfsmount *mnt)
293{
294 if (mnt->mnt_sb->s_readonly_remount)
295 return 1;
296
297 smp_rmb();
298 return __mnt_is_readonly(mnt);
299}
300
/**
 * __mnt_want_write - get write access to a mount without freeze protection
 * @m: the mount on which to take a write
 *
 * This tells the low-level filesystem that a write is about to be performed
 * to it, and makes sure that writes are allowed (mnt is read-write) before
 * returning success.  This operation does not protect against the filesystem
 * being frozen.  When the write operation is finished, __mnt_drop_write()
 * must be called.  This is effectively a refcount.
 */
317int __mnt_want_write(struct vfsmount *m)
318{
319 struct mount *mnt = real_mount(m);
320 int ret = 0;
321
322 preempt_disable();
323 mnt_inc_writers(mnt);
324
	/*
	 * The store to mnt_inc_writers must be visible before we pass
	 * MNT_WRITE_HOLD loop below, so that the slowpath can see our
	 * incremented count after it has set MNT_WRITE_HOLD.
	 */
329 smp_mb();
330 while (READ_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD)
331 cpu_relax();
332
	/*
	 * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will
	 * be set to match its requirements.  So we must not load that until
	 * MNT_WRITE_HOLD is cleared.
	 */
337 smp_rmb();
338 if (mnt_is_readonly(m)) {
339 mnt_dec_writers(mnt);
340 ret = -EROFS;
341 }
342 preempt_enable();
343
344 return ret;
345}
346
/**
 * mnt_want_write - get write access to a mount
 * @m: the mount on which to take a write
 *
 * This tells the low-level filesystem that a write is about to be performed
 * to it, and makes sure that writes are allowed (mount is read-write, the
 * filesystem is not frozen) before returning success.  When the write
 * operation is finished, mnt_drop_write() must be called.  This is
 * effectively a refcount.
 */
356int mnt_want_write(struct vfsmount *m)
357{
358 int ret;
359
360 sb_start_write(m->mnt_sb);
361 ret = __mnt_want_write(m);
362 if (ret)
363 sb_end_write(m->mnt_sb);
364 return ret;
365}
366EXPORT_SYMBOL_GPL(mnt_want_write);
367
368
369
370
371
372
373
374
375
376
377
378
379
380int mnt_clone_write(struct vfsmount *mnt)
381{
382
383 if (__mnt_is_readonly(mnt))
384 return -EROFS;
385 preempt_disable();
386 mnt_inc_writers(real_mount(mnt));
387 preempt_enable();
388 return 0;
389}
390EXPORT_SYMBOL_GPL(mnt_clone_write);
391
392
393
394
395
396
397
398
399int __mnt_want_write_file(struct file *file)
400{
401 if (!(file->f_mode & FMODE_WRITER))
402 return __mnt_want_write(file->f_path.mnt);
403 else
404 return mnt_clone_write(file->f_path.mnt);
405}
406
407
408
409
410
411
412
413
414int mnt_want_write_file(struct file *file)
415{
416 int ret;
417
418 sb_start_write(file_inode(file)->i_sb);
419 ret = __mnt_want_write_file(file);
420 if (ret)
421 sb_end_write(file_inode(file)->i_sb);
422 return ret;
423}
424EXPORT_SYMBOL_GPL(mnt_want_write_file);
425
426
427
428
429
430
431
432
433
434void __mnt_drop_write(struct vfsmount *mnt)
435{
436 preempt_disable();
437 mnt_dec_writers(real_mount(mnt));
438 preempt_enable();
439}
440
441
442
443
444
445
446
447
448
449void mnt_drop_write(struct vfsmount *mnt)
450{
451 __mnt_drop_write(mnt);
452 sb_end_write(mnt->mnt_sb);
453}
454EXPORT_SYMBOL_GPL(mnt_drop_write);
455
456void __mnt_drop_write_file(struct file *file)
457{
458 __mnt_drop_write(file->f_path.mnt);
459}
460
461void mnt_drop_write_file(struct file *file)
462{
463 __mnt_drop_write_file(file);
464 sb_end_write(file_inode(file)->i_sb);
465}
466EXPORT_SYMBOL(mnt_drop_write_file);
467
468static int mnt_make_readonly(struct mount *mnt)
469{
470 int ret = 0;
471
472 lock_mount_hash();
473 mnt->mnt.mnt_flags |= MNT_WRITE_HOLD;
	/*
	 * After storing MNT_WRITE_HOLD, we'll read the counters.  This store
	 * should be visible before we do.
	 */
478 smp_mb();
	/*
	 * With writers on hold, if this value is zero, then there are
	 * definitely no active writers (although held writers may subsequently
	 * increment the count, they'll have to wait, and decrement it after
	 * seeing MNT_READONLY).
	 *
	 * Writers that raced with us are spinning on MNT_WRITE_HOLD in
	 * __mnt_want_write(): either we see their increment here and fail
	 * with -EBUSY, or they will see MNT_READONLY once we clear
	 * MNT_WRITE_HOLD and back out with -EROFS.
	 */
496 if (mnt_get_writers(mnt) > 0)
497 ret = -EBUSY;
498 else
499 mnt->mnt.mnt_flags |= MNT_READONLY;
500
	/*
	 * MNT_READONLY must become visible before ~MNT_WRITE_HOLD, so writers
	 * that become unheld will see MNT_READONLY.
	 */
504 smp_wmb();
505 mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
506 unlock_mount_hash();
507 return ret;
508}
509
510static void __mnt_unmake_readonly(struct mount *mnt)
511{
512 lock_mount_hash();
513 mnt->mnt.mnt_flags &= ~MNT_READONLY;
514 unlock_mount_hash();
515}
516
517int sb_prepare_remount_readonly(struct super_block *sb)
518{
519 struct mount *mnt;
520 int err = 0;
521
522
523 if (atomic_long_read(&sb->s_remove_count))
524 return -EBUSY;
525
526 lock_mount_hash();
527 list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) {
528 if (!(mnt->mnt.mnt_flags & MNT_READONLY)) {
529 mnt->mnt.mnt_flags |= MNT_WRITE_HOLD;
530 smp_mb();
531 if (mnt_get_writers(mnt) > 0) {
532 err = -EBUSY;
533 break;
534 }
535 }
536 }
537 if (!err && atomic_long_read(&sb->s_remove_count))
538 err = -EBUSY;
539
540 if (!err) {
541 sb->s_readonly_remount = 1;
542 smp_wmb();
543 }
544 list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) {
545 if (mnt->mnt.mnt_flags & MNT_WRITE_HOLD)
546 mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
547 }
548 unlock_mount_hash();
549
550 return err;
551}
552
553static void free_vfsmnt(struct mount *mnt)
554{
555 kfree_const(mnt->mnt_devname);
556#ifdef CONFIG_SMP
557 free_percpu(mnt->mnt_pcp);
558#endif
559 kmem_cache_free(mnt_cache, mnt);
560}
561
562static void delayed_free_vfsmnt(struct rcu_head *head)
563{
564 free_vfsmnt(container_of(head, struct mount, mnt_rcu));
565}
566
567
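/*
 * Try to take a reference to a mount found during an RCU-protected walk.
 * Call under rcu_read_lock().  Returns 0 when the reference was taken
 * safely, 1 when the caller should drop out and retry the lookup, and -1
 * when a reference was taken on a mount that is going away: the caller
 * must then drop it with mntput() outside the RCU section.
 */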
568int __legitimize_mnt(struct vfsmount *bastard, unsigned seq)
569{
570 struct mount *mnt;
571 if (read_seqretry(&mount_lock, seq))
572 return 1;
573 if (bastard == NULL)
574 return 0;
575 mnt = real_mount(bastard);
576 mnt_add_count(mnt, 1);
577 smp_mb();
578 if (likely(!read_seqretry(&mount_lock, seq)))
579 return 0;
580 if (bastard->mnt_flags & MNT_SYNC_UMOUNT) {
581 mnt_add_count(mnt, -1);
582 return 1;
583 }
584 lock_mount_hash();
585 if (unlikely(bastard->mnt_flags & MNT_DOOMED)) {
586 mnt_add_count(mnt, -1);
587 unlock_mount_hash();
588 return 1;
589 }
590 unlock_mount_hash();
591
592 return -1;
593}
594
595
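/* call under rcu_read_lock */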
596bool legitimize_mnt(struct vfsmount *bastard, unsigned seq)
597{
598 int res = __legitimize_mnt(bastard, seq);
599 if (likely(!res))
600 return true;
601 if (unlikely(res < 0)) {
602 rcu_read_unlock();
603 mntput(bastard);
604 rcu_read_lock();
605 }
606 return false;
607}
608
/*
 * find the first mount at @dentry on vfsmount @mnt.
 * call under rcu_read_lock()
 */
613struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
614{
615 struct hlist_head *head = m_hash(mnt, dentry);
616 struct mount *p;
617
618 hlist_for_each_entry_rcu(p, head, mnt_hash)
619 if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry)
620 return p;
621 return NULL;
622}
623
/*
 * lookup_mnt - Return the first child mount mounted at path
 *
 * "First" means first mounted chronologically.  If you create the
 * following mounts:
 *
 * mount /dev/sda1 /mnt
 * mount /dev/sda2 /mnt
 * mount /dev/sda3 /mnt
 *
 * Then lookup_mnt() on the base /mnt dentry in the root mount will
 * return successively the root dentry and vfsmount of /dev/sda1, then
 * /dev/sda2, then /dev/sda3, then NULL.
 *
 * lookup_mnt takes a reference to the found vfsmount.
 */
640struct vfsmount *lookup_mnt(const struct path *path)
641{
642 struct mount *child_mnt;
643 struct vfsmount *m;
644 unsigned seq;
645
646 rcu_read_lock();
647 do {
648 seq = read_seqbegin(&mount_lock);
649 child_mnt = __lookup_mnt(path->mnt, path->dentry);
650 m = child_mnt ? &child_mnt->mnt : NULL;
651 } while (!legitimize_mnt(m, seq));
652 rcu_read_unlock();
653 return m;
654}
655
/*
 * __is_local_mountpoint - Test to see if dentry is a mountpoint in the
 *                         current mount namespace.
 *
 * The common case is dentries are not mountpoints at all and that
 * test is handled inline.  For the slow case when we are actually
 * dealing with a mountpoint of some kind, walk through all of the
 * mounts in the current mount namespace and test to see if the dentry
 * is a mountpoint.
 *
 * The mount_hashtable is not usable in this context because we
 * need to identify all mounts that may be in the current mount
 * namespace, not just a mount that happens to have some specified
 * parent mount.
 */
671bool __is_local_mountpoint(struct dentry *dentry)
672{
673 struct mnt_namespace *ns = current->nsproxy->mnt_ns;
674 struct mount *mnt;
675 bool is_covered = false;
676
677 if (!d_mountpoint(dentry))
678 goto out;
679
680 down_read(&namespace_sem);
681 list_for_each_entry(mnt, &ns->list, mnt_list) {
682 is_covered = (mnt->mnt_mountpoint == dentry);
683 if (is_covered)
684 break;
685 }
686 up_read(&namespace_sem);
687out:
688 return is_covered;
689}
690
691static struct mountpoint *lookup_mountpoint(struct dentry *dentry)
692{
693 struct hlist_head *chain = mp_hash(dentry);
694 struct mountpoint *mp;
695
696 hlist_for_each_entry(mp, chain, m_hash) {
697 if (mp->m_dentry == dentry) {
698
699 if (d_unlinked(dentry))
700 return ERR_PTR(-ENOENT);
701 mp->m_count++;
702 return mp;
703 }
704 }
705 return NULL;
706}
707
708static struct mountpoint *get_mountpoint(struct dentry *dentry)
709{
710 struct mountpoint *mp, *new = NULL;
711 int ret;
712
713 if (d_mountpoint(dentry)) {
714mountpoint:
715 read_seqlock_excl(&mount_lock);
716 mp = lookup_mountpoint(dentry);
717 read_sequnlock_excl(&mount_lock);
718 if (mp)
719 goto done;
720 }
721
722 if (!new)
723 new = kmalloc(sizeof(struct mountpoint), GFP_KERNEL);
724 if (!new)
725 return ERR_PTR(-ENOMEM);
726
727
728
729 ret = d_set_mounted(dentry);
730
731
732 if (ret == -EBUSY)
733 goto mountpoint;
734
735
736 mp = ERR_PTR(ret);
737 if (ret)
738 goto done;
739
740
741 read_seqlock_excl(&mount_lock);
742 new->m_dentry = dentry;
743 new->m_count = 1;
744 hlist_add_head(&new->m_hash, mp_hash(dentry));
745 INIT_HLIST_HEAD(&new->m_list);
746 read_sequnlock_excl(&mount_lock);
747
748 mp = new;
749 new = NULL;
750done:
751 kfree(new);
752 return mp;
753}
754
755static void put_mountpoint(struct mountpoint *mp)
756{
757 if (!--mp->m_count) {
758 struct dentry *dentry = mp->m_dentry;
759 BUG_ON(!hlist_empty(&mp->m_list));
760 spin_lock(&dentry->d_lock);
761 dentry->d_flags &= ~DCACHE_MOUNTED;
762 spin_unlock(&dentry->d_lock);
763 hlist_del(&mp->m_hash);
764 kfree(mp);
765 }
766}
767
768static inline int check_mnt(struct mount *mnt)
769{
770 return mnt->mnt_ns == current->nsproxy->mnt_ns;
771}
772
773
774
775
776static void touch_mnt_namespace(struct mnt_namespace *ns)
777{
778 if (ns) {
779 ns->event = ++event;
780 wake_up_interruptible(&ns->poll);
781 }
782}
783
784
785
786
787static void __touch_mnt_namespace(struct mnt_namespace *ns)
788{
789 if (ns && ns->event != event) {
790 ns->event = event;
791 wake_up_interruptible(&ns->poll);
792 }
793}
794
795
796
797
798static void unhash_mnt(struct mount *mnt)
799{
800 mnt->mnt_parent = mnt;
801 mnt->mnt_mountpoint = mnt->mnt.mnt_root;
802 list_del_init(&mnt->mnt_child);
803 hlist_del_init_rcu(&mnt->mnt_hash);
804 hlist_del_init(&mnt->mnt_mp_list);
805 put_mountpoint(mnt->mnt_mp);
806 mnt->mnt_mp = NULL;
807}
808
809
810
811
812static void detach_mnt(struct mount *mnt, struct path *old_path)
813{
814 old_path->dentry = mnt->mnt_mountpoint;
815 old_path->mnt = &mnt->mnt_parent->mnt;
816 unhash_mnt(mnt);
817}
818
819
820
821
822static void umount_mnt(struct mount *mnt)
823{
824
825 mnt->mnt_ex_mountpoint = mnt->mnt_mountpoint;
826 unhash_mnt(mnt);
827}
828
829
830
831
832void mnt_set_mountpoint(struct mount *mnt,
833 struct mountpoint *mp,
834 struct mount *child_mnt)
835{
836 mp->m_count++;
837 mnt_add_count(mnt, 1);
838 child_mnt->mnt_mountpoint = dget(mp->m_dentry);
839 child_mnt->mnt_parent = mnt;
840 child_mnt->mnt_mp = mp;
841 hlist_add_head(&child_mnt->mnt_mp_list, &mp->m_list);
842}
843
844static void __attach_mnt(struct mount *mnt, struct mount *parent)
845{
846 hlist_add_head_rcu(&mnt->mnt_hash,
847 m_hash(&parent->mnt, mnt->mnt_mountpoint));
848 list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
849}
850
851
852
853
854static void attach_mnt(struct mount *mnt,
855 struct mount *parent,
856 struct mountpoint *mp)
857{
858 mnt_set_mountpoint(parent, mp, mnt);
859 __attach_mnt(mnt, parent);
860}
861
862void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp, struct mount *mnt)
863{
864 struct mountpoint *old_mp = mnt->mnt_mp;
865 struct dentry *old_mountpoint = mnt->mnt_mountpoint;
866 struct mount *old_parent = mnt->mnt_parent;
867
868 list_del_init(&mnt->mnt_child);
869 hlist_del_init(&mnt->mnt_mp_list);
870 hlist_del_init_rcu(&mnt->mnt_hash);
871
872 attach_mnt(mnt, parent, mp);
873
874 put_mountpoint(old_mp);
875
	/*
	 * Safely avoid even the suggestion this code might sleep or
	 * lock the mount hash by taking advantage of the knowledge that
	 * mnt_change_mountpoint will not release the final reference
	 * to a mountpoint.
	 *
	 * During mounting, the mount passed in as the parent mount will
	 * continue to use the old mountpoint and during unmounting, the
	 * old mountpoint will continue to exist until its last user has
	 * let go of it.
	 */
887 spin_lock(&old_mountpoint->d_lock);
888 old_mountpoint->d_lockref.count--;
889 spin_unlock(&old_mountpoint->d_lock);
890
891 mnt_add_count(old_parent, -1);
892}
893
894
895
896
897static void commit_tree(struct mount *mnt)
898{
899 struct mount *parent = mnt->mnt_parent;
900 struct mount *m;
901 LIST_HEAD(head);
902 struct mnt_namespace *n = parent->mnt_ns;
903
904 BUG_ON(parent == mnt);
905
906 list_add_tail(&head, &mnt->mnt_list);
907 list_for_each_entry(m, &head, mnt_list)
908 m->mnt_ns = n;
909
910 list_splice(&head, n->list.prev);
911
912 n->mounts += n->pending_mounts;
913 n->pending_mounts = 0;
914
915 __attach_mnt(mnt, parent);
916 touch_mnt_namespace(n);
917}
918
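/*
 * Depth-first traversal of the mount tree rooted at @root: returns the
 * mount that follows @p, or NULL once the whole tree has been visited.
 */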
919static struct mount *next_mnt(struct mount *p, struct mount *root)
920{
921 struct list_head *next = p->mnt_mounts.next;
922 if (next == &p->mnt_mounts) {
923 while (1) {
924 if (p == root)
925 return NULL;
926 next = p->mnt_child.next;
927 if (next != &p->mnt_parent->mnt_mounts)
928 break;
929 p = p->mnt_parent;
930 }
931 }
932 return list_entry(next, struct mount, mnt_child);
933}
934
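/*
 * Return the last mount visited in @p's subtree, so that a next_mnt()
 * traversal can skip over the entire subtree.
 */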
935static struct mount *skip_mnt_tree(struct mount *p)
936{
937 struct list_head *prev = p->mnt_mounts.prev;
938 while (prev != &p->mnt_mounts) {
939 p = list_entry(prev, struct mount, mnt_child);
940 prev = p->mnt_mounts.prev;
941 }
942 return p;
943}
944
945struct vfsmount *
946vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data)
947{
948 struct mount *mnt;
949 struct dentry *root;
950
951 if (!type)
952 return ERR_PTR(-ENODEV);
953
954 mnt = alloc_vfsmnt(name);
955 if (!mnt)
956 return ERR_PTR(-ENOMEM);
957
958 if (flags & SB_KERNMOUNT)
959 mnt->mnt.mnt_flags = MNT_INTERNAL;
960
961 root = mount_fs(type, flags, name, data);
962 if (IS_ERR(root)) {
963 mnt_free_id(mnt);
964 free_vfsmnt(mnt);
965 return ERR_CAST(root);
966 }
967
968 mnt->mnt.mnt_root = root;
969 mnt->mnt.mnt_sb = root->d_sb;
970 mnt->mnt_mountpoint = mnt->mnt.mnt_root;
971 mnt->mnt_parent = mnt;
972 lock_mount_hash();
973 list_add_tail(&mnt->mnt_instance, &root->d_sb->s_mounts);
974 unlock_mount_hash();
975 return &mnt->mnt;
976}
977EXPORT_SYMBOL_GPL(vfs_kern_mount);
978
979struct vfsmount *
980vfs_submount(const struct dentry *mountpoint, struct file_system_type *type,
981 const char *name, void *data)
982{
983
984
985
986
987 if (mountpoint->d_sb->s_user_ns != &init_user_ns)
988 return ERR_PTR(-EPERM);
989
990 return vfs_kern_mount(type, SB_SUBMOUNT, name, data);
991}
992EXPORT_SYMBOL_GPL(vfs_submount);
993
994static struct mount *clone_mnt(struct mount *old, struct dentry *root,
995 int flag)
996{
997 struct super_block *sb = old->mnt.mnt_sb;
998 struct mount *mnt;
999 int err;
1000
1001 mnt = alloc_vfsmnt(old->mnt_devname);
1002 if (!mnt)
1003 return ERR_PTR(-ENOMEM);
1004
1005 if (flag & (CL_SLAVE | CL_PRIVATE | CL_SHARED_TO_SLAVE))
1006 mnt->mnt_group_id = 0;
1007 else
1008 mnt->mnt_group_id = old->mnt_group_id;
1009
1010 if ((flag & CL_MAKE_SHARED) && !mnt->mnt_group_id) {
1011 err = mnt_alloc_group_id(mnt);
1012 if (err)
1013 goto out_free;
1014 }
1015
1016 mnt->mnt.mnt_flags = old->mnt.mnt_flags;
1017 mnt->mnt.mnt_flags &= ~(MNT_WRITE_HOLD|MNT_MARKED|MNT_INTERNAL);
1018
1019 if (flag & CL_UNPRIVILEGED) {
1020 mnt->mnt.mnt_flags |= MNT_LOCK_ATIME;
1021
1022 if (mnt->mnt.mnt_flags & MNT_READONLY)
1023 mnt->mnt.mnt_flags |= MNT_LOCK_READONLY;
1024
1025 if (mnt->mnt.mnt_flags & MNT_NODEV)
1026 mnt->mnt.mnt_flags |= MNT_LOCK_NODEV;
1027
1028 if (mnt->mnt.mnt_flags & MNT_NOSUID)
1029 mnt->mnt.mnt_flags |= MNT_LOCK_NOSUID;
1030
1031 if (mnt->mnt.mnt_flags & MNT_NOEXEC)
1032 mnt->mnt.mnt_flags |= MNT_LOCK_NOEXEC;
1033 }
1034
1035
1036 if ((flag & CL_UNPRIVILEGED) &&
1037 (!(flag & CL_EXPIRE) || list_empty(&old->mnt_expire)))
1038 mnt->mnt.mnt_flags |= MNT_LOCKED;
1039
1040 atomic_inc(&sb->s_active);
1041 mnt->mnt.mnt_sb = sb;
1042 mnt->mnt.mnt_root = dget(root);
1043 mnt->mnt_mountpoint = mnt->mnt.mnt_root;
1044 mnt->mnt_parent = mnt;
1045 lock_mount_hash();
1046 list_add_tail(&mnt->mnt_instance, &sb->s_mounts);
1047 unlock_mount_hash();
1048
1049 if ((flag & CL_SLAVE) ||
1050 ((flag & CL_SHARED_TO_SLAVE) && IS_MNT_SHARED(old))) {
1051 list_add(&mnt->mnt_slave, &old->mnt_slave_list);
1052 mnt->mnt_master = old;
1053 CLEAR_MNT_SHARED(mnt);
1054 } else if (!(flag & CL_PRIVATE)) {
1055 if ((flag & CL_MAKE_SHARED) || IS_MNT_SHARED(old))
1056 list_add(&mnt->mnt_share, &old->mnt_share);
1057 if (IS_MNT_SLAVE(old))
1058 list_add(&mnt->mnt_slave, &old->mnt_slave);
1059 mnt->mnt_master = old->mnt_master;
1060 } else {
1061 CLEAR_MNT_SHARED(mnt);
1062 }
1063 if (flag & CL_MAKE_SHARED)
1064 set_mnt_shared(mnt);
1065
1066
1067
1068 if (flag & CL_EXPIRE) {
1069 if (!list_empty(&old->mnt_expire))
1070 list_add(&mnt->mnt_expire, &old->mnt_expire);
1071 }
1072
1073 return mnt;
1074
1075 out_free:
1076 mnt_free_id(mnt);
1077 free_vfsmnt(mnt);
1078 return ERR_PTR(err);
1079}
1080
1081static void cleanup_mnt(struct mount *mnt)
1082{
	/*
	 * The warning below probably indicates that somebody messed
	 * up a mnt_want/drop_write() pair.  If this happens, the
	 * filesystem was probably unable to make r/w->r/o transitions.
	 * The locking used to deal with mnt_count decrement provides
	 * barriers, so mnt_get_writers() below is safe.
	 */
1093 WARN_ON(mnt_get_writers(mnt));
1094 if (unlikely(mnt->mnt_pins.first))
1095 mnt_pin_kill(mnt);
1096 fsnotify_vfsmount_delete(&mnt->mnt);
1097 dput(mnt->mnt.mnt_root);
1098 deactivate_super(mnt->mnt.mnt_sb);
1099 mnt_free_id(mnt);
1100 call_rcu(&mnt->mnt_rcu, delayed_free_vfsmnt);
1101}
1102
1103static void __cleanup_mnt(struct rcu_head *head)
1104{
1105 cleanup_mnt(container_of(head, struct mount, mnt_rcu));
1106}
1107
1108static LLIST_HEAD(delayed_mntput_list);
1109static void delayed_mntput(struct work_struct *unused)
1110{
1111 struct llist_node *node = llist_del_all(&delayed_mntput_list);
1112 struct mount *m, *t;
1113
1114 llist_for_each_entry_safe(m, t, node, mnt_llist)
1115 cleanup_mnt(m);
1116}
1117static DECLARE_DELAYED_WORK(delayed_mntput_work, delayed_mntput);
1118
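/*
 * Drop a reference to @mnt.  If this is the last reference and the mount is
 * no longer part of any namespace, dismantle it: detach remaining children,
 * then free it via task_work, the delayed workqueue (for kernel threads), or
 * directly for MNT_INTERNAL mounts.
 */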
1119static void mntput_no_expire(struct mount *mnt)
1120{
1121 rcu_read_lock();
1122 if (likely(READ_ONCE(mnt->mnt_ns))) {
		/*
		 * Since we don't do lock_mount_hash() here,
		 * ->mnt_ns can change under us.  However, if it's
		 * non-NULL, then there's a reference that won't
		 * be dropped until after an RCU delay done after
		 * turning ->mnt_ns NULL.  So if we observe it
		 * non-NULL under rcu_read_lock(), the reference
		 * we are dropping is not the final one.
		 */
1132 mnt_add_count(mnt, -1);
1133 rcu_read_unlock();
1134 return;
1135 }
1136 lock_mount_hash();
	/*
	 * make sure that if __legitimize_mnt() has not seen us grab
	 * mount_lock, we'll see their refcount increment here.
	 */
1141 smp_mb();
1142 mnt_add_count(mnt, -1);
1143 if (mnt_get_count(mnt)) {
1144 rcu_read_unlock();
1145 unlock_mount_hash();
1146 return;
1147 }
1148 if (unlikely(mnt->mnt.mnt_flags & MNT_DOOMED)) {
1149 rcu_read_unlock();
1150 unlock_mount_hash();
1151 return;
1152 }
1153 mnt->mnt.mnt_flags |= MNT_DOOMED;
1154 rcu_read_unlock();
1155
1156 list_del(&mnt->mnt_instance);
1157
1158 if (unlikely(!list_empty(&mnt->mnt_mounts))) {
1159 struct mount *p, *tmp;
1160 list_for_each_entry_safe(p, tmp, &mnt->mnt_mounts, mnt_child) {
1161 umount_mnt(p);
1162 }
1163 }
1164 unlock_mount_hash();
1165
1166 if (likely(!(mnt->mnt.mnt_flags & MNT_INTERNAL))) {
1167 struct task_struct *task = current;
1168 if (likely(!(task->flags & PF_KTHREAD))) {
1169 init_task_work(&mnt->mnt_rcu, __cleanup_mnt);
1170 if (!task_work_add(task, &mnt->mnt_rcu, true))
1171 return;
1172 }
1173 if (llist_add(&mnt->mnt_llist, &delayed_mntput_list))
1174 schedule_delayed_work(&delayed_mntput_work, 1);
1175 return;
1176 }
1177 cleanup_mnt(mnt);
1178}
1179
1180void mntput(struct vfsmount *mnt)
1181{
1182 if (mnt) {
1183 struct mount *m = real_mount(mnt);
1184
1185 if (unlikely(m->mnt_expiry_mark))
1186 m->mnt_expiry_mark = 0;
1187 mntput_no_expire(m);
1188 }
1189}
1190EXPORT_SYMBOL(mntput);
1191
1192struct vfsmount *mntget(struct vfsmount *mnt)
1193{
1194 if (mnt)
1195 mnt_add_count(real_mount(mnt), 1);
1196 return mnt;
1197}
1198EXPORT_SYMBOL(mntget);
1199
/*
 * path_is_mountpoint() - Check if path is a mount in the current namespace.
 *
 * d_mountpoint() can only be used reliably to establish if a dentry is
 * not mounted in any namespace and that common case is handled inline.
 * d_mountpoint() isn't aware of the possibility there may be multiple
 * mounts using a given dentry in a different namespace.  This function
 * checks if the passed in path is a mountpoint rather than the dentry
 * alone.
 */
1210bool path_is_mountpoint(const struct path *path)
1211{
1212 unsigned seq;
1213 bool res;
1214
1215 if (!d_mountpoint(path->dentry))
1216 return false;
1217
1218 rcu_read_lock();
1219 do {
1220 seq = read_seqbegin(&mount_lock);
1221 res = __path_is_mountpoint(path);
1222 } while (read_seqretry(&mount_lock, seq));
1223 rcu_read_unlock();
1224
1225 return res;
1226}
1227EXPORT_SYMBOL(path_is_mountpoint);
1228
1229struct vfsmount *mnt_clone_internal(const struct path *path)
1230{
1231 struct mount *p;
1232 p = clone_mnt(real_mount(path->mnt), path->dentry, CL_PRIVATE);
1233 if (IS_ERR(p))
1234 return ERR_CAST(p);
1235 p->mnt.mnt_flags |= MNT_INTERNAL;
1236 return &p->mnt;
1237}
1238
1239#ifdef CONFIG_PROC_FS
1240
1241static void *m_start(struct seq_file *m, loff_t *pos)
1242{
1243 struct proc_mounts *p = m->private;
1244
1245 down_read(&namespace_sem);
1246 if (p->cached_event == p->ns->event) {
1247 void *v = p->cached_mount;
1248 if (*pos == p->cached_index)
1249 return v;
1250 if (*pos == p->cached_index + 1) {
1251 v = seq_list_next(v, &p->ns->list, &p->cached_index);
1252 return p->cached_mount = v;
1253 }
1254 }
1255
1256 p->cached_event = p->ns->event;
1257 p->cached_mount = seq_list_start(&p->ns->list, *pos);
1258 p->cached_index = *pos;
1259 return p->cached_mount;
1260}
1261
1262static void *m_next(struct seq_file *m, void *v, loff_t *pos)
1263{
1264 struct proc_mounts *p = m->private;
1265
1266 p->cached_mount = seq_list_next(v, &p->ns->list, pos);
1267 p->cached_index = *pos;
1268 return p->cached_mount;
1269}
1270
1271static void m_stop(struct seq_file *m, void *v)
1272{
1273 up_read(&namespace_sem);
1274}
1275
1276static int m_show(struct seq_file *m, void *v)
1277{
1278 struct proc_mounts *p = m->private;
1279 struct mount *r = list_entry(v, struct mount, mnt_list);
1280 return p->show(m, &r->mnt);
1281}
1282
1283const struct seq_operations mounts_op = {
1284 .start = m_start,
1285 .next = m_next,
1286 .stop = m_stop,
1287 .show = m_show,
1288};
1289#endif
1290
/**
 * may_umount_tree - check if a mount tree is busy
 * @m: root of mount tree
 *
 * This is called to check if a tree of mounts has any
 * open files, pwds, chroots or sub mounts that are
 * busy.
 */
1299int may_umount_tree(struct vfsmount *m)
1300{
1301 struct mount *mnt = real_mount(m);
1302 int actual_refs = 0;
1303 int minimum_refs = 0;
1304 struct mount *p;
1305 BUG_ON(!m);
1306
1307
1308 lock_mount_hash();
1309 for (p = mnt; p; p = next_mnt(p, mnt)) {
1310 actual_refs += mnt_get_count(p);
1311 minimum_refs += 2;
1312 }
1313 unlock_mount_hash();
1314
1315 if (actual_refs > minimum_refs)
1316 return 0;
1317
1318 return 1;
1319}
1320
1321EXPORT_SYMBOL(may_umount_tree);
1322
/**
 * may_umount - check if a mount point is busy
 * @mnt: root of mount
 *
 * This is called to check if a mount point has any
 * open files, pwds, chroots or sub mounts.  If the
 * mount has sub mounts this will return busy
 * regardless of whether the sub mounts are busy.
 *
 * Doesn't take quota and stuff into account.  IOW, in some cases it will
 * give false negatives.  The main reason why it's here is that we need
 * a non-destructive way to look for easily umountable filesystems.
 */
1336int may_umount(struct vfsmount *mnt)
1337{
1338 int ret = 1;
1339 down_read(&namespace_sem);
1340 lock_mount_hash();
1341 if (propagate_mount_busy(real_mount(mnt), 2))
1342 ret = 0;
1343 unlock_mount_hash();
1344 up_read(&namespace_sem);
1345 return ret;
1346}
1347
1348EXPORT_SYMBOL(may_umount);
1349
1350static HLIST_HEAD(unmounted);
1351
1352static void namespace_unlock(void)
1353{
1354 struct hlist_head head;
1355
1356 hlist_move_list(&unmounted, &head);
1357
1358 up_write(&namespace_sem);
1359
1360 if (likely(hlist_empty(&head)))
1361 return;
1362
1363 synchronize_rcu();
1364
1365 group_pin_kill(&head);
1366}
1367
1368static inline void namespace_lock(void)
1369{
1370 down_write(&namespace_sem);
1371}
1372
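/*
 * Flags controlling how umount_tree() dismantles a mount tree:
 * UMOUNT_SYNC      - this is a synchronous umount(2); mark the mounts with
 *                    MNT_SYNC_UMOUNT so lockless walkers back off
 * UMOUNT_PROPAGATE - propagate the unmount to peer groups and slaves
 * UMOUNT_CONNECTED - used for lazy cleanup; keep unmounted submounts
 *                    connected to their (also unmounted) parent where possible
 */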
1373enum umount_tree_flags {
1374 UMOUNT_SYNC = 1,
1375 UMOUNT_PROPAGATE = 2,
1376 UMOUNT_CONNECTED = 4,
1377};
1378
1379static bool disconnect_mount(struct mount *mnt, enum umount_tree_flags how)
1380{
1381
1382 if (how & UMOUNT_SYNC)
1383 return true;
1384
1385
1386 if (!mnt_has_parent(mnt))
1387 return true;
1388
1389
1390
1391
1392
1393 if (!(mnt->mnt_parent->mnt.mnt_flags & MNT_UMOUNT))
1394 return true;
1395
1396
1397 if (how & UMOUNT_CONNECTED)
1398 return false;
1399
1400
1401 if (IS_MNT_LOCKED(mnt))
1402 return false;
1403
1404
1405 return true;
1406}
1407
/*
 * mount_lock must be held
 * namespace_sem must be held for write
 */
1412static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
1413{
1414 LIST_HEAD(tmp_list);
1415 struct mount *p;
1416
1417 if (how & UMOUNT_PROPAGATE)
1418 propagate_mount_unlock(mnt);
1419
1420
1421 for (p = mnt; p; p = next_mnt(p, mnt)) {
1422 p->mnt.mnt_flags |= MNT_UMOUNT;
1423 list_move(&p->mnt_list, &tmp_list);
1424 }
1425
1426
1427 list_for_each_entry(p, &tmp_list, mnt_list) {
1428 list_del_init(&p->mnt_child);
1429 }
1430
1431
1432 if (how & UMOUNT_PROPAGATE)
1433 propagate_umount(&tmp_list);
1434
1435 while (!list_empty(&tmp_list)) {
1436 struct mnt_namespace *ns;
1437 bool disconnect;
1438 p = list_first_entry(&tmp_list, struct mount, mnt_list);
1439 list_del_init(&p->mnt_expire);
1440 list_del_init(&p->mnt_list);
1441 ns = p->mnt_ns;
1442 if (ns) {
1443 ns->mounts--;
1444 __touch_mnt_namespace(ns);
1445 }
1446 p->mnt_ns = NULL;
1447 if (how & UMOUNT_SYNC)
1448 p->mnt.mnt_flags |= MNT_SYNC_UMOUNT;
1449
1450 disconnect = disconnect_mount(p, how);
1451
1452 pin_insert_group(&p->mnt_umount, &p->mnt_parent->mnt,
1453 disconnect ? &unmounted : NULL);
1454 if (mnt_has_parent(p)) {
1455 mnt_add_count(p->mnt_parent, -1);
1456 if (!disconnect) {
1457
1458 list_add_tail(&p->mnt_child, &p->mnt_parent->mnt_mounts);
1459 } else {
1460 umount_mnt(p);
1461 }
1462 }
1463 change_mnt_propagation(p, MS_PRIVATE);
1464 }
1465}
1466
1467static void shrink_submounts(struct mount *mnt);
1468
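/*
 * Back end of umount(2) for a single mount: handles MNT_EXPIRE, MNT_FORCE
 * and MNT_DETACH, falls back to remounting read-only when asked to unmount
 * the mount at the caller's root, and otherwise tears the tree down under
 * namespace_sem and mount_lock.
 */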
1469static int do_umount(struct mount *mnt, int flags)
1470{
1471 struct super_block *sb = mnt->mnt.mnt_sb;
1472 int retval;
1473
1474 retval = security_sb_umount(&mnt->mnt, flags);
1475 if (retval)
1476 return retval;
1477
1478
1479
1480
1481
1482
1483
1484 if (flags & MNT_EXPIRE) {
1485 if (&mnt->mnt == current->fs->root.mnt ||
1486 flags & (MNT_FORCE | MNT_DETACH))
1487 return -EINVAL;
1488
1489
1490
1491
1492
1493 lock_mount_hash();
1494 if (mnt_get_count(mnt) != 2) {
1495 unlock_mount_hash();
1496 return -EBUSY;
1497 }
1498 unlock_mount_hash();
1499
1500 if (!xchg(&mnt->mnt_expiry_mark, 1))
1501 return -EAGAIN;
1502 }
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514 if (flags & MNT_FORCE && sb->s_op->umount_begin) {
1515 sb->s_op->umount_begin(sb);
1516 }
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527 if (&mnt->mnt == current->fs->root.mnt && !(flags & MNT_DETACH)) {
1528
1529
1530
1531
1532 if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN))
1533 return -EPERM;
1534 down_write(&sb->s_umount);
1535 if (!sb_rdonly(sb))
1536 retval = do_remount_sb(sb, SB_RDONLY, NULL, 0);
1537 up_write(&sb->s_umount);
1538 return retval;
1539 }
1540
1541 namespace_lock();
1542 lock_mount_hash();
1543 event++;
1544
1545 if (flags & MNT_DETACH) {
1546 if (!list_empty(&mnt->mnt_list))
1547 umount_tree(mnt, UMOUNT_PROPAGATE);
1548 retval = 0;
1549 } else {
1550 shrink_submounts(mnt);
1551 retval = -EBUSY;
1552 if (!propagate_mount_busy(mnt, 2)) {
1553 if (!list_empty(&mnt->mnt_list))
1554 umount_tree(mnt, UMOUNT_PROPAGATE|UMOUNT_SYNC);
1555 retval = 0;
1556 }
1557 }
1558 unlock_mount_hash();
1559 namespace_unlock();
1560 return retval;
1561}
1562
/*
 * __detach_mounts - lazily unmount all mounts on the specified dentry
 *
 * During unlink, rmdir, and d_drop it is possible to lose the path
 * to an existing mountpoint, and wind up leaking the mount.
 * detach_mounts allows lazily unmounting those mounts instead of
 * leaking them.
 *
 * The caller may hold dentry->d_inode->i_mutex.
 */
1573void __detach_mounts(struct dentry *dentry)
1574{
1575 struct mountpoint *mp;
1576 struct mount *mnt;
1577
1578 namespace_lock();
1579 lock_mount_hash();
1580 mp = lookup_mountpoint(dentry);
1581 if (IS_ERR_OR_NULL(mp))
1582 goto out_unlock;
1583
1584 event++;
1585 while (!hlist_empty(&mp->m_list)) {
1586 mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list);
1587 if (mnt->mnt.mnt_flags & MNT_UMOUNT) {
1588 hlist_add_head(&mnt->mnt_umount.s_list, &unmounted);
1589 umount_mnt(mnt);
1590 }
1591 else umount_tree(mnt, UMOUNT_CONNECTED);
1592 }
1593 put_mountpoint(mp);
1594out_unlock:
1595 unlock_mount_hash();
1596 namespace_unlock();
1597}
1598
1599
1600
1601
1602static inline bool may_mount(void)
1603{
1604 return ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN);
1605}
1606
1607static inline bool may_mandlock(void)
1608{
1609#ifndef CONFIG_MANDATORY_FILE_LOCKING
1610 return false;
1611#endif
1612 return capable(CAP_SYS_ADMIN);
1613}
1614
/*
 * Now umount can handle mount points as well as block devices.
 * This is important for filesystems which use unnamed block devices.
 *
 * We now support a flag for forced unmount like the other 'big iron'
 * unixes.  Our API is identical to OSF/1 to avoid making a mess of AMD.
 */
1623int ksys_umount(char __user *name, int flags)
1624{
1625 struct path path;
1626 struct mount *mnt;
1627 int retval;
1628 int lookup_flags = 0;
1629
1630 if (flags & ~(MNT_FORCE | MNT_DETACH | MNT_EXPIRE | UMOUNT_NOFOLLOW))
1631 return -EINVAL;
1632
1633 if (!may_mount())
1634 return -EPERM;
1635
1636 if (!(flags & UMOUNT_NOFOLLOW))
1637 lookup_flags |= LOOKUP_FOLLOW;
1638
1639 retval = user_path_mountpoint_at(AT_FDCWD, name, lookup_flags, &path);
1640 if (retval)
1641 goto out;
1642 mnt = real_mount(path.mnt);
1643 retval = -EINVAL;
1644 if (path.dentry != path.mnt->mnt_root)
1645 goto dput_and_out;
1646 if (!check_mnt(mnt))
1647 goto dput_and_out;
1648 if (mnt->mnt.mnt_flags & MNT_LOCKED)
1649 goto dput_and_out;
1650 retval = -EPERM;
1651 if (flags & MNT_FORCE && !capable(CAP_SYS_ADMIN))
1652 goto dput_and_out;
1653
1654 retval = do_umount(mnt, flags);
1655dput_and_out:
1656
1657 dput(path.dentry);
1658 mntput_no_expire(mnt);
1659out:
1660 return retval;
1661}
1662
1663SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
1664{
1665 return ksys_umount(name, flags);
1666}
1667
1668#ifdef __ARCH_WANT_SYS_OLDUMOUNT
1669
1670
1671
1672
1673SYSCALL_DEFINE1(oldumount, char __user *, name)
1674{
1675 return ksys_umount(name, 0);
1676}
1677
1678#endif
1679
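/* Is this dentry a /proc/<pid>/ns/mnt style mount-namespace file? */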
1680static bool is_mnt_ns_file(struct dentry *dentry)
1681{
1682
1683 return dentry->d_op == &ns_dentry_operations &&
1684 dentry->d_fsdata == &mntns_operations;
1685}
1686
1687struct mnt_namespace *to_mnt_ns(struct ns_common *ns)
1688{
1689 return container_of(ns, struct mnt_namespace, ns);
1690}
1691
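/*
 * Could bind mounting this mount namespace file cause a namespace loop?
 * Only namespaces strictly newer than the caller's own may be bound, so
 * that no reference cycle between namespaces can be created.
 */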
1692static bool mnt_ns_loop(struct dentry *dentry)
1693{
1694
1695
1696
1697 struct mnt_namespace *mnt_ns;
1698 if (!is_mnt_ns_file(dentry))
1699 return false;
1700
1701 mnt_ns = to_mnt_ns(get_proc_ns(dentry->d_inode));
1702 return current->nsproxy->mnt_ns->seq >= mnt_ns->seq;
1703}
1704
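/*
 * Clone the mount tree at @mnt rooted at @dentry.  The CL_* flags control
 * whether unbindable mounts and mount-namespace files are copied, and how
 * sharing, slaving and expiry state is inherited by the copies.
 */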
1705struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
1706 int flag)
1707{
1708 struct mount *res, *p, *q, *r, *parent;
1709
1710 if (!(flag & CL_COPY_UNBINDABLE) && IS_MNT_UNBINDABLE(mnt))
1711 return ERR_PTR(-EINVAL);
1712
1713 if (!(flag & CL_COPY_MNT_NS_FILE) && is_mnt_ns_file(dentry))
1714 return ERR_PTR(-EINVAL);
1715
1716 res = q = clone_mnt(mnt, dentry, flag);
1717 if (IS_ERR(q))
1718 return q;
1719
1720 q->mnt_mountpoint = mnt->mnt_mountpoint;
1721
1722 p = mnt;
1723 list_for_each_entry(r, &mnt->mnt_mounts, mnt_child) {
1724 struct mount *s;
1725 if (!is_subdir(r->mnt_mountpoint, dentry))
1726 continue;
1727
1728 for (s = r; s; s = next_mnt(s, r)) {
1729 if (!(flag & CL_COPY_UNBINDABLE) &&
1730 IS_MNT_UNBINDABLE(s)) {
1731 s = skip_mnt_tree(s);
1732 continue;
1733 }
1734 if (!(flag & CL_COPY_MNT_NS_FILE) &&
1735 is_mnt_ns_file(s->mnt.mnt_root)) {
1736 s = skip_mnt_tree(s);
1737 continue;
1738 }
1739 while (p != s->mnt_parent) {
1740 p = p->mnt_parent;
1741 q = q->mnt_parent;
1742 }
1743 p = s;
1744 parent = q;
1745 q = clone_mnt(p, p->mnt.mnt_root, flag);
1746 if (IS_ERR(q))
1747 goto out;
1748 lock_mount_hash();
1749 list_add_tail(&q->mnt_list, &res->mnt_list);
1750 attach_mnt(q, parent, p->mnt_mp);
1751 unlock_mount_hash();
1752 }
1753 }
1754 return res;
1755out:
1756 if (res) {
1757 lock_mount_hash();
1758 umount_tree(res, UMOUNT_SYNC);
1759 unlock_mount_hash();
1760 }
1761 return q;
1762}
1763
1764
1765
1766struct vfsmount *collect_mounts(const struct path *path)
1767{
1768 struct mount *tree;
1769 namespace_lock();
1770 if (!check_mnt(real_mount(path->mnt)))
1771 tree = ERR_PTR(-EINVAL);
1772 else
1773 tree = copy_tree(real_mount(path->mnt), path->dentry,
1774 CL_COPY_ALL | CL_PRIVATE);
1775 namespace_unlock();
1776 if (IS_ERR(tree))
1777 return ERR_CAST(tree);
1778 return &tree->mnt;
1779}
1780
1781void drop_collected_mounts(struct vfsmount *mnt)
1782{
1783 namespace_lock();
1784 lock_mount_hash();
1785 umount_tree(real_mount(mnt), UMOUNT_SYNC);
1786 unlock_mount_hash();
1787 namespace_unlock();
1788}
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799struct vfsmount *clone_private_mount(const struct path *path)
1800{
1801 struct mount *old_mnt = real_mount(path->mnt);
1802 struct mount *new_mnt;
1803
1804 if (IS_MNT_UNBINDABLE(old_mnt))
1805 return ERR_PTR(-EINVAL);
1806
1807 new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE);
1808 if (IS_ERR(new_mnt))
1809 return ERR_CAST(new_mnt);
1810
1811 return &new_mnt->mnt;
1812}
1813EXPORT_SYMBOL_GPL(clone_private_mount);
1814
1815int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg,
1816 struct vfsmount *root)
1817{
1818 struct mount *mnt;
1819 int res = f(root, arg);
1820 if (res)
1821 return res;
1822 list_for_each_entry(mnt, &real_mount(root)->mnt_list, mnt_list) {
1823 res = f(&mnt->mnt, arg);
1824 if (res)
1825 return res;
1826 }
1827 return 0;
1828}
1829
1830static void cleanup_group_ids(struct mount *mnt, struct mount *end)
1831{
1832 struct mount *p;
1833
1834 for (p = mnt; p != end; p = next_mnt(p, mnt)) {
1835 if (p->mnt_group_id && !IS_MNT_SHARED(p))
1836 mnt_release_group_id(p);
1837 }
1838}
1839
1840static int invent_group_ids(struct mount *mnt, bool recurse)
1841{
1842 struct mount *p;
1843
1844 for (p = mnt; p; p = recurse ? next_mnt(p, mnt) : NULL) {
1845 if (!p->mnt_group_id && !IS_MNT_SHARED(p)) {
1846 int err = mnt_alloc_group_id(p);
1847 if (err) {
1848 cleanup_group_ids(mnt, p);
1849 return err;
1850 }
1851 }
1852 }
1853
1854 return 0;
1855}
1856
1857int count_mounts(struct mnt_namespace *ns, struct mount *mnt)
1858{
1859 unsigned int max = READ_ONCE(sysctl_mount_max);
1860 unsigned int mounts = 0, old, pending, sum;
1861 struct mount *p;
1862
1863 for (p = mnt; p; p = next_mnt(p, mnt))
1864 mounts++;
1865
1866 old = ns->mounts;
1867 pending = ns->pending_mounts;
1868 sum = old + pending;
1869 if ((old > sum) ||
1870 (pending > sum) ||
1871 (max < sum) ||
1872 (mounts > (max - sum)))
1873 return -ENOSPC;
1874
1875 ns->pending_mounts = pending + mounts;
1876 return 0;
1877}
1878
/*
 * attach_recursive_mnt - attach a source mount tree under a destination
 * @source_mnt: mount tree to be attached
 * @dest_mnt:   mount the tree is attached under
 * @dest_mp:    mountpoint (dentry) the tree is attached at
 * @parent_path: if non-NULL, @source_mnt is being moved rather than added;
 *		detach it from its current parent and return the old parent
 *		mount and mountpoint through this path
 *
 * If @dest_mnt is shared, the attach is propagated: a copy of @source_mnt
 * is mounted at the corresponding mountpoint on every mount in @dest_mnt's
 * peer group and on their slaves (see propagate_mnt() in pnode.c), and the
 * source tree and its copies are marked shared or slave as the propagation
 * rules dictate.  If @dest_mnt is not shared, @source_mnt is simply attached
 * at @dest_mp and committed into @dest_mnt's namespace.
 *
 * Must be called with namespace_sem held for write; takes mount_lock
 * internally.
 */
1942static int attach_recursive_mnt(struct mount *source_mnt,
1943 struct mount *dest_mnt,
1944 struct mountpoint *dest_mp,
1945 struct path *parent_path)
1946{
1947 HLIST_HEAD(tree_list);
1948 struct mnt_namespace *ns = dest_mnt->mnt_ns;
1949 struct mountpoint *smp;
1950 struct mount *child, *p;
1951 struct hlist_node *n;
1952 int err;
1953
1954
1955
1956
1957 smp = get_mountpoint(source_mnt->mnt.mnt_root);
1958 if (IS_ERR(smp))
1959 return PTR_ERR(smp);
1960
1961
1962 if (!parent_path) {
1963 err = count_mounts(ns, source_mnt);
1964 if (err)
1965 goto out;
1966 }
1967
1968 if (IS_MNT_SHARED(dest_mnt)) {
1969 err = invent_group_ids(source_mnt, true);
1970 if (err)
1971 goto out;
1972 err = propagate_mnt(dest_mnt, dest_mp, source_mnt, &tree_list);
1973 lock_mount_hash();
1974 if (err)
1975 goto out_cleanup_ids;
1976 for (p = source_mnt; p; p = next_mnt(p, source_mnt))
1977 set_mnt_shared(p);
1978 } else {
1979 lock_mount_hash();
1980 }
1981 if (parent_path) {
1982 detach_mnt(source_mnt, parent_path);
1983 attach_mnt(source_mnt, dest_mnt, dest_mp);
1984 touch_mnt_namespace(source_mnt->mnt_ns);
1985 } else {
1986 mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt);
1987 commit_tree(source_mnt);
1988 }
1989
1990 hlist_for_each_entry_safe(child, n, &tree_list, mnt_hash) {
1991 struct mount *q;
1992 hlist_del_init(&child->mnt_hash);
1993 q = __lookup_mnt(&child->mnt_parent->mnt,
1994 child->mnt_mountpoint);
1995 if (q)
1996 mnt_change_mountpoint(child, smp, q);
1997 commit_tree(child);
1998 }
1999 put_mountpoint(smp);
2000 unlock_mount_hash();
2001
2002 return 0;
2003
2004 out_cleanup_ids:
2005 while (!hlist_empty(&tree_list)) {
2006 child = hlist_entry(tree_list.first, struct mount, mnt_hash);
2007 child->mnt_parent->mnt_ns->pending_mounts = 0;
2008 umount_tree(child, UMOUNT_SYNC);
2009 }
2010 unlock_mount_hash();
2011 cleanup_group_ids(source_mnt, NULL);
2012 out:
2013 ns->pending_mounts = 0;
2014
2015 read_seqlock_excl(&mount_lock);
2016 put_mountpoint(smp);
2017 read_sequnlock_excl(&mount_lock);
2018
2019 return err;
2020}
2021
2022static struct mountpoint *lock_mount(struct path *path)
2023{
2024 struct vfsmount *mnt;
2025 struct dentry *dentry = path->dentry;
2026retry:
2027 inode_lock(dentry->d_inode);
2028 if (unlikely(cant_mount(dentry))) {
2029 inode_unlock(dentry->d_inode);
2030 return ERR_PTR(-ENOENT);
2031 }
2032 namespace_lock();
2033 mnt = lookup_mnt(path);
2034 if (likely(!mnt)) {
2035 struct mountpoint *mp = get_mountpoint(dentry);
2036 if (IS_ERR(mp)) {
2037 namespace_unlock();
2038 inode_unlock(dentry->d_inode);
2039 return mp;
2040 }
2041 return mp;
2042 }
2043 namespace_unlock();
2044 inode_unlock(path->dentry->d_inode);
2045 path_put(path);
2046 path->mnt = mnt;
2047 dentry = path->dentry = dget(mnt->mnt_root);
2048 goto retry;
2049}
2050
2051static void unlock_mount(struct mountpoint *where)
2052{
2053 struct dentry *dentry = where->m_dentry;
2054
2055 read_seqlock_excl(&mount_lock);
2056 put_mountpoint(where);
2057 read_sequnlock_excl(&mount_lock);
2058
2059 namespace_unlock();
2060 inode_unlock(dentry->d_inode);
2061}
2062
2063static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp)
2064{
2065 if (mnt->mnt.mnt_sb->s_flags & SB_NOUSER)
2066 return -EINVAL;
2067
2068 if (d_is_dir(mp->m_dentry) !=
2069 d_is_dir(mnt->mnt.mnt_root))
2070 return -ENOTDIR;
2071
2072 return attach_recursive_mnt(mnt, p, mp, NULL);
2073}
2074
/*
 * Sanity check the flags to change_mnt_propagation().
 */
static int flags_to_propagation_type(int ms_flags)
{
	int type = ms_flags & ~(MS_REC | MS_SILENT);

	/* Fail if any non-propagation flags are set */
	if (type & ~(MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
		return 0;
	/* Only one propagation flag should be set */
	if (!is_power_of_2(type))
		return 0;
	return type;
}
2091
2092
2093
2094
2095static int do_change_type(struct path *path, int ms_flags)
2096{
2097 struct mount *m;
2098 struct mount *mnt = real_mount(path->mnt);
2099 int recurse = ms_flags & MS_REC;
2100 int type;
2101 int err = 0;
2102
2103 if (path->dentry != path->mnt->mnt_root)
2104 return -EINVAL;
2105
2106 type = flags_to_propagation_type(ms_flags);
2107 if (!type)
2108 return -EINVAL;
2109
2110 namespace_lock();
2111 if (type == MS_SHARED) {
2112 err = invent_group_ids(mnt, recurse);
2113 if (err)
2114 goto out_unlock;
2115 }
2116
2117 lock_mount_hash();
2118 for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL))
2119 change_mnt_propagation(m, type);
2120 unlock_mount_hash();
2121
2122 out_unlock:
2123 namespace_unlock();
2124 return err;
2125}
2126
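/*
 * Check whether the tree below @dentry in @mnt contains any MNT_LOCKED
 * mount, i.e. one that must stay attached to its parent because detaching
 * it would reveal what is underneath to a less privileged namespace.
 */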
2127static bool has_locked_children(struct mount *mnt, struct dentry *dentry)
2128{
2129 struct mount *child;
2130 list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
2131 if (!is_subdir(child->mnt_mountpoint, dentry))
2132 continue;
2133
2134 if (child->mnt.mnt_flags & MNT_LOCKED)
2135 return true;
2136 }
2137 return false;
2138}
2139
2140
2141
2142
2143static int do_loopback(struct path *path, const char *old_name,
2144 int recurse)
2145{
2146 struct path old_path;
2147 struct mount *mnt = NULL, *old, *parent;
2148 struct mountpoint *mp;
2149 int err;
2150 if (!old_name || !*old_name)
2151 return -EINVAL;
2152 err = kern_path(old_name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &old_path);
2153 if (err)
2154 return err;
2155
2156 err = -EINVAL;
2157 if (mnt_ns_loop(old_path.dentry))
2158 goto out;
2159
2160 mp = lock_mount(path);
2161 err = PTR_ERR(mp);
2162 if (IS_ERR(mp))
2163 goto out;
2164
2165 old = real_mount(old_path.mnt);
2166 parent = real_mount(path->mnt);
2167
2168 err = -EINVAL;
2169 if (IS_MNT_UNBINDABLE(old))
2170 goto out2;
2171
2172 if (!check_mnt(parent))
2173 goto out2;
2174
2175 if (!check_mnt(old) && old_path.dentry->d_op != &ns_dentry_operations)
2176 goto out2;
2177
2178 if (!recurse && has_locked_children(old, old_path.dentry))
2179 goto out2;
2180
2181 if (recurse)
2182 mnt = copy_tree(old, old_path.dentry, CL_COPY_MNT_NS_FILE);
2183 else
2184 mnt = clone_mnt(old, old_path.dentry, 0);
2185
2186 if (IS_ERR(mnt)) {
2187 err = PTR_ERR(mnt);
2188 goto out2;
2189 }
2190
2191 mnt->mnt.mnt_flags &= ~MNT_LOCKED;
2192
2193 err = graft_tree(mnt, parent, mp);
2194 if (err) {
2195 lock_mount_hash();
2196 umount_tree(mnt, UMOUNT_SYNC);
2197 unlock_mount_hash();
2198 }
2199out2:
2200 unlock_mount(mp);
2201out:
2202 path_put(&old_path);
2203 return err;
2204}
2205
2206static int change_mount_flags(struct vfsmount *mnt, int ms_flags)
2207{
2208 int error = 0;
2209 int readonly_request = 0;
2210
2211 if (ms_flags & MS_RDONLY)
2212 readonly_request = 1;
2213 if (readonly_request == __mnt_is_readonly(mnt))
2214 return 0;
2215
2216 if (readonly_request)
2217 error = mnt_make_readonly(real_mount(mnt));
2218 else
2219 __mnt_unmake_readonly(real_mount(mnt));
2220 return error;
2221}
2222
/*
 * change filesystem flags.  dir should be a physical root of filesystem.
 * If you've mounted a non-root directory somewhere and want to do remount
 * on it - tough luck.
 */
2228static int do_remount(struct path *path, int ms_flags, int sb_flags,
2229 int mnt_flags, void *data)
2230{
2231 int err;
2232 struct super_block *sb = path->mnt->mnt_sb;
2233 struct mount *mnt = real_mount(path->mnt);
2234
2235 if (!check_mnt(mnt))
2236 return -EINVAL;
2237
2238 if (path->dentry != path->mnt->mnt_root)
2239 return -EINVAL;
2240
2241
2242
2243
2244
2245
2246
2247 if ((mnt->mnt.mnt_flags & MNT_LOCK_READONLY) &&
2248 !(mnt_flags & MNT_READONLY)) {
2249 return -EPERM;
2250 }
2251 if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) &&
2252 !(mnt_flags & MNT_NODEV)) {
2253 return -EPERM;
2254 }
2255 if ((mnt->mnt.mnt_flags & MNT_LOCK_NOSUID) &&
2256 !(mnt_flags & MNT_NOSUID)) {
2257 return -EPERM;
2258 }
2259 if ((mnt->mnt.mnt_flags & MNT_LOCK_NOEXEC) &&
2260 !(mnt_flags & MNT_NOEXEC)) {
2261 return -EPERM;
2262 }
2263 if ((mnt->mnt.mnt_flags & MNT_LOCK_ATIME) &&
2264 ((mnt->mnt.mnt_flags & MNT_ATIME_MASK) != (mnt_flags & MNT_ATIME_MASK))) {
2265 return -EPERM;
2266 }
2267
2268 err = security_sb_remount(sb, data);
2269 if (err)
2270 return err;
2271
2272 down_write(&sb->s_umount);
2273 if (ms_flags & MS_BIND)
2274 err = change_mount_flags(path->mnt, ms_flags);
2275 else if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN))
2276 err = -EPERM;
2277 else
2278 err = do_remount_sb(sb, sb_flags, data, 0);
2279 if (!err) {
2280 lock_mount_hash();
2281 mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK;
2282 mnt->mnt.mnt_flags = mnt_flags;
2283 touch_mnt_namespace(mnt->mnt_ns);
2284 unlock_mount_hash();
2285 }
2286 up_write(&sb->s_umount);
2287 return err;
2288}
2289
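/* Does the mount tree rooted at @mnt contain any unbindable mounts? */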
2290static inline int tree_contains_unbindable(struct mount *mnt)
2291{
2292 struct mount *p;
2293 for (p = mnt; p; p = next_mnt(p, mnt)) {
2294 if (IS_MNT_UNBINDABLE(p))
2295 return 1;
2296 }
2297 return 0;
2298}
2299
2300static int do_move_mount(struct path *path, const char *old_name)
2301{
2302 struct path old_path, parent_path;
2303 struct mount *p;
2304 struct mount *old;
2305 struct mountpoint *mp;
2306 int err;
2307 if (!old_name || !*old_name)
2308 return -EINVAL;
2309 err = kern_path(old_name, LOOKUP_FOLLOW, &old_path);
2310 if (err)
2311 return err;
2312
2313 mp = lock_mount(path);
2314 err = PTR_ERR(mp);
2315 if (IS_ERR(mp))
2316 goto out;
2317
2318 old = real_mount(old_path.mnt);
2319 p = real_mount(path->mnt);
2320
2321 err = -EINVAL;
2322 if (!check_mnt(p) || !check_mnt(old))
2323 goto out1;
2324
2325 if (old->mnt.mnt_flags & MNT_LOCKED)
2326 goto out1;
2327
2328 err = -EINVAL;
2329 if (old_path.dentry != old_path.mnt->mnt_root)
2330 goto out1;
2331
2332 if (!mnt_has_parent(old))
2333 goto out1;
2334
2335 if (d_is_dir(path->dentry) !=
2336 d_is_dir(old_path.dentry))
2337 goto out1;
2338
2339
2340
2341 if (IS_MNT_SHARED(old->mnt_parent))
2342 goto out1;
2343
2344
2345
2346
2347 if (IS_MNT_SHARED(p) && tree_contains_unbindable(old))
2348 goto out1;
2349 err = -ELOOP;
2350 for (; mnt_has_parent(p); p = p->mnt_parent)
2351 if (p == old)
2352 goto out1;
2353
2354 err = attach_recursive_mnt(old, real_mount(path->mnt), mp, &parent_path);
2355 if (err)
2356 goto out1;
2357
2358
2359
2360 list_del_init(&old->mnt_expire);
2361out1:
2362 unlock_mount(mp);
2363out:
2364 if (!err)
2365 path_put(&parent_path);
2366 path_put(&old_path);
2367 return err;
2368}
2369
2370static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype)
2371{
2372 int err;
2373 const char *subtype = strchr(fstype, '.');
2374 if (subtype) {
2375 subtype++;
2376 err = -EINVAL;
2377 if (!subtype[0])
2378 goto err;
2379 } else
2380 subtype = "";
2381
2382 mnt->mnt_sb->s_subtype = kstrdup(subtype, GFP_KERNEL);
2383 err = -ENOMEM;
2384 if (!mnt->mnt_sb->s_subtype)
2385 goto err;
2386 return mnt;
2387
2388 err:
2389 mntput(mnt);
2390 return ERR_PTR(err);
2391}
2392
/*
 * add a mount into a namespace's mount tree
 */
2396static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags)
2397{
2398 struct mountpoint *mp;
2399 struct mount *parent;
2400 int err;
2401
2402 mnt_flags &= ~MNT_INTERNAL_FLAGS;
2403
2404 mp = lock_mount(path);
2405 if (IS_ERR(mp))
2406 return PTR_ERR(mp);
2407
2408 parent = real_mount(path->mnt);
2409 err = -EINVAL;
2410 if (unlikely(!check_mnt(parent))) {
2411
2412 if (!(mnt_flags & MNT_SHRINKABLE))
2413 goto unlock;
2414
2415 if (!parent->mnt_ns)
2416 goto unlock;
2417 }
2418
2419
2420 err = -EBUSY;
2421 if (path->mnt->mnt_sb == newmnt->mnt.mnt_sb &&
2422 path->mnt->mnt_root == path->dentry)
2423 goto unlock;
2424
2425 err = -EINVAL;
2426 if (d_is_symlink(newmnt->mnt.mnt_root))
2427 goto unlock;
2428
2429 newmnt->mnt.mnt_flags = mnt_flags;
2430 err = graft_tree(newmnt, parent, mp);
2431
2432unlock:
2433 unlock_mount(mp);
2434 return err;
2435}
2436
2437static bool mount_too_revealing(struct vfsmount *mnt, int *new_mnt_flags);
2438
/*
 * create a new mount for userspace and request it to be added into the
 * namespace's tree
 */
2443static int do_new_mount(struct path *path, const char *fstype, int sb_flags,
2444 int mnt_flags, const char *name, void *data)
2445{
2446 struct file_system_type *type;
2447 struct vfsmount *mnt;
2448 int err;
2449
2450 if (!fstype)
2451 return -EINVAL;
2452
2453 type = get_fs_type(fstype);
2454 if (!type)
2455 return -ENODEV;
2456
2457 mnt = vfs_kern_mount(type, sb_flags, name, data);
2458 if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) &&
2459 !mnt->mnt_sb->s_subtype)
2460 mnt = fs_set_subtype(mnt, fstype);
2461
2462 put_filesystem(type);
2463 if (IS_ERR(mnt))
2464 return PTR_ERR(mnt);
2465
2466 if (mount_too_revealing(mnt, &mnt_flags)) {
2467 mntput(mnt);
2468 return -EPERM;
2469 }
2470
2471 err = do_add_mount(real_mount(mnt), path, mnt_flags);
2472 if (err)
2473 mntput(mnt);
2474 return err;
2475}
2476
2477int finish_automount(struct vfsmount *m, struct path *path)
2478{
2479 struct mount *mnt = real_mount(m);
2480 int err;
2481
2482
2483
2484 BUG_ON(mnt_get_count(mnt) < 2);
2485
2486 if (m->mnt_sb == path->mnt->mnt_sb &&
2487 m->mnt_root == path->dentry) {
2488 err = -ELOOP;
2489 goto fail;
2490 }
2491
2492 err = do_add_mount(mnt, path, path->mnt->mnt_flags | MNT_SHRINKABLE);
2493 if (!err)
2494 return 0;
2495fail:
2496
2497 if (!list_empty(&mnt->mnt_expire)) {
2498 namespace_lock();
2499 list_del_init(&mnt->mnt_expire);
2500 namespace_unlock();
2501 }
2502 mntput(m);
2503 mntput(m);
2504 return err;
2505}
2506
2507
2508
2509
2510
2511
2512void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list)
2513{
2514 namespace_lock();
2515
2516 list_add_tail(&real_mount(mnt)->mnt_expire, expiry_list);
2517
2518 namespace_unlock();
2519}
2520EXPORT_SYMBOL(mnt_set_expiry);
2521
2522
2523
2524
2525
2526
2527void mark_mounts_for_expiry(struct list_head *mounts)
2528{
2529 struct mount *mnt, *next;
2530 LIST_HEAD(graveyard);
2531
2532 if (list_empty(mounts))
2533 return;
2534
2535 namespace_lock();
2536 lock_mount_hash();
2537
2538
2539
2540
2541
2542
2543
2544 list_for_each_entry_safe(mnt, next, mounts, mnt_expire) {
2545 if (!xchg(&mnt->mnt_expiry_mark, 1) ||
2546 propagate_mount_busy(mnt, 1))
2547 continue;
2548 list_move(&mnt->mnt_expire, &graveyard);
2549 }
2550 while (!list_empty(&graveyard)) {
2551 mnt = list_first_entry(&graveyard, struct mount, mnt_expire);
2552 touch_mnt_namespace(mnt->mnt_ns);
2553 umount_tree(mnt, UMOUNT_PROPAGATE|UMOUNT_SYNC);
2554 }
2555 unlock_mount_hash();
2556 namespace_unlock();
2557}
2558
2559EXPORT_SYMBOL_GPL(mark_mounts_for_expiry);
2560
2561
2562
2563
2564
2565
2566
2567static int select_submounts(struct mount *parent, struct list_head *graveyard)
2568{
2569 struct mount *this_parent = parent;
2570 struct list_head *next;
2571 int found = 0;
2572
2573repeat:
2574 next = this_parent->mnt_mounts.next;
2575resume:
2576 while (next != &this_parent->mnt_mounts) {
2577 struct list_head *tmp = next;
2578 struct mount *mnt = list_entry(tmp, struct mount, mnt_child);
2579
2580 next = tmp->next;
2581 if (!(mnt->mnt.mnt_flags & MNT_SHRINKABLE))
2582 continue;
2583
2584
2585
2586 if (!list_empty(&mnt->mnt_mounts)) {
2587 this_parent = mnt;
2588 goto repeat;
2589 }
2590
2591 if (!propagate_mount_busy(mnt, 1)) {
2592 list_move_tail(&mnt->mnt_expire, graveyard);
2593 found++;
2594 }
2595 }
2596
2597
2598
2599 if (this_parent != parent) {
2600 next = this_parent->mnt_child.next;
2601 this_parent = this_parent->mnt_parent;
2602 goto resume;
2603 }
2604 return found;
2605}
2606
2607
2608
2609
2610
2611
2612
2613static void shrink_submounts(struct mount *mnt)
2614{
2615 LIST_HEAD(graveyard);
2616 struct mount *m;
2617
2618
2619 while (select_submounts(mnt, &graveyard)) {
2620 while (!list_empty(&graveyard)) {
2621 m = list_first_entry(&graveyard, struct mount,
2622 mnt_expire);
2623 touch_mnt_namespace(m->mnt_ns);
2624 umount_tree(m, UMOUNT_PROPAGATE|UMOUNT_SYNC);
2625 }
2626 }
2627}
2628
/*
 * Some copy_from_user() implementations do not return the exact number of
 * bytes remaining to copy on a fault.  But copy_mount_options() requires that.
 * Note that this function differs from copy_from_user() in that it will oops
 * on bad values of `to', rather than returning a short copy with no warning.
 */
2635static long exact_copy_from_user(void *to, const void __user * from,
2636 unsigned long n)
2637{
2638 char *t = to;
2639 const char __user *f = from;
2640 char c;
2641
2642 if (!access_ok(VERIFY_READ, from, n))
2643 return n;
2644
2645 while (n) {
2646 if (__get_user(c, f)) {
2647 memset(t, 0, n);
2648 break;
2649 }
2650 *t++ = c;
2651 f++;
2652 n--;
2653 }
2654 return n;
2655}
2656
2657void *copy_mount_options(const void __user * data)
2658{
2659 int i;
2660 unsigned long size;
2661 char *copy;
2662
2663 if (!data)
2664 return NULL;
2665
2666 copy = kmalloc(PAGE_SIZE, GFP_KERNEL);
2667 if (!copy)
2668 return ERR_PTR(-ENOMEM);
2669
2670
2671
2672
2673
2674
2675 size = TASK_SIZE - (unsigned long)data;
2676 if (size > PAGE_SIZE)
2677 size = PAGE_SIZE;
2678
2679 i = size - exact_copy_from_user(copy, data, size);
2680 if (!i) {
2681 kfree(copy);
2682 return ERR_PTR(-EFAULT);
2683 }
2684 if (i != PAGE_SIZE)
2685 memset(copy + i, 0, PAGE_SIZE - i);
2686 return copy;
2687}
2688
2689char *copy_mount_string(const void __user *data)
2690{
2691 return data ? strndup_user(data, PAGE_SIZE) : NULL;
2692}
2693
/*
 * Flags is a 32-bit value that allows up to 31 non-fs dependent flags to
 * be given to the mount() call (ie: read-only, no-dev, no-suid etc).
 *
 * data is a (void *) that can point to any structure up to
 * PAGE_SIZE-1 bytes, which can contain arbitrary fs-dependent
 * information (or be NULL).
 *
 * Pre-0.97 versions of mount() didn't have a flags word.
 * When the flags word was introduced its top half was required
 * to have the magic value 0xC0ED, and this remained so until 2.4.0-test9.
 * Therefore, if this magic number is present, it carries no information
 * and must be discarded.
 */
2708long do_mount(const char *dev_name, const char __user *dir_name,
2709 const char *type_page, unsigned long flags, void *data_page)
2710{
2711 struct path path;
2712 unsigned int mnt_flags = 0, sb_flags;
2713 int retval = 0;
2714
2715
2716 if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
2717 flags &= ~MS_MGC_MSK;
2718
2719
2720 if (data_page)
2721 ((char *)data_page)[PAGE_SIZE - 1] = 0;
2722
2723 if (flags & MS_NOUSER)
2724 return -EINVAL;
2725
2726
2727 retval = user_path(dir_name, &path);
2728 if (retval)
2729 return retval;
2730
2731 retval = security_sb_mount(dev_name, &path,
2732 type_page, flags, data_page);
2733 if (!retval && !may_mount())
2734 retval = -EPERM;
2735 if (!retval && (flags & SB_MANDLOCK) && !may_mandlock())
2736 retval = -EPERM;
2737 if (retval)
2738 goto dput_out;
2739
2740
2741 if (!(flags & MS_NOATIME))
2742 mnt_flags |= MNT_RELATIME;
2743
2744
2745 if (flags & MS_NOSUID)
2746 mnt_flags |= MNT_NOSUID;
2747 if (flags & MS_NODEV)
2748 mnt_flags |= MNT_NODEV;
2749 if (flags & MS_NOEXEC)
2750 mnt_flags |= MNT_NOEXEC;
2751 if (flags & MS_NOATIME)
2752 mnt_flags |= MNT_NOATIME;
2753 if (flags & MS_NODIRATIME)
2754 mnt_flags |= MNT_NODIRATIME;
2755 if (flags & MS_STRICTATIME)
2756 mnt_flags &= ~(MNT_RELATIME | MNT_NOATIME);
2757 if (flags & MS_RDONLY)
2758 mnt_flags |= MNT_READONLY;
2759
2760
2761 if ((flags & MS_REMOUNT) &&
2762 ((flags & (MS_NOATIME | MS_NODIRATIME | MS_RELATIME |
2763 MS_STRICTATIME)) == 0)) {
2764 mnt_flags &= ~MNT_ATIME_MASK;
2765 mnt_flags |= path.mnt->mnt_flags & MNT_ATIME_MASK;
2766 }
2767
2768 sb_flags = flags & (SB_RDONLY |
2769 SB_SYNCHRONOUS |
2770 SB_MANDLOCK |
2771 SB_DIRSYNC |
2772 SB_SILENT |
2773 SB_POSIXACL |
2774 SB_LAZYTIME |
2775 SB_I_VERSION);
2776
2777 if (flags & MS_REMOUNT)
2778 retval = do_remount(&path, flags, sb_flags, mnt_flags,
2779 data_page);
2780 else if (flags & MS_BIND)
2781 retval = do_loopback(&path, dev_name, flags & MS_REC);
2782 else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
2783 retval = do_change_type(&path, flags);
2784 else if (flags & MS_MOVE)
2785 retval = do_move_mount(&path, dev_name);
2786 else
2787 retval = do_new_mount(&path, type_page, sb_flags, mnt_flags,
2788 dev_name, data_page);
2789dput_out:
2790 path_put(&path);
2791 return retval;
2792}
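
/*
 * A sketch of how the dispatch at the end of do_mount() maps onto userspace
 * mount(2) calls (illustrative only; the paths are made up and the list is
 * not exhaustive):
 *
 *	mount("/a", "/b", NULL, MS_BIND, NULL);                 -> do_loopback()
 *	mount(NULL, "/b", NULL, MS_REMOUNT | MS_RDONLY, NULL);  -> do_remount()
 *	mount(NULL, "/b", NULL, MS_PRIVATE, NULL);              -> do_change_type()
 *	mount("/b", "/c", NULL, MS_MOVE, NULL);                 -> do_move_mount()
 *	mount("none", "/b", "tmpfs", 0, "size=64m");            -> do_new_mount()
 */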
2793
2794static struct ucounts *inc_mnt_namespaces(struct user_namespace *ns)
2795{
2796 return inc_ucount(ns, current_euid(), UCOUNT_MNT_NAMESPACES);
2797}
2798
2799static void dec_mnt_namespaces(struct ucounts *ucounts)
2800{
2801 dec_ucount(ucounts, UCOUNT_MNT_NAMESPACES);
2802}
2803
2804static void free_mnt_ns(struct mnt_namespace *ns)
2805{
2806 ns_free_inum(&ns->ns);
2807 dec_mnt_namespaces(ns->ucounts);
2808 put_user_ns(ns->user_ns);
2809 kfree(ns);
2810}
2811
/*
 * Assign a sequence number so we can detect when we attempt to bind
 * mount a reference to an older mount namespace into the current
 * mount namespace, preventing reference counting loops.  A 64-bit
 * counter will not wrap in any realistic lifetime, so that possibility
 * can be ignored.
 */
2819static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1);
2820
2821static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
2822{
2823 struct mnt_namespace *new_ns;
2824 struct ucounts *ucounts;
2825 int ret;
2826
2827 ucounts = inc_mnt_namespaces(user_ns);
2828 if (!ucounts)
2829 return ERR_PTR(-ENOSPC);
2830
2831 new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
2832 if (!new_ns) {
2833 dec_mnt_namespaces(ucounts);
2834 return ERR_PTR(-ENOMEM);
2835 }
2836 ret = ns_alloc_inum(&new_ns->ns);
2837 if (ret) {
2838 kfree(new_ns);
2839 dec_mnt_namespaces(ucounts);
2840 return ERR_PTR(ret);
2841 }
2842 new_ns->ns.ops = &mntns_operations;
2843 new_ns->seq = atomic64_add_return(1, &mnt_ns_seq);
2844 atomic_set(&new_ns->count, 1);
2845 new_ns->root = NULL;
2846 INIT_LIST_HEAD(&new_ns->list);
2847 init_waitqueue_head(&new_ns->poll);
2848 new_ns->event = 0;
2849 new_ns->user_ns = get_user_ns(user_ns);
2850 new_ns->ucounts = ucounts;
2851 new_ns->mounts = 0;
2852 new_ns->pending_mounts = 0;
2853 return new_ns;
2854}
2855
2856__latent_entropy
2857struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
2858 struct user_namespace *user_ns, struct fs_struct *new_fs)
2859{
2860 struct mnt_namespace *new_ns;
2861 struct vfsmount *rootmnt = NULL, *pwdmnt = NULL;
2862 struct mount *p, *q;
2863 struct mount *old;
2864 struct mount *new;
2865 int copy_flags;
2866
2867 BUG_ON(!ns);
2868
2869 if (likely(!(flags & CLONE_NEWNS))) {
2870 get_mnt_ns(ns);
2871 return ns;
2872 }
2873
2874 old = ns->root;
2875
2876 new_ns = alloc_mnt_ns(user_ns);
2877 if (IS_ERR(new_ns))
2878 return new_ns;
2879
2880 namespace_lock();
2881
2882 copy_flags = CL_COPY_UNBINDABLE | CL_EXPIRE;
2883 if (user_ns != ns->user_ns)
2884 copy_flags |= CL_SHARED_TO_SLAVE | CL_UNPRIVILEGED;
2885 new = copy_tree(old, old->mnt.mnt_root, copy_flags);
2886 if (IS_ERR(new)) {
2887 namespace_unlock();
2888 free_mnt_ns(new_ns);
2889 return ERR_CAST(new);
2890 }
2891 new_ns->root = new;
2892 list_add_tail(&new_ns->list, &new->mnt_list);
2893
	/*
	 * Second pass: switch the tsk->fs->* elements and mark new vfsmounts
	 * as belonging to new namespace.  We have already acquired a private
	 * fs_struct, so tsk->fs->lock is not needed.
	 */
2899 p = old;
2900 q = new;
2901 while (p) {
2902 q->mnt_ns = new_ns;
2903 new_ns->mounts++;
2904 if (new_fs) {
2905 if (&p->mnt == new_fs->root.mnt) {
2906 new_fs->root.mnt = mntget(&q->mnt);
2907 rootmnt = &p->mnt;
2908 }
2909 if (&p->mnt == new_fs->pwd.mnt) {
2910 new_fs->pwd.mnt = mntget(&q->mnt);
2911 pwdmnt = &p->mnt;
2912 }
2913 }
2914 p = next_mnt(p, old);
2915 q = next_mnt(q, new);
2916 if (!q)
2917 break;
2918 while (p->mnt.mnt_root != q->mnt.mnt_root)
2919 p = next_mnt(p, old);
2920 }
2921 namespace_unlock();
2922
2923 if (rootmnt)
2924 mntput(rootmnt);
2925 if (pwdmnt)
2926 mntput(pwdmnt);
2927
2928 return new_ns;
2929}
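
/*
 * copy_mnt_ns() is what gives clone(CLONE_NEWNS) and unshare(CLONE_NEWNS)
 * their private copy of the mount tree.  A minimal userspace sketch
 * (illustrative only; requires the appropriate privilege):
 *
 *	#include <sched.h>
 *	#include <sys/mount.h>
 *
 *	if (unshare(CLONE_NEWNS) == -1)
 *		perror("unshare");
 *	mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL);
 *
 * The MS_REC | MS_PRIVATE remount is the conventional follow-up so that
 * later mount activity does not propagate back to the parent namespace.
 */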
2930
/**
 * create_mnt_ns - creates a private namespace and adds a root filesystem
 * @m: pointer to the new root filesystem mountpoint
 */
2935static struct mnt_namespace *create_mnt_ns(struct vfsmount *m)
2936{
2937 struct mnt_namespace *new_ns = alloc_mnt_ns(&init_user_ns);
2938 if (!IS_ERR(new_ns)) {
2939 struct mount *mnt = real_mount(m);
2940 mnt->mnt_ns = new_ns;
2941 new_ns->root = mnt;
2942 new_ns->mounts++;
2943 list_add(&mnt->mnt_list, &new_ns->list);
2944 } else {
2945 mntput(m);
2946 }
2947 return new_ns;
2948}
2949
2950struct dentry *mount_subtree(struct vfsmount *mnt, const char *name)
2951{
2952 struct mnt_namespace *ns;
2953 struct super_block *s;
2954 struct path path;
2955 int err;
2956
2957 ns = create_mnt_ns(mnt);
2958 if (IS_ERR(ns))
2959 return ERR_CAST(ns);
2960
2961 err = vfs_path_lookup(mnt->mnt_root, mnt,
2962 name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &path);
2963
2964 put_mnt_ns(ns);
2965
2966 if (err)
2967 return ERR_PTR(err);
2968
	/* trade a vfsmount reference for active sb one */
2970 s = path.mnt->mnt_sb;
2971 atomic_inc(&s->s_active);
2972 mntput(path.mnt);
2973
2974 down_write(&s->s_umount);
2975
2976 return path.dentry;
2977}
2978EXPORT_SYMBOL(mount_subtree);
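
/*
 * Sketch of a typical mount_subtree() caller: a filesystem that mounts an
 * internal tree and wants to return the dentry for a path inside it.  The
 * names below are assumptions for illustration, not code from this file:
 *
 *	struct vfsmount *mnt = vfs_kern_mount(type, flags, name, data);
 *	if (IS_ERR(mnt))
 *		return ERR_CAST(mnt);
 *	return mount_subtree(mnt, "/some/export/path");
 *
 * As the code above shows, the vfsmount reference is traded for an active
 * superblock reference and the dentry is returned with s_umount held.
 */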
2979
2980int ksys_mount(char __user *dev_name, char __user *dir_name, char __user *type,
2981 unsigned long flags, void __user *data)
2982{
2983 int ret;
2984 char *kernel_type;
2985 char *kernel_dev;
2986 void *options;
2987
2988 kernel_type = copy_mount_string(type);
2989 ret = PTR_ERR(kernel_type);
2990 if (IS_ERR(kernel_type))
2991 goto out_type;
2992
2993 kernel_dev = copy_mount_string(dev_name);
2994 ret = PTR_ERR(kernel_dev);
2995 if (IS_ERR(kernel_dev))
2996 goto out_dev;
2997
2998 options = copy_mount_options(data);
2999 ret = PTR_ERR(options);
3000 if (IS_ERR(options))
3001 goto out_data;
3002
3003 ret = do_mount(kernel_dev, dir_name, kernel_type, flags, options);
3004
3005 kfree(options);
3006out_data:
3007 kfree(kernel_dev);
3008out_dev:
3009 kfree(kernel_type);
3010out_type:
3011 return ret;
3012}
3013
3014SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
3015 char __user *, type, unsigned long, flags, void __user *, data)
3016{
3017 return ksys_mount(dev_name, dir_name, type, flags, data);
3018}
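
/*
 * ksys_mount() is the kernel-internal entry point (used, for instance, by
 * the early boot mount helpers); the syscall above is a thin wrapper.
 * A userspace sketch of the classic read-only bind mount done through it
 * (illustrative only; the paths are made up):
 *
 *	#include <sys/mount.h>
 *
 *	mount("/srv/data", "/mnt/ro", NULL, MS_BIND, NULL);
 *	mount(NULL, "/mnt/ro", NULL, MS_REMOUNT | MS_BIND | MS_RDONLY, NULL);
 *
 * The second call is needed because the bind itself ignores the other mount
 * flags; the read-only restriction is applied by the follow-up remount.
 */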
3019
/*
 * Return true if path is reachable from root.
 *
 * namespace_sem or mount_lock is held.
 */
3025bool is_path_reachable(struct mount *mnt, struct dentry *dentry,
3026 const struct path *root)
3027{
3028 while (&mnt->mnt != root->mnt && mnt_has_parent(mnt)) {
3029 dentry = mnt->mnt_mountpoint;
3030 mnt = mnt->mnt_parent;
3031 }
3032 return &mnt->mnt == root->mnt && is_subdir(dentry, root->dentry);
3033}
3034
3035bool path_is_under(const struct path *path1, const struct path *path2)
3036{
3037 bool res;
3038 read_seqlock_excl(&mount_lock);
3039 res = is_path_reachable(real_mount(path1->mnt), path1->dentry, path2);
3040 read_sequnlock_excl(&mount_lock);
3041 return res;
3042}
3043EXPORT_SYMBOL(path_is_under);
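
/*
 * Example use of the reachability helpers above (a sketch; p1 and p2 are
 * hypothetical struct path values the caller has already looked up):
 *
 *	bool reachable = path_is_under(&p1, &p2);	true if p1 is p2 or below it
 *
 * path_is_under() takes mount_lock itself; is_path_reachable() is the raw
 * check for callers that already hold namespace_sem or mount_lock.
 */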
3044
/*
 * pivot_root Semantics:
 * Moves the root file system of the current process to the directory put_old,
 * makes new_root the new root file system of the current process, and sets
 * root/cwd of all processes which had them on the current root to new_root.
 *
 * Restrictions:
 * The new_root and put_old must be directories, and must not be on the
 * same file system as the current process root.  The put_old must be
 * underneath new_root, i.e. adding a non-zero number of /.. to the string
 * pointed to by put_old must yield the same directory as new_root.  No other
 * file system may be mounted on put_old.  After all, new_root is a mountpoint.
 *
 * Also, the current root cannot be on the 'rootfs' (initial ramfs) filesystem.
 * See Documentation/filesystems/ramfs-rootfs-initramfs.txt for alternatives
 * in this situation.
 *
 * Notes:
 *  - we don't move root/cwd if they are not at the root (reason: if something
 *    cared enough to change them, it's probably wrong to force them elsewhere)
 *  - it's okay to pick a root that isn't the root of a file system, e.g.
 *    /nfs/my_root where /nfs is the mount point.  It must be a mountpoint,
 *    though, so you may need to say mount --bind /nfs/my_root /nfs/my_root
 *    first.
 */
3070SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
3071 const char __user *, put_old)
3072{
3073 struct path new, old, parent_path, root_parent, root;
3074 struct mount *new_mnt, *root_mnt, *old_mnt;
3075 struct mountpoint *old_mp, *root_mp;
3076 int error;
3077
3078 if (!may_mount())
3079 return -EPERM;
3080
3081 error = user_path_dir(new_root, &new);
3082 if (error)
3083 goto out0;
3084
3085 error = user_path_dir(put_old, &old);
3086 if (error)
3087 goto out1;
3088
3089 error = security_sb_pivotroot(&old, &new);
3090 if (error)
3091 goto out2;
3092
3093 get_fs_root(current->fs, &root);
3094 old_mp = lock_mount(&old);
3095 error = PTR_ERR(old_mp);
3096 if (IS_ERR(old_mp))
3097 goto out3;
3098
3099 error = -EINVAL;
3100 new_mnt = real_mount(new.mnt);
3101 root_mnt = real_mount(root.mnt);
3102 old_mnt = real_mount(old.mnt);
3103 if (IS_MNT_SHARED(old_mnt) ||
3104 IS_MNT_SHARED(new_mnt->mnt_parent) ||
3105 IS_MNT_SHARED(root_mnt->mnt_parent))
3106 goto out4;
3107 if (!check_mnt(root_mnt) || !check_mnt(new_mnt))
3108 goto out4;
3109 if (new_mnt->mnt.mnt_flags & MNT_LOCKED)
3110 goto out4;
3111 error = -ENOENT;
3112 if (d_unlinked(new.dentry))
3113 goto out4;
3114 error = -EBUSY;
3115 if (new_mnt == root_mnt || old_mnt == root_mnt)
3116 goto out4;
3117 error = -EINVAL;
3118 if (root.mnt->mnt_root != root.dentry)
3119 goto out4;
3120 if (!mnt_has_parent(root_mnt))
3121 goto out4;
3122 root_mp = root_mnt->mnt_mp;
3123 if (new.mnt->mnt_root != new.dentry)
3124 goto out4;
3125 if (!mnt_has_parent(new_mnt))
3126 goto out4;
	/* make sure we can reach put_old from new_root */
3128 if (!is_path_reachable(old_mnt, old.dentry, &new))
3129 goto out4;
	/* make certain new is below the root */
3131 if (!is_path_reachable(new_mnt, new.dentry, &root))
3132 goto out4;
3133 root_mp->m_count++;
3134 lock_mount_hash();
3135 detach_mnt(new_mnt, &parent_path);
3136 detach_mnt(root_mnt, &root_parent);
3137 if (root_mnt->mnt.mnt_flags & MNT_LOCKED) {
3138 new_mnt->mnt.mnt_flags |= MNT_LOCKED;
3139 root_mnt->mnt.mnt_flags &= ~MNT_LOCKED;
3140 }
	/* mount old root on put_old */
3142 attach_mnt(root_mnt, old_mnt, old_mp);
	/* mount new_root on / */
3144 attach_mnt(new_mnt, real_mount(root_parent.mnt), root_mp);
3145 touch_mnt_namespace(current->nsproxy->mnt_ns);
	/* A moved mount should not expire automatically */
3147 list_del_init(&new_mnt->mnt_expire);
3148 put_mountpoint(root_mp);
3149 unlock_mount_hash();
3150 chroot_fs_refs(&root, &new);
3151 error = 0;
3152out4:
3153 unlock_mount(old_mp);
3154 if (!error) {
3155 path_put(&root_parent);
3156 path_put(&parent_path);
3157 }
3158out3:
3159 path_put(&root);
3160out2:
3161 path_put(&old);
3162out1:
3163 path_put(&new);
3164out0:
3165 return error;
3166}
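
/*
 * One common userspace sequence for the syscall above, as used when
 * switching into a prepared root (a sketch; paths are made up, error
 * handling is omitted, and new_root must already be a mount point, e.g.
 * via a bind mount of itself):
 *
 *	#include <unistd.h>
 *	#include <sys/syscall.h>
 *	#include <sys/mount.h>
 *
 *	chdir("/newroot");
 *	syscall(SYS_pivot_root, ".", "oldroot");	oldroot: existing dir under /newroot
 *	chroot(".");
 *	chdir("/");
 *	umount2("/oldroot", MNT_DETACH);
 */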
3167
3168static void __init init_mount_tree(void)
3169{
3170 struct vfsmount *mnt;
3171 struct mnt_namespace *ns;
3172 struct path root;
3173 struct file_system_type *type;
3174
3175 type = get_fs_type("rootfs");
3176 if (!type)
3177 panic("Can't find rootfs type");
3178 mnt = vfs_kern_mount(type, 0, "rootfs", NULL);
3179 put_filesystem(type);
3180 if (IS_ERR(mnt))
3181 panic("Can't create rootfs");
3182
3183 ns = create_mnt_ns(mnt);
3184 if (IS_ERR(ns))
3185 panic("Can't allocate initial namespace");
3186
3187 init_task.nsproxy->mnt_ns = ns;
3188 get_mnt_ns(ns);
3189
3190 root.mnt = mnt;
3191 root.dentry = mnt->mnt_root;
3192 mnt->mnt_flags |= MNT_LOCKED;
3193
3194 set_fs_pwd(current->fs, &root);
3195 set_fs_root(current->fs, &root);
3196}
3197
3198void __init mnt_init(void)
3199{
3200 int err;
3201
3202 mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct mount),
3203 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
3204
3205 mount_hashtable = alloc_large_system_hash("Mount-cache",
3206 sizeof(struct hlist_head),
3207 mhash_entries, 19,
3208 HASH_ZERO,
3209 &m_hash_shift, &m_hash_mask, 0, 0);
3210 mountpoint_hashtable = alloc_large_system_hash("Mountpoint-cache",
3211 sizeof(struct hlist_head),
3212 mphash_entries, 19,
3213 HASH_ZERO,
3214 &mp_hash_shift, &mp_hash_mask, 0, 0);
3215
3216 if (!mount_hashtable || !mountpoint_hashtable)
3217 panic("Failed to allocate mount hash table\n");
3218
3219 kernfs_init();
3220
3221 err = sysfs_init();
3222 if (err)
3223 printk(KERN_WARNING "%s: sysfs_init error: %d\n",
3224 __func__, err);
3225 fs_kobj = kobject_create_and_add("fs", NULL);
3226 if (!fs_kobj)
3227 printk(KERN_WARNING "%s: kobj create error\n", __func__);
3228 init_rootfs();
3229 init_mount_tree();
3230}
3231
3232void put_mnt_ns(struct mnt_namespace *ns)
3233{
3234 if (!atomic_dec_and_test(&ns->count))
3235 return;
3236 drop_collected_mounts(&ns->root->mnt);
3237 free_mnt_ns(ns);
3238}
3239
3240struct vfsmount *kern_mount_data(struct file_system_type *type, void *data)
3241{
3242 struct vfsmount *mnt;
3243 mnt = vfs_kern_mount(type, SB_KERNMOUNT, type->name, data);
3244 if (!IS_ERR(mnt)) {
		/*
		 * it is a longterm mount; don't release mnt until we unmount
		 * before the file system is unregistered
		 */
3249 real_mount(mnt)->mnt_ns = MNT_NS_INTERNAL;
3250 }
3251 return mnt;
3252}
3253EXPORT_SYMBOL_GPL(kern_mount_data);
3254
3255void kern_unmount(struct vfsmount *mnt)
3256{
	/* release long term mount so mount point can be released */
3258 if (!IS_ERR_OR_NULL(mnt)) {
3259 real_mount(mnt)->mnt_ns = NULL;
3260 synchronize_rcu();
3261 mntput(mnt);
3262 }
3263}
3264EXPORT_SYMBOL(kern_unmount);
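
/*
 * A minimal sketch of pairing kern_mount_data() with kern_unmount() from a
 * filesystem or subsystem init path.  'example_fs_type' and the function
 * names are hypothetical; kern_mount() is the data == NULL convenience
 * wrapper declared in <linux/fs.h>:
 *
 *	static struct vfsmount *example_mnt;
 *
 *	static int __init example_init(void)
 *	{
 *		example_mnt = kern_mount(&example_fs_type);
 *		return PTR_ERR_OR_ZERO(example_mnt);
 *	}
 *
 *	static void __exit example_exit(void)
 *	{
 *		kern_unmount(example_mnt);
 *	}
 */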
3265
3266bool our_mnt(struct vfsmount *mnt)
3267{
3268 return check_mnt(real_mount(mnt));
3269}
3270
3271bool current_chrooted(void)
3272{
	/* Does the current process have a non-standard root? */
3274 struct path ns_root;
3275 struct path fs_root;
3276 bool chrooted;
3277
	/* Find the namespace root */
	ns_root.mnt = &current->nsproxy->mnt_ns->root->mnt;
3280 ns_root.dentry = ns_root.mnt->mnt_root;
3281 path_get(&ns_root);
3282 while (d_mountpoint(ns_root.dentry) && follow_down_one(&ns_root))
3283 ;
3284
3285 get_fs_root(current->fs, &fs_root);
3286
3287 chrooted = !path_equal(&fs_root, &ns_root);
3288
3289 path_put(&fs_root);
3290 path_put(&ns_root);
3291
3292 return chrooted;
3293}
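
/*
 * current_chrooted() lets other subsystems refuse to grant namespace
 * privileges to a task running under a chroot; for example, the user
 * namespace code declines to create a new user namespace for a chrooted
 * caller, so that namespace creation cannot be used to escape the chroot.
 */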
3294
3295static bool mnt_already_visible(struct mnt_namespace *ns, struct vfsmount *new,
3296 int *new_mnt_flags)
3297{
3298 int new_flags = *new_mnt_flags;
3299 struct mount *mnt;
3300 bool visible = false;
3301
3302 down_read(&namespace_sem);
3303 list_for_each_entry(mnt, &ns->list, mnt_list) {
3304 struct mount *child;
3305 int mnt_flags;
3306
3307 if (mnt->mnt.mnt_sb->s_type != new->mnt_sb->s_type)
3308 continue;
3309
		/* This mount is not fully visible if its root directory
		 * is not the root directory of the filesystem.
		 */
3313 if (mnt->mnt.mnt_root != mnt->mnt.mnt_sb->s_root)
3314 continue;
3315
		/* A local view of the mount flags */
3317 mnt_flags = mnt->mnt.mnt_flags;
3318
		/* Don't miss readonly hidden in the superblock flags */
3320 if (sb_rdonly(mnt->mnt.mnt_sb))
3321 mnt_flags |= MNT_LOCK_READONLY;
3322
		/* Verify the mount flags are equal to or more permissive
		 * than the proposed new mount.
		 */
3326 if ((mnt_flags & MNT_LOCK_READONLY) &&
3327 !(new_flags & MNT_READONLY))
3328 continue;
3329 if ((mnt_flags & MNT_LOCK_ATIME) &&
3330 ((mnt_flags & MNT_ATIME_MASK) != (new_flags & MNT_ATIME_MASK)))
3331 continue;
3332
		/* This mount is not fully visible if there are any
		 * locked child mounts that cover anything except for
		 * empty directories.
		 */
3337 list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
3338 struct inode *inode = child->mnt_mountpoint->d_inode;
			/* Only worry about locked mounts */
3340 if (!(child->mnt.mnt_flags & MNT_LOCKED))
3341 continue;
			/* Is the directory permanently empty? */
3343 if (!is_empty_dir_inode(inode))
3344 goto next;
3345 }
		/* Preserve the locked attributes */
3347 *new_mnt_flags |= mnt_flags & (MNT_LOCK_READONLY | \
3348 MNT_LOCK_ATIME);
3349 visible = true;
3350 goto found;
3351 next: ;
3352 }
3353found:
3354 up_read(&namespace_sem);
3355 return visible;
3356}
3357
3358static bool mount_too_revealing(struct vfsmount *mnt, int *new_mnt_flags)
3359{
3360 const unsigned long required_iflags = SB_I_NOEXEC | SB_I_NODEV;
3361 struct mnt_namespace *ns = current->nsproxy->mnt_ns;
3362 unsigned long s_iflags;
3363
3364 if (ns->user_ns == &init_user_ns)
3365 return false;
3366
	/* Can this filesystem be too revealing? */
3368 s_iflags = mnt->mnt_sb->s_iflags;
3369 if (!(s_iflags & SB_I_USERNS_VISIBLE))
3370 return false;
3371
3372 if ((s_iflags & required_iflags) != required_iflags) {
3373 WARN_ONCE(1, "Expected s_iflags to contain 0x%lx\n",
3374 required_iflags);
3375 return true;
3376 }
3377
3378 return !mnt_already_visible(ns, mnt, new_mnt_flags);
3379}
3380
3381bool mnt_may_suid(struct vfsmount *mnt)
3382{
	/*
	 * Foreign mounts (accessed via fchdir or through /proc
	 * symlinks) are always treated as if they are nosuid.  This
	 * prevents namespaces from trusting potentially unsafe
	 * suid/sgid bits, file caps, or security labels that originate
	 * in other namespaces.
	 */
3390 return !(mnt->mnt_flags & MNT_NOSUID) && check_mnt(real_mount(mnt)) &&
3391 current_in_userns(mnt->mnt_sb->s_user_ns);
3392}
3393
3394static struct ns_common *mntns_get(struct task_struct *task)
3395{
3396 struct ns_common *ns = NULL;
3397 struct nsproxy *nsproxy;
3398
3399 task_lock(task);
3400 nsproxy = task->nsproxy;
3401 if (nsproxy) {
3402 ns = &nsproxy->mnt_ns->ns;
3403 get_mnt_ns(to_mnt_ns(ns));
3404 }
3405 task_unlock(task);
3406
3407 return ns;
3408}
3409
3410static void mntns_put(struct ns_common *ns)
3411{
3412 put_mnt_ns(to_mnt_ns(ns));
3413}
3414
3415static int mntns_install(struct nsproxy *nsproxy, struct ns_common *ns)
3416{
3417 struct fs_struct *fs = current->fs;
3418 struct mnt_namespace *mnt_ns = to_mnt_ns(ns), *old_mnt_ns;
3419 struct path root;
3420 int err;
3421
3422 if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) ||
3423 !ns_capable(current_user_ns(), CAP_SYS_CHROOT) ||
3424 !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
3425 return -EPERM;
3426
3427 if (fs->users != 1)
3428 return -EINVAL;
3429
3430 get_mnt_ns(mnt_ns);
3431 old_mnt_ns = nsproxy->mnt_ns;
3432 nsproxy->mnt_ns = mnt_ns;
3433
	/* Find the root of the namespace we are entering */
3435 err = vfs_path_lookup(mnt_ns->root->mnt.mnt_root, &mnt_ns->root->mnt,
3436 "/", LOOKUP_DOWN, &root);
3437 if (err) {
		/* revert to the old namespace */
3439 nsproxy->mnt_ns = old_mnt_ns;
3440 put_mnt_ns(mnt_ns);
3441 return err;
3442 }
3443
3444 put_mnt_ns(old_mnt_ns);
3445
	/* Update the pwd and root */
3447 set_fs_pwd(fs, &root);
3448 set_fs_root(fs, &root);
3449
3450 path_put(&root);
3451 return 0;
3452}
3453
3454static struct user_namespace *mntns_owner(struct ns_common *ns)
3455{
3456 return to_mnt_ns(ns)->user_ns;
3457}
3458
3459const struct proc_ns_operations mntns_operations = {
3460 .name = "mnt",
3461 .type = CLONE_NEWNS,
3462 .get = mntns_get,
3463 .put = mntns_put,
3464 .install = mntns_install,
3465 .owner = mntns_owner,
3466};
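
/*
 * These operations back /proc/<pid>/ns/mnt.  A userspace sketch of entering
 * another task's mount namespace via mntns_install() above (illustrative
 * only; the pid is a placeholder and the usual capability checks apply):
 *
 *	#include <fcntl.h>
 *	#include <sched.h>
 *
 *	int fd = open("/proc/1234/ns/mnt", O_RDONLY | O_CLOEXEC);
 *	if (fd >= 0 && setns(fd, CLONE_NEWNS) == 0)
 *		work_in_target_namespace();	hypothetical helper
 *
 * Note that mntns_install() rejects callers that share their fs_struct
 * (fs->users != 1), so a task sharing fs with other threads cannot switch.
 */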
3467