// SPDX-License-Identifier: GPL-2.0-only
/*
 *  linux/fs/namespace.c
 *
 *  (C) Copyright Al Viro 2000, 2001
 *
 *  Based on code from fs/super.c, copyright Linus Torvalds and others.
 *  Heavily rewritten.
 */
#include <linux/syscalls.h>
#include <linux/export.h>
#include <linux/capability.h>
#include <linux/mnt_namespace.h>
#include <linux/user_namespace.h>
#include <linux/namei.h>
#include <linux/security.h>
#include <linux/cred.h>
#include <linux/idr.h>
#include <linux/init.h>
#include <linux/fs_struct.h>
#include <linux/fsnotify.h>
#include <linux/file.h>
#include <linux/uaccess.h>
#include <linux/proc_ns.h>
#include <linux/magic.h>
#include <linux/memblock.h>
#include <linux/proc_fs.h>
#include <linux/task_work.h>
#include <linux/sched/task.h>
#include <uapi/linux/mount.h>
#include <linux/fs_context.h>
#include <linux/shmem_fs.h>
#include <linux/mnt_idmapping.h>

#include "pnode.h"
#include "internal.h"
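
/* Maximum number of mounts in a mount namespace */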
static unsigned int sysctl_mount_max __read_mostly = 100000;

static unsigned int m_hash_mask __read_mostly;
static unsigned int m_hash_shift __read_mostly;
static unsigned int mp_hash_mask __read_mostly;
static unsigned int mp_hash_shift __read_mostly;

static __initdata unsigned long mhash_entries;
static int __init set_mhash_entries(char *str)
{
	if (!str)
		return 0;
	mhash_entries = simple_strtoul(str, &str, 0);
	return 1;
}
__setup("mhash_entries=", set_mhash_entries);

static __initdata unsigned long mphash_entries;
static int __init set_mphash_entries(char *str)
{
	if (!str)
		return 0;
	mphash_entries = simple_strtoul(str, &str, 0);
	return 1;
}
__setup("mphash_entries=", set_mphash_entries);

static u64 event;
static DEFINE_IDA(mnt_id_ida);
static DEFINE_IDA(mnt_group_ida);

static struct hlist_head *mount_hashtable __read_mostly;
static struct hlist_head *mountpoint_hashtable __read_mostly;
static struct kmem_cache *mnt_cache __read_mostly;
static DECLARE_RWSEM(namespace_sem);
static HLIST_HEAD(unmounted);
static LIST_HEAD(ex_mountpoints);

struct mount_kattr {
	unsigned int attr_set;
	unsigned int attr_clr;
	unsigned int propagation;
	unsigned int lookup_flags;
	bool recurse;
	struct user_namespace *mnt_userns;
};

struct kobject *fs_kobj;
EXPORT_SYMBOL_GPL(fs_kobj);
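
/*
 * mount_lock is a seqlock: it is taken for write whenever the mount hash
 * chains or the mount tree topology change, while lockless path walking
 * reads the sequence count to detect and retry racing changes.
 */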
__cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock);

static inline void lock_mount_hash(void)
{
	write_seqlock(&mount_lock);
}

static inline void unlock_mount_hash(void)
{
	write_sequnlock(&mount_lock);
}

static inline struct hlist_head *m_hash(struct vfsmount *mnt, struct dentry *dentry)
{
	unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES);
	tmp += ((unsigned long)dentry / L1_CACHE_BYTES);
	tmp = tmp + (tmp >> m_hash_shift);
	return &mount_hashtable[tmp & m_hash_mask];
}

static inline struct hlist_head *mp_hash(struct dentry *dentry)
{
	unsigned long tmp = ((unsigned long)dentry / L1_CACHE_BYTES);
	tmp = tmp + (tmp >> mp_hash_shift);
	return &mountpoint_hashtable[tmp & mp_hash_mask];
}

static int mnt_alloc_id(struct mount *mnt)
{
	int res = ida_alloc(&mnt_id_ida, GFP_KERNEL);

	if (res < 0)
		return res;
	mnt->mnt_id = res;
	return 0;
}

static void mnt_free_id(struct mount *mnt)
{
	ida_free(&mnt_id_ida, mnt->mnt_id);
}
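
/*
 * Allocate a new peer group ID.
 */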
static int mnt_alloc_group_id(struct mount *mnt)
{
	int res = ida_alloc_min(&mnt_group_ida, 1, GFP_KERNEL);

	if (res < 0)
		return res;
	mnt->mnt_group_id = res;
	return 0;
}
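
/*
 * Release a peer group ID.
 */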
void mnt_release_group_id(struct mount *mnt)
{
	ida_free(&mnt_group_ida, mnt->mnt_group_id);
	mnt->mnt_group_id = 0;
}
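
/*
 * vfsmount lock must be held for read
 */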
static inline void mnt_add_count(struct mount *mnt, int n)
{
#ifdef CONFIG_SMP
	this_cpu_add(mnt->mnt_pcp->mnt_count, n);
#else
	preempt_disable();
	mnt->mnt_count += n;
	preempt_enable();
#endif
}
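
/*
 * vfsmount lock must be held for write
 */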
int mnt_get_count(struct mount *mnt)
{
#ifdef CONFIG_SMP
	int count = 0;
	int cpu;

	for_each_possible_cpu(cpu) {
		count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_count;
	}

	return count;
#else
	return mnt->mnt_count;
#endif
}

static struct mount *alloc_vfsmnt(const char *name)
{
	struct mount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
	if (mnt) {
		int err;

		err = mnt_alloc_id(mnt);
		if (err)
			goto out_free_cache;

		if (name) {
			mnt->mnt_devname = kstrdup_const(name,
							 GFP_KERNEL_ACCOUNT);
			if (!mnt->mnt_devname)
				goto out_free_id;
		}

#ifdef CONFIG_SMP
		mnt->mnt_pcp = alloc_percpu(struct mnt_pcp);
		if (!mnt->mnt_pcp)
			goto out_free_devname;

		this_cpu_add(mnt->mnt_pcp->mnt_count, 1);
#else
		mnt->mnt_count = 1;
		mnt->mnt_writers = 0;
#endif

		INIT_HLIST_NODE(&mnt->mnt_hash);
		INIT_LIST_HEAD(&mnt->mnt_child);
		INIT_LIST_HEAD(&mnt->mnt_mounts);
		INIT_LIST_HEAD(&mnt->mnt_list);
		INIT_LIST_HEAD(&mnt->mnt_expire);
		INIT_LIST_HEAD(&mnt->mnt_share);
		INIT_LIST_HEAD(&mnt->mnt_slave_list);
		INIT_LIST_HEAD(&mnt->mnt_slave);
		INIT_HLIST_NODE(&mnt->mnt_mp_list);
		INIT_LIST_HEAD(&mnt->mnt_umounting);
		INIT_HLIST_HEAD(&mnt->mnt_stuck_children);
		mnt->mnt.mnt_userns = &init_user_ns;
	}
	return mnt;

#ifdef CONFIG_SMP
out_free_devname:
	kfree_const(mnt->mnt_devname);
#endif
out_free_id:
	mnt_free_id(mnt);
out_free_cache:
	kmem_cache_free(mnt_cache, mnt);
	return NULL;
}
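
/*
 * Most r/o checks on a vfs are for the write time: a mount is treated as
 * read-only if either the mount itself or its superblock is marked
 * read-only.
 */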
bool __mnt_is_readonly(struct vfsmount *mnt)
{
	return (mnt->mnt_flags & MNT_READONLY) || sb_rdonly(mnt->mnt_sb);
}
EXPORT_SYMBOL_GPL(__mnt_is_readonly);

static inline void mnt_inc_writers(struct mount *mnt)
{
#ifdef CONFIG_SMP
	this_cpu_inc(mnt->mnt_pcp->mnt_writers);
#else
	mnt->mnt_writers++;
#endif
}

static inline void mnt_dec_writers(struct mount *mnt)
{
#ifdef CONFIG_SMP
	this_cpu_dec(mnt->mnt_pcp->mnt_writers);
#else
	mnt->mnt_writers--;
#endif
}

static unsigned int mnt_get_writers(struct mount *mnt)
{
#ifdef CONFIG_SMP
	unsigned int count = 0;
	int cpu;

	for_each_possible_cpu(cpu) {
		count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_writers;
	}

	return count;
#else
	return mnt->mnt_writers;
#endif
}

static int mnt_is_readonly(struct vfsmount *mnt)
{
	if (mnt->mnt_sb->s_readonly_remount)
		return 1;
	/* Order wrt setting s_flags/s_readonly_remount in do_remount() */
	smp_rmb();
	return __mnt_is_readonly(mnt);
}
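
/*
 * Tell the low-level filesystem that a write is about to be performed on
 * this mount; fails with -EROFS if the mount has gone (or is going)
 * read-only.  Must be paired with __mnt_drop_write().
 */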
int __mnt_want_write(struct vfsmount *m)
{
	struct mount *mnt = real_mount(m);
	int ret = 0;

	preempt_disable();
	mnt_inc_writers(mnt);
	/*
	 * The store to mnt_inc_writers must be visible before we pass
	 * the MNT_WRITE_HOLD loop below, so that the slowpath can see
	 * our incremented count after it has set MNT_WRITE_HOLD.
	 */
	smp_mb();
	might_lock(&mount_lock.lock);
	while (READ_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) {
		if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
			cpu_relax();
		} else {
			/*
			 * This prevents priority inversion, if the task
			 * setting MNT_WRITE_HOLD got preempted on a remote
			 * CPU, and it prevents life lock if the task setting
			 * MNT_WRITE_HOLD has a lower priority and is bound to
			 * the same CPU as the task that is spinning here.
			 */
			preempt_enable();
			lock_mount_hash();
			unlock_mount_hash();
			preempt_disable();
		}
	}
	/*
	 * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will
	 * be set to match its requirements. So we must not load that until
	 * MNT_WRITE_HOLD is cleared.
	 */
	smp_rmb();
	if (mnt_is_readonly(m)) {
		mnt_dec_writers(mnt);
		ret = -EROFS;
	}
	preempt_enable();

	return ret;
}
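
/**
 * mnt_want_write - get write access to a mount
 * @m: the mount on which to take a write
 *
 * This tells the low-level filesystem that a write is about to be performed to
 * it, and makes sure that writes are allowed (mount is read-write, filesystem
 * is not frozen) before returning success.  When the write operation is
 * finished, mnt_drop_write() must be called.  This is effectively a refcount.
 */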
int mnt_want_write(struct vfsmount *m)
{
	int ret;

	sb_start_write(m->mnt_sb);
	ret = __mnt_want_write(m);
	if (ret)
		sb_end_write(m->mnt_sb);
	return ret;
}
EXPORT_SYMBOL_GPL(mnt_want_write);
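
/*
 * Like __mnt_want_write, but if the file is already open for writing it
 * skips incrementing mnt_writers (since the open file already has a
 * reference) and instead only checks for emergency read-only remounts.
 * Must be paired with __mnt_drop_write_file.
 */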
int __mnt_want_write_file(struct file *file)
{
	if (file->f_mode & FMODE_WRITER) {
		/*
		 * Superblock may have become readonly while there are still
		 * writable fd's, e.g. due to a fs error with errors=remount-ro
		 */
		if (__mnt_is_readonly(file->f_path.mnt))
			return -EROFS;
		return 0;
	}
	return __mnt_want_write(file->f_path.mnt);
}
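
/**
 * mnt_want_write_file - get write access to a file's mount
 * @file: the file who's mount on which to take a write
 *
 * This is like mnt_want_write, but if the file is already open for writing it
 * skips incrementing mnt_writers (since the open file already has a reference)
 * and instead only does the freeze protection and the check for emergency r/o
 * remounts.  This must be paired with mnt_drop_write_file.
 */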
int mnt_want_write_file(struct file *file)
{
	int ret;

	sb_start_write(file_inode(file)->i_sb);
	ret = __mnt_want_write_file(file);
	if (ret)
		sb_end_write(file_inode(file)->i_sb);
	return ret;
}
EXPORT_SYMBOL_GPL(mnt_want_write_file);
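
/**
 * __mnt_drop_write - give up write access to a mount
 * @mnt: the mount on which to give up write access
 *
 * Tells the low-level filesystem that we are done performing writes to it.
 * Must be matched with __mnt_want_write() call above.
 */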
void __mnt_drop_write(struct vfsmount *mnt)
{
	preempt_disable();
	mnt_dec_writers(real_mount(mnt));
	preempt_enable();
}
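
/**
 * mnt_drop_write - give up write access to a mount
 * @mnt: the mount on which to give up write access
 *
 * Tells the low-level filesystem that we are done performing writes to it and
 * also allows filesystem to be frozen again.  Must be matched with
 * mnt_want_write() call above.
 */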
void mnt_drop_write(struct vfsmount *mnt)
{
	__mnt_drop_write(mnt);
	sb_end_write(mnt->mnt_sb);
}
EXPORT_SYMBOL_GPL(mnt_drop_write);

void __mnt_drop_write_file(struct file *file)
{
	if (!(file->f_mode & FMODE_WRITER))
		__mnt_drop_write(file->f_path.mnt);
}

void mnt_drop_write_file(struct file *file)
{
	__mnt_drop_write_file(file);
	sb_end_write(file_inode(file)->i_sb);
}
EXPORT_SYMBOL(mnt_drop_write_file);
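
/**
 * mnt_hold_writers - prevent write access to the given mount
 * @mnt: mnt to prevent write access to
 *
 * Prevents write access to @mnt if there are no active writers for @mnt.
 * This function needs to be called and return successfully before changing
 * properties of @mnt that need to remain stable for callers with write access
 * to @mnt.  After this function has been called, the caller either needs to
 * run mnt_unhold_writers() or keep lock_mount_hash() held until the relevant
 * changes are done.
 *
 * Context: This function expects lock_mount_hash() to be held serializing
 *          setting MNT_WRITE_HOLD.
 * Return: On success 0 is returned.
 *	   On error, -EBUSY is returned.
 */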
static inline int mnt_hold_writers(struct mount *mnt)
{
	mnt->mnt.mnt_flags |= MNT_WRITE_HOLD;
	/*
	 * After storing MNT_WRITE_HOLD, we'll read the counters. This store
	 * should be visible before we do.
	 */
	smp_mb();

	/*
	 * With writers on hold, if this value is zero, then there are
	 * definitely no active writers (although held writers may subsequently
	 * increment the count, they'll have to wait, and decrement it after
	 * seeing MNT_READONLY).
	 *
	 * It is OK to have counter incremented on one CPU and decremented on
	 * another: the sum will add up correctly. The danger would be when we
	 * sum up each counter, if we read a counter before it is incremented,
	 * but then read another CPU's count which it has been subsequently
	 * decremented from -- we would see more decrements than we should.
	 * MNT_WRITE_HOLD protects against this scenario, because
	 * mnt_want_write first increments count, then smp_mb, then spins on
	 * MNT_WRITE_HOLD, so it can't be decremented by another CPU while
	 * we're counting up here.
	 */
	if (mnt_get_writers(mnt) > 0)
		return -EBUSY;

	return 0;
}
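
/**
 * mnt_unhold_writers - stop preventing write access to the given mount
 * @mnt: mnt to stop preventing write access to
 *
 * Stop preventing write access to @mnt allowing callers to gain write access
 * to @mnt again.  This function can only be called after a successful call
 * to mnt_hold_writers().
 *
 * Context: This function expects lock_mount_hash() to be held.
 */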
static inline void mnt_unhold_writers(struct mount *mnt)
{
	/*
	 * MNT_READONLY must become visible before ~MNT_WRITE_HOLD, so writers
	 * that become unheld will see MNT_READONLY.
	 */
	smp_wmb();
	mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
}

static int mnt_make_readonly(struct mount *mnt)
{
	int ret;

	ret = mnt_hold_writers(mnt);
	if (!ret)
		mnt->mnt.mnt_flags |= MNT_READONLY;
	mnt_unhold_writers(mnt);
	return ret;
}

int sb_prepare_remount_readonly(struct super_block *sb)
{
	struct mount *mnt;
	int err = 0;

	/* Racy optimization.  Recheck the counter under MNT_WRITE_HOLD */
	if (atomic_long_read(&sb->s_remove_count))
		return -EBUSY;

	lock_mount_hash();
	list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) {
		if (!(mnt->mnt.mnt_flags & MNT_READONLY)) {
			err = mnt_hold_writers(mnt);
			if (err)
				break;
		}
	}
	if (!err && atomic_long_read(&sb->s_remove_count))
		err = -EBUSY;

	if (!err) {
		sb->s_readonly_remount = 1;
		smp_wmb();
	}
	list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) {
		if (mnt->mnt.mnt_flags & MNT_WRITE_HOLD)
			mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
	}
	unlock_mount_hash();

	return err;
}

static void free_vfsmnt(struct mount *mnt)
{
	struct user_namespace *mnt_userns;

	mnt_userns = mnt_user_ns(&mnt->mnt);
	if (!initial_idmapping(mnt_userns))
		put_user_ns(mnt_userns);
	kfree_const(mnt->mnt_devname);
#ifdef CONFIG_SMP
	free_percpu(mnt->mnt_pcp);
#endif
	kmem_cache_free(mnt_cache, mnt);
}

static void delayed_free_vfsmnt(struct rcu_head *head)
{
	free_vfsmnt(container_of(head, struct mount, mnt_rcu));
}
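
/* call under rcu_read_lock */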
int __legitimize_mnt(struct vfsmount *bastard, unsigned seq)
{
	struct mount *mnt;
	if (read_seqretry(&mount_lock, seq))
		return 1;
	if (bastard == NULL)
		return 0;
	mnt = real_mount(bastard);
	mnt_add_count(mnt, 1);
	smp_mb();			// see mntput_no_expire()
	if (likely(!read_seqretry(&mount_lock, seq)))
		return 0;
	if (bastard->mnt_flags & MNT_SYNC_UMOUNT) {
		mnt_add_count(mnt, -1);
		return 1;
	}
	lock_mount_hash();
	if (unlikely(bastard->mnt_flags & MNT_DOOMED)) {
		mnt_add_count(mnt, -1);
		unlock_mount_hash();
		return 1;
	}
	unlock_mount_hash();
	/* caller will mntput() */
	return -1;
}
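
/* call under rcu_read_lock */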
bool legitimize_mnt(struct vfsmount *bastard, unsigned seq)
{
	int res = __legitimize_mnt(bastard, seq);
	if (likely(!res))
		return true;
	if (unlikely(res < 0)) {
		rcu_read_unlock();
		mntput(bastard);
		rcu_read_lock();
	}
	return false;
}
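
/*
 * find the first mount at @dentry on vfsmount @mnt.
 * call under rcu_read_lock()
 */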
struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
{
	struct hlist_head *head = m_hash(mnt, dentry);
	struct mount *p;

	hlist_for_each_entry_rcu(p, head, mnt_hash)
		if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry)
			return p;
	return NULL;
}
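
/*
 * lookup_mnt - Return the first child mount mounted at path
 *
 * "First" means first mounted chronologically.  If you create the
 * following mounts:
 *
 * mount /dev/sda1 /mnt
 * mount /dev/sda2 /mnt
 * mount /dev/sda3 /mnt
 *
 * Then lookup_mnt() on the base /mnt dentry in the root mount will
 * return successively the root dentry and vfsmount of /dev/sda1, then
 * /dev/sda2, then /dev/sda3, then NULL.
 *
 * lookup_mnt takes a reference to the found vfsmount.
 */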
struct vfsmount *lookup_mnt(const struct path *path)
{
	struct mount *child_mnt;
	struct vfsmount *m;
	unsigned seq;

	rcu_read_lock();
	do {
		seq = read_seqbegin(&mount_lock);
		child_mnt = __lookup_mnt(path->mnt, path->dentry);
		m = child_mnt ? &child_mnt->mnt : NULL;
	} while (!legitimize_mnt(m, seq));
	rcu_read_unlock();
	return m;
}

static inline void lock_ns_list(struct mnt_namespace *ns)
{
	spin_lock(&ns->ns_lock);
}

static inline void unlock_ns_list(struct mnt_namespace *ns)
{
	spin_unlock(&ns->ns_lock);
}

static inline bool mnt_is_cursor(struct mount *mnt)
{
	return mnt->mnt.mnt_flags & MNT_CURSOR;
}
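
/*
 * __is_local_mountpoint - Test to see if dentry is a mountpoint in the
 *                         current mount namespace.
 *
 * The common case is dentries are not mountpoints at all and that
 * test is handled inline.  For the slow case when we are actually
 * dealing with a mountpoint of some kind, walk through all of the
 * mounts in the current mount namespace and test to see if the dentry
 * is a mountpoint.
 *
 * The mount_hashtable is not usable in this context because we
 * need to identify all mounts that may be in the current mount
 * namespace, not just a mount that happens to have some specified
 * parent mount.
 */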
bool __is_local_mountpoint(struct dentry *dentry)
{
	struct mnt_namespace *ns = current->nsproxy->mnt_ns;
	struct mount *mnt;
	bool is_covered = false;

	down_read(&namespace_sem);
	lock_ns_list(ns);
	list_for_each_entry(mnt, &ns->list, mnt_list) {
		if (mnt_is_cursor(mnt))
			continue;
		is_covered = (mnt->mnt_mountpoint == dentry);
		if (is_covered)
			break;
	}
	unlock_ns_list(ns);
	up_read(&namespace_sem);

	return is_covered;
}
static struct mountpoint *lookup_mountpoint(struct dentry *dentry)
{
	struct hlist_head *chain = mp_hash(dentry);
	struct mountpoint *mp;

	hlist_for_each_entry(mp, chain, m_hash) {
		if (mp->m_dentry == dentry) {
			mp->m_count++;
			return mp;
		}
	}
	return NULL;
}

static struct mountpoint *get_mountpoint(struct dentry *dentry)
{
	struct mountpoint *mp, *new = NULL;
	int ret;

	if (d_mountpoint(dentry)) {
		/* might be worth a WARN_ON() */
		if (d_unlinked(dentry))
			return ERR_PTR(-ENOENT);
mountpoint:
		read_seqlock_excl(&mount_lock);
		mp = lookup_mountpoint(dentry);
		read_sequnlock_excl(&mount_lock);
		if (mp)
			goto done;
	}

	if (!new)
		new = kmalloc(sizeof(struct mountpoint), GFP_KERNEL);
	if (!new)
		return ERR_PTR(-ENOMEM);

	/* Exactly one processes may set d_mounted */
	ret = d_set_mounted(dentry);

	/* Someone else set d_mounted? */
	if (ret == -EBUSY)
		goto mountpoint;

	/* The dentry is not available as a mountpoint? */
	mp = ERR_PTR(ret);
	if (ret)
		goto done;

	/* Add the new mountpoint to the hash table */
	read_seqlock_excl(&mount_lock);
	new->m_dentry = dget(dentry);
	new->m_count = 1;
	hlist_add_head(&new->m_hash, mp_hash(dentry));
	INIT_HLIST_HEAD(&new->m_list);
	read_sequnlock_excl(&mount_lock);

	mp = new;
	new = NULL;
done:
	kfree(new);
	return mp;
}
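
/*
 * vfsmount lock must be held.  Additionally, the caller is responsible
 * for serializing calls for given disposal list.
 */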
static void __put_mountpoint(struct mountpoint *mp, struct list_head *list)
{
	if (!--mp->m_count) {
		struct dentry *dentry = mp->m_dentry;
		BUG_ON(!hlist_empty(&mp->m_list));
		spin_lock(&dentry->d_lock);
		dentry->d_flags &= ~DCACHE_MOUNTED;
		spin_unlock(&dentry->d_lock);
		dput_to_list(dentry, list);
		hlist_del(&mp->m_hash);
		kfree(mp);
	}
}
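
/* called with namespace_lock and vfsmount lock */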
static void put_mountpoint(struct mountpoint *mp)
{
	__put_mountpoint(mp, &ex_mountpoints);
}

static inline int check_mnt(struct mount *mnt)
{
	return mnt->mnt_ns == current->nsproxy->mnt_ns;
}
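
/*
 * vfsmount lock must be held for write
 */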
static void touch_mnt_namespace(struct mnt_namespace *ns)
{
	if (ns) {
		ns->event = ++event;
		wake_up_interruptible(&ns->poll);
	}
}
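
/*
 * vfsmount lock must be held for write
 */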
static void __touch_mnt_namespace(struct mnt_namespace *ns)
{
	if (ns && ns->event != event) {
		ns->event = event;
		wake_up_interruptible(&ns->poll);
	}
}
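
/*
 * vfsmount lock must be held for write
 */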
static struct mountpoint *unhash_mnt(struct mount *mnt)
{
	struct mountpoint *mp;
	mnt->mnt_parent = mnt;
	mnt->mnt_mountpoint = mnt->mnt.mnt_root;
	list_del_init(&mnt->mnt_child);
	hlist_del_init_rcu(&mnt->mnt_hash);
	hlist_del_init(&mnt->mnt_mp_list);
	mp = mnt->mnt_mp;
	mnt->mnt_mp = NULL;
	return mp;
}
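
/*
 * vfsmount lock must be held for write
 */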
static void umount_mnt(struct mount *mnt)
{
	put_mountpoint(unhash_mnt(mnt));
}
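
/*
 * vfsmount lock must be held for write
 */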
void mnt_set_mountpoint(struct mount *mnt,
			struct mountpoint *mp,
			struct mount *child_mnt)
{
	mp->m_count++;
	mnt_add_count(mnt, 1);	/* essentially, that's mntget */
	child_mnt->mnt_mountpoint = mp->m_dentry;
	child_mnt->mnt_parent = mnt;
	child_mnt->mnt_mp = mp;
	hlist_add_head(&child_mnt->mnt_mp_list, &mp->m_list);
}

static void __attach_mnt(struct mount *mnt, struct mount *parent)
{
	hlist_add_head_rcu(&mnt->mnt_hash,
			   m_hash(&parent->mnt, mnt->mnt_mountpoint));
	list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
}
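
/*
 * vfsmount lock must be held for write
 */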
static void attach_mnt(struct mount *mnt,
			struct mount *parent,
			struct mountpoint *mp)
{
	mnt_set_mountpoint(parent, mp, mnt);
	__attach_mnt(mnt, parent);
}

void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp, struct mount *mnt)
{
	struct mountpoint *old_mp = mnt->mnt_mp;
	struct mount *old_parent = mnt->mnt_parent;

	list_del_init(&mnt->mnt_child);
	hlist_del_init(&mnt->mnt_mp_list);
	hlist_del_init_rcu(&mnt->mnt_hash);

	attach_mnt(mnt, parent, mp);

	put_mountpoint(old_mp);
	mnt_add_count(old_parent, -1);
}
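
/*
 * vfsmount lock must be held for write
 */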
static void commit_tree(struct mount *mnt)
{
	struct mount *parent = mnt->mnt_parent;
	struct mount *m;
	LIST_HEAD(head);
	struct mnt_namespace *n = parent->mnt_ns;

	BUG_ON(parent == mnt);

	list_add_tail(&head, &mnt->mnt_list);
	list_for_each_entry(m, &head, mnt_list)
		m->mnt_ns = n;

	list_splice(&head, n->list.prev);

	n->mounts += n->pending_mounts;
	n->pending_mounts = 0;

	__attach_mnt(mnt, parent);
	touch_mnt_namespace(n);
}

static struct mount *next_mnt(struct mount *p, struct mount *root)
{
	struct list_head *next = p->mnt_mounts.next;
	if (next == &p->mnt_mounts) {
		while (1) {
			if (p == root)
				return NULL;
			next = p->mnt_child.next;
			if (next != &p->mnt_parent->mnt_mounts)
				break;
			p = p->mnt_parent;
		}
	}
	return list_entry(next, struct mount, mnt_child);
}

static struct mount *skip_mnt_tree(struct mount *p)
{
	struct list_head *prev = p->mnt_mounts.prev;
	while (prev != &p->mnt_mounts) {
		p = list_entry(prev, struct mount, mnt_child);
		prev = p->mnt_mounts.prev;
	}
	return p;
}
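
/**
 * vfs_create_mount - Create a mount for a configured superblock
 * @fc: The configuration context with the superblock attached
 *
 * Create a mount to an already configured superblock.  If necessary, the
 * caller should invoke vfs_get_tree() before calling this.
 *
 * Note that this does not attach the mount to anything.
 */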
struct vfsmount *vfs_create_mount(struct fs_context *fc)
{
	struct mount *mnt;
	struct user_namespace *fs_userns;

	if (!fc->root)
		return ERR_PTR(-EINVAL);

	mnt = alloc_vfsmnt(fc->source ?: "none");
	if (!mnt)
		return ERR_PTR(-ENOMEM);

	if (fc->sb_flags & SB_KERNMOUNT)
		mnt->mnt.mnt_flags = MNT_INTERNAL;

	atomic_inc(&fc->root->d_sb->s_active);
	mnt->mnt.mnt_sb = fc->root->d_sb;
	mnt->mnt.mnt_root = dget(fc->root);
	mnt->mnt_mountpoint = mnt->mnt.mnt_root;
	mnt->mnt_parent = mnt;

	fs_userns = mnt->mnt.mnt_sb->s_user_ns;
	if (!initial_idmapping(fs_userns))
		mnt->mnt.mnt_userns = get_user_ns(fs_userns);

	lock_mount_hash();
	list_add_tail(&mnt->mnt_instance, &mnt->mnt.mnt_sb->s_mounts);
	unlock_mount_hash();
	return &mnt->mnt;
}
EXPORT_SYMBOL(vfs_create_mount);

struct vfsmount *fc_mount(struct fs_context *fc)
{
	int err = vfs_get_tree(fc);
	if (!err) {
		up_write(&fc->root->d_sb->s_umount);
		return vfs_create_mount(fc);
	}
	return ERR_PTR(err);
}
EXPORT_SYMBOL(fc_mount);

struct vfsmount *vfs_kern_mount(struct file_system_type *type,
				int flags, const char *name,
				void *data)
{
	struct fs_context *fc;
	struct vfsmount *mnt;
	int ret = 0;

	if (!type)
		return ERR_PTR(-EINVAL);

	fc = fs_context_for_mount(type, flags);
	if (IS_ERR(fc))
		return ERR_CAST(fc);

	if (name)
		ret = vfs_parse_fs_string(fc, "source",
					  name, strlen(name));
	if (!ret)
		ret = parse_monolithic_mount_data(fc, data);
	if (!ret)
		mnt = fc_mount(fc);
	else
		mnt = ERR_PTR(ret);

	put_fs_context(fc);
	return mnt;
}
EXPORT_SYMBOL_GPL(vfs_kern_mount);

struct vfsmount *
vfs_submount(const struct dentry *mountpoint, struct file_system_type *type,
	     const char *name, void *data)
{
	/* Until it is worked out how to pass the user namespace
	 * through from the parent mount to the submount don't support
	 * unprivileged mounts with submounts.
	 */
	if (mountpoint->d_sb->s_user_ns != &init_user_ns)
		return ERR_PTR(-EPERM);

	return vfs_kern_mount(type, SB_SUBMOUNT, name, data);
}
EXPORT_SYMBOL_GPL(vfs_submount);
static struct mount *clone_mnt(struct mount *old, struct dentry *root,
					int flag)
{
	struct super_block *sb = old->mnt.mnt_sb;
	struct mount *mnt;
	int err;

	mnt = alloc_vfsmnt(old->mnt_devname);
	if (!mnt)
		return ERR_PTR(-ENOMEM);

	if (flag & (CL_SLAVE | CL_PRIVATE | CL_SHARED_TO_SLAVE))
		mnt->mnt_group_id = 0; /* not a peer of original */
	else
		mnt->mnt_group_id = old->mnt_group_id;

	if ((flag & CL_MAKE_SHARED) && !mnt->mnt_group_id) {
		err = mnt_alloc_group_id(mnt);
		if (err)
			goto out_free;
	}

	mnt->mnt.mnt_flags = old->mnt.mnt_flags;
	mnt->mnt.mnt_flags &= ~(MNT_WRITE_HOLD|MNT_MARKED|MNT_INTERNAL);

	atomic_inc(&sb->s_active);
	mnt->mnt.mnt_userns = mnt_user_ns(&old->mnt);
	if (!initial_idmapping(mnt->mnt.mnt_userns))
		mnt->mnt.mnt_userns = get_user_ns(mnt->mnt.mnt_userns);
	mnt->mnt.mnt_sb = sb;
	mnt->mnt.mnt_root = dget(root);
	mnt->mnt_mountpoint = mnt->mnt.mnt_root;
	mnt->mnt_parent = mnt;
	lock_mount_hash();
	list_add_tail(&mnt->mnt_instance, &sb->s_mounts);
	unlock_mount_hash();

	if ((flag & CL_SLAVE) ||
	    ((flag & CL_SHARED_TO_SLAVE) && IS_MNT_SHARED(old))) {
		list_add(&mnt->mnt_slave, &old->mnt_slave_list);
		mnt->mnt_master = old;
		CLEAR_MNT_SHARED(mnt);
	} else if (!(flag & CL_PRIVATE)) {
		if ((flag & CL_MAKE_SHARED) || IS_MNT_SHARED(old))
			list_add(&mnt->mnt_share, &old->mnt_share);
		if (IS_MNT_SLAVE(old))
			list_add(&mnt->mnt_slave, &old->mnt_slave);
		mnt->mnt_master = old->mnt_master;
	} else {
		CLEAR_MNT_SHARED(mnt);
	}
	if (flag & CL_MAKE_SHARED)
		set_mnt_shared(mnt);

	/* stick the duplicate mount on the same expiry list
	 * as the original if that was on one */
	if (flag & CL_EXPIRE) {
		if (!list_empty(&old->mnt_expire))
			list_add(&mnt->mnt_expire, &old->mnt_expire);
	}

	return mnt;

 out_free:
	mnt_free_id(mnt);
	free_vfsmnt(mnt);
	return ERR_PTR(err);
}
static void cleanup_mnt(struct mount *mnt)
{
	struct hlist_node *p;
	struct mount *m;
	/*
	 * The warning here probably indicates that somebody messed
	 * up a mnt_want/drop_write() pair.  If this happens, the
	 * filesystem was probably unable to make r/w->r/o transitions.
	 * The locking used to deal with mnt_count decrement provides barriers,
	 * so mnt_get_writers() below is safe.
	 */
	WARN_ON(mnt_get_writers(mnt));
	if (unlikely(mnt->mnt_pins.first))
		mnt_pin_kill(mnt);
	hlist_for_each_entry_safe(m, p, &mnt->mnt_stuck_children, mnt_umount) {
		hlist_del(&m->mnt_umount);
		mntput(&m->mnt);
	}
	fsnotify_vfsmount_delete(&mnt->mnt);
	dput(mnt->mnt.mnt_root);
	deactivate_super(mnt->mnt.mnt_sb);
	mnt_free_id(mnt);
	call_rcu(&mnt->mnt_rcu, delayed_free_vfsmnt);
}

static void __cleanup_mnt(struct rcu_head *head)
{
	cleanup_mnt(container_of(head, struct mount, mnt_rcu));
}

static LLIST_HEAD(delayed_mntput_list);
static void delayed_mntput(struct work_struct *unused)
{
	struct llist_node *node = llist_del_all(&delayed_mntput_list);
	struct mount *m, *t;

	llist_for_each_entry_safe(m, t, node, mnt_llist)
		cleanup_mnt(m);
}
static DECLARE_DELAYED_WORK(delayed_mntput_work, delayed_mntput);

static void mntput_no_expire(struct mount *mnt)
{
	LIST_HEAD(list);
	int count;

	rcu_read_lock();
	if (likely(READ_ONCE(mnt->mnt_ns))) {
		/*
		 * Since we don't do lock_mount_hash() here,
		 * ->mnt_ns can change under us.  However, if it's
		 * non-NULL, then there's a reference that won't
		 * be dropped until after an RCU delay done after
		 * turning ->mnt_ns NULL.  So if we observe it
		 * non-NULL under rcu_read_lock(), the reference
		 * we are dropping is not the final one.
		 */
		mnt_add_count(mnt, -1);
		rcu_read_unlock();
		return;
	}
	lock_mount_hash();
	/*
	 * make sure that if __legitimize_mnt() has not seen us grab
	 * mount_lock, we'll see their refcount increment here.
	 */
	smp_mb();
	mnt_add_count(mnt, -1);
	count = mnt_get_count(mnt);
	if (count != 0) {
		WARN_ON(count < 0);
		rcu_read_unlock();
		unlock_mount_hash();
		return;
	}
	if (unlikely(mnt->mnt.mnt_flags & MNT_DOOMED)) {
		rcu_read_unlock();
		unlock_mount_hash();
		return;
	}
	mnt->mnt.mnt_flags |= MNT_DOOMED;
	rcu_read_unlock();

	list_del(&mnt->mnt_instance);

	if (unlikely(!list_empty(&mnt->mnt_mounts))) {
		struct mount *p, *tmp;
		list_for_each_entry_safe(p, tmp, &mnt->mnt_mounts, mnt_child) {
			__put_mountpoint(unhash_mnt(p), &list);
			hlist_add_head(&p->mnt_umount, &mnt->mnt_stuck_children);
		}
	}
	unlock_mount_hash();
	shrink_dentry_list(&list);

	if (likely(!(mnt->mnt.mnt_flags & MNT_INTERNAL))) {
		struct task_struct *task = current;
		if (likely(!(task->flags & PF_KTHREAD))) {
			init_task_work(&mnt->mnt_rcu, __cleanup_mnt);
			if (!task_work_add(task, &mnt->mnt_rcu, TWA_RESUME))
				return;
		}
		if (llist_add(&mnt->mnt_llist, &delayed_mntput_list))
			schedule_delayed_work(&delayed_mntput_work, 1);
		return;
	}
	cleanup_mnt(mnt);
}

void mntput(struct vfsmount *mnt)
{
	if (mnt) {
		struct mount *m = real_mount(mnt);
		/* avoid cacheline pingpong, hope gcc doesn't get "smart" */
		if (unlikely(m->mnt_expiry_mark))
			m->mnt_expiry_mark = 0;
		mntput_no_expire(m);
	}
}
EXPORT_SYMBOL(mntput);

struct vfsmount *mntget(struct vfsmount *mnt)
{
	if (mnt)
		mnt_add_count(real_mount(mnt), 1);
	return mnt;
}
EXPORT_SYMBOL(mntget);
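
/**
 * path_is_mountpoint() - Check if path is a mount in the current namespace.
 * @path: path to check
 *
 * Determines whether @path refers to a dentry that currently has a mount
 * on top of it, rereading mount_lock until a stable answer is obtained.
 */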
bool path_is_mountpoint(const struct path *path)
{
	unsigned seq;
	bool res;

	if (!d_mountpoint(path->dentry))
		return false;

	rcu_read_lock();
	do {
		seq = read_seqbegin(&mount_lock);
		res = __path_is_mountpoint(path);
	} while (read_seqretry(&mount_lock, seq));
	rcu_read_unlock();

	return res;
}
EXPORT_SYMBOL(path_is_mountpoint);

struct vfsmount *mnt_clone_internal(const struct path *path)
{
	struct mount *p;
	p = clone_mnt(real_mount(path->mnt), path->dentry, CL_PRIVATE);
	if (IS_ERR(p))
		return ERR_CAST(p);
	p->mnt.mnt_flags |= MNT_INTERNAL;
	return &p->mnt;
}
#ifdef CONFIG_PROC_FS
static struct mount *mnt_list_next(struct mnt_namespace *ns,
				   struct list_head *p)
{
	struct mount *mnt, *ret = NULL;

	lock_ns_list(ns);
	list_for_each_continue(p, &ns->list) {
		mnt = list_entry(p, typeof(*mnt), mnt_list);
		if (!mnt_is_cursor(mnt)) {
			ret = mnt;
			break;
		}
	}
	unlock_ns_list(ns);

	return ret;
}
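
/* iterator; we want it to have access to namespace_sem, thus here... */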
static void *m_start(struct seq_file *m, loff_t *pos)
{
	struct proc_mounts *p = m->private;
	struct list_head *prev;

	down_read(&namespace_sem);
	if (!*pos) {
		prev = &p->ns->list;
	} else {
		prev = &p->cursor.mnt_list;

		/* Read after we'd reached the end? */
		if (list_empty(prev))
			return NULL;
	}

	return mnt_list_next(p->ns, prev);
}
static void *m_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct proc_mounts *p = m->private;
	struct mount *mnt = v;

	++*pos;
	return mnt_list_next(p->ns, &mnt->mnt_list);
}

static void m_stop(struct seq_file *m, void *v)
{
	struct proc_mounts *p = m->private;
	struct mount *mnt = v;

	lock_ns_list(p->ns);
	if (mnt)
		list_move_tail(&p->cursor.mnt_list, &mnt->mnt_list);
	else
		list_del_init(&p->cursor.mnt_list);
	unlock_ns_list(p->ns);
	up_read(&namespace_sem);
}

static int m_show(struct seq_file *m, void *v)
{
	struct proc_mounts *p = m->private;
	struct mount *r = v;
	return p->show(m, &r->mnt);
}

const struct seq_operations mounts_op = {
	.start	= m_start,
	.next	= m_next,
	.stop	= m_stop,
	.show	= m_show,
};

void mnt_cursor_del(struct mnt_namespace *ns, struct mount *cursor)
{
	down_read(&namespace_sem);
	lock_ns_list(ns);
	list_del(&cursor->mnt_list);
	unlock_ns_list(ns);
	up_read(&namespace_sem);
}
#endif
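
/**
 * may_umount_tree - check if a mount tree is busy
 * @m: root of mount tree
 *
 * This is called to check if a tree of mounts has any
 * open files, pwds, chroots or sub mounts that are
 * busy.
 */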
int may_umount_tree(struct vfsmount *m)
{
	struct mount *mnt = real_mount(m);
	int actual_refs = 0;
	int minimum_refs = 0;
	struct mount *p;
	BUG_ON(!m);

	/* write lock needed for mnt_get_count */
	lock_mount_hash();
	for (p = mnt; p; p = next_mnt(p, mnt)) {
		actual_refs += mnt_get_count(p);
		minimum_refs += 2;
	}
	unlock_mount_hash();

	if (actual_refs > minimum_refs)
		return 0;

	return 1;
}

EXPORT_SYMBOL(may_umount_tree);
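
/**
 * may_umount - check if a mount point is busy
 * @mnt: root of mount
 *
 * This is called to check if a mount point has any
 * open files, pwds, chroots or sub mounts. If the
 * mount has sub mounts this will return busy
 * regardless of whether the sub mounts are busy.
 *
 * Doesn't take quota and stuff into account. IOW, in some cases it will
 * give false negatives. The main reason why it's here is that we need
 * a non-destructive way to look for easily umountable filesystems.
 */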
int may_umount(struct vfsmount *mnt)
{
	int ret = 1;
	down_read(&namespace_sem);
	lock_mount_hash();
	if (propagate_mount_busy(real_mount(mnt), 2))
		ret = 0;
	unlock_mount_hash();
	up_read(&namespace_sem);
	return ret;
}

EXPORT_SYMBOL(may_umount);

static void namespace_unlock(void)
{
	struct hlist_head head;
	struct hlist_node *p;
	struct mount *m;
	LIST_HEAD(list);

	hlist_move_list(&unmounted, &head);
	list_splice_init(&ex_mountpoints, &list);

	up_write(&namespace_sem);

	shrink_dentry_list(&list);

	if (likely(hlist_empty(&head)))
		return;

	synchronize_rcu_expedited();

	hlist_for_each_entry_safe(m, p, &head, mnt_umount) {
		hlist_del(&m->mnt_umount);
		mntput(&m->mnt);
	}
}

static inline void namespace_lock(void)
{
	down_write(&namespace_sem);
}

enum umount_tree_flags {
	UMOUNT_SYNC = 1,
	UMOUNT_PROPAGATE = 2,
	UMOUNT_CONNECTED = 4,
};

static bool disconnect_mount(struct mount *mnt, enum umount_tree_flags how)
{
	/* Leaving mounts connected is only valid for lazy umounts */
	if (how & UMOUNT_SYNC)
		return true;

	/* A mount without a parent has nothing to be connected to */
	if (!mnt_has_parent(mnt))
		return true;

	/* Because the reference counting rules change when mounts are
	 * unmounted and connected, umounted mounts may not be
	 * connected to mounted mounts.
	 */
	if (!(mnt->mnt_parent->mnt.mnt_flags & MNT_UMOUNT))
		return true;

	/* Has it been requested that the mount remain connected? */
	if (how & UMOUNT_CONNECTED)
		return false;

	/* Is the mount locked such that it needs to remain connected? */
	if (IS_MNT_LOCKED(mnt))
		return false;

	/* By default disconnect the mount */
	return true;
}
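
/*
 * mount_lock must be held
 * namespace_sem must be held for write
 */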
static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
{
	LIST_HEAD(tmp_list);
	struct mount *p;

	if (how & UMOUNT_PROPAGATE)
		propagate_mount_unlock(mnt);

	/* Gather the mounts to umount */
	for (p = mnt; p; p = next_mnt(p, mnt)) {
		p->mnt.mnt_flags |= MNT_UMOUNT;
		list_move(&p->mnt_list, &tmp_list);
	}

	/* Hide the mounts from mnt_mounts */
	list_for_each_entry(p, &tmp_list, mnt_list) {
		list_del_init(&p->mnt_child);
	}

	/* Add propagated mounts to the tmp_list */
	if (how & UMOUNT_PROPAGATE)
		propagate_umount(&tmp_list);

	while (!list_empty(&tmp_list)) {
		struct mnt_namespace *ns;
		bool disconnect;
		p = list_first_entry(&tmp_list, struct mount, mnt_list);
		list_del_init(&p->mnt_expire);
		list_del_init(&p->mnt_list);
		ns = p->mnt_ns;
		if (ns) {
			ns->mounts--;
			__touch_mnt_namespace(ns);
		}
		p->mnt_ns = NULL;
		if (how & UMOUNT_SYNC)
			p->mnt.mnt_flags |= MNT_SYNC_UMOUNT;

		disconnect = disconnect_mount(p, how);
		if (mnt_has_parent(p)) {
			mnt_add_count(p->mnt_parent, -1);
			if (!disconnect) {
				/* Don't forget about p */
				list_add_tail(&p->mnt_child, &p->mnt_parent->mnt_mounts);
			} else {
				umount_mnt(p);
			}
		}
		change_mnt_propagation(p, MS_PRIVATE);
		if (disconnect)
			hlist_add_head(&p->mnt_umount, &unmounted);
	}
}

static void shrink_submounts(struct mount *mnt);

static int do_umount_root(struct super_block *sb)
{
	int ret = 0;

	down_write(&sb->s_umount);
	if (!sb_rdonly(sb)) {
		struct fs_context *fc;

		fc = fs_context_for_reconfigure(sb->s_root, SB_RDONLY,
						SB_RDONLY);
		if (IS_ERR(fc)) {
			ret = PTR_ERR(fc);
		} else {
			ret = parse_monolithic_mount_data(fc, NULL);
			if (!ret)
				ret = reconfigure_super(fc);
			put_fs_context(fc);
		}
	}
	up_write(&sb->s_umount);
	return ret;
}

static int do_umount(struct mount *mnt, int flags)
{
	struct super_block *sb = mnt->mnt.mnt_sb;
	int retval;

	retval = security_sb_umount(&mnt->mnt, flags);
	if (retval)
		return retval;

	/*
	 * Allow userspace to request a mountpoint be expired rather than
	 * unmounting unconditionally. Unmount only happens if:
	 *  (1) the mark is already set (the mark is cleared by mntput())
	 *  (2) the usage count == 1 [parent vfsmount] + 1 [sys_umount]
	 */
	if (flags & MNT_EXPIRE) {
		if (&mnt->mnt == current->fs->root.mnt ||
		    flags & (MNT_FORCE | MNT_DETACH))
			return -EINVAL;

		/*
		 * probably don't strictly need the lock here if we examined
		 * all race cases, but it's a slowpath.
		 */
		lock_mount_hash();
		if (mnt_get_count(mnt) != 2) {
			unlock_mount_hash();
			return -EBUSY;
		}
		unlock_mount_hash();

		if (!xchg(&mnt->mnt_expiry_mark, 1))
			return -EAGAIN;
	}

	/*
	 * If we may have to abort operations to get out of this
	 * mount, and they will themselves hold resources we must
	 * allow the fs to do things. In the Unix tradition of
	 * 'Gee thats tricky, lets do it in userspace' the umount_begin
	 * might fail to complete on the first run through as other tasks
	 * must return, and the like. Thats for the mount program to worry
	 * about for the moment.
	 */

	if (flags & MNT_FORCE && sb->s_op->umount_begin) {
		sb->s_op->umount_begin(sb);
	}

	/*
	 * No sense to grab the lock for this test, but test itself looks
	 * somewhat bogus. Suggestions for better replacement?
	 * Ho-hum... In principle, we might treat that as umount + switch
	 * to rootfs. GC would eventually take care of the old vfsmount.
	 */
	if (&mnt->mnt == current->fs->root.mnt && !(flags & MNT_DETACH)) {
		/*
		 * Special case for "unmounting" root ...
		 * we just try to remount it readonly.
		 */
		if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN))
			return -EPERM;
		return do_umount_root(sb);
	}

	namespace_lock();
	lock_mount_hash();

	/* Recheck MNT_LOCKED with the locks held */
	retval = -EINVAL;
	if (mnt->mnt.mnt_flags & MNT_LOCKED)
		goto out;

	event++;
	if (flags & MNT_DETACH) {
		if (!list_empty(&mnt->mnt_list))
			umount_tree(mnt, UMOUNT_PROPAGATE);
		retval = 0;
	} else {
		shrink_submounts(mnt);
		retval = -EBUSY;
		if (!propagate_mount_busy(mnt, 2)) {
			if (!list_empty(&mnt->mnt_list))
				umount_tree(mnt, UMOUNT_PROPAGATE|UMOUNT_SYNC);
			retval = 0;
		}
	}
out:
	unlock_mount_hash();
	namespace_unlock();
	return retval;
}
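
/*
 * __detach_mounts - lazily unmount all mounts on the specified dentry
 *
 * During unlink, rmdir, and d_drop it is possible to lose the path
 * to an existing mountpoint, and wind up leaking the mount.
 * detach_mounts allows lazily unmounting those mounts instead of
 * leaking them.
 *
 * The caller may hold dentry->d_inode->i_mutex.
 */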
void __detach_mounts(struct dentry *dentry)
{
	struct mountpoint *mp;
	struct mount *mnt;

	namespace_lock();
	lock_mount_hash();
	mp = lookup_mountpoint(dentry);
	if (!mp)
		goto out_unlock;

	event++;
	while (!hlist_empty(&mp->m_list)) {
		mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list);
		if (mnt->mnt.mnt_flags & MNT_UMOUNT) {
			umount_mnt(mnt);
			hlist_add_head(&mnt->mnt_umount, &unmounted);
		}
		else umount_tree(mnt, UMOUNT_CONNECTED);
	}
	put_mountpoint(mp);
out_unlock:
	unlock_mount_hash();
	namespace_unlock();
}
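
/*
 * Is the caller allowed to modify his namespace?
 */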
bool may_mount(void)
{
	return ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN);
}

static void warn_mandlock(void)
{
	pr_warn_once("=======================================================\n"
		     "WARNING: The mand mount option has been deprecated and\n"
		     "         is ignored by this kernel. Remove the mand\n"
		     "         option from the mount to silence this warning.\n"
		     "=======================================================\n");
}

static int can_umount(const struct path *path, int flags)
{
	struct mount *mnt = real_mount(path->mnt);

	if (!may_mount())
		return -EPERM;
	if (path->dentry != path->mnt->mnt_root)
		return -EINVAL;
	if (!check_mnt(mnt))
		return -EINVAL;
	if (mnt->mnt.mnt_flags & MNT_LOCKED)
		return -EINVAL;
	if (flags & MNT_FORCE && !capable(CAP_SYS_ADMIN))
		return -EPERM;
	return 0;
}
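
// caller is responsible for flags being sane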
int path_umount(struct path *path, int flags)
{
	struct mount *mnt = real_mount(path->mnt);
	int ret;

	ret = can_umount(path, flags);
	if (!ret)
		ret = do_umount(mnt, flags);

	/* we mustn't call path_put() as that would clear mnt_expiry_mark */
	dput(path->dentry);
	mntput_no_expire(mnt);
	return ret;
}

static int ksys_umount(char __user *name, int flags)
{
	int lookup_flags = LOOKUP_MOUNTPOINT;
	struct path path;
	int ret;

	// basic validity checks done first
	if (flags & ~(MNT_FORCE | MNT_DETACH | MNT_EXPIRE | UMOUNT_NOFOLLOW))
		return -EINVAL;

	if (!(flags & UMOUNT_NOFOLLOW))
		lookup_flags |= LOOKUP_FOLLOW;
	ret = user_path_at(AT_FDCWD, name, lookup_flags, &path);
	if (ret)
		return ret;
	return path_umount(&path, flags);
}

SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
{
	return ksys_umount(name, flags);
}

#ifdef __ARCH_WANT_SYS_OLDUMOUNT

/*
 *	The 2.0 compatible umount. No flags.
 */
SYSCALL_DEFINE1(oldumount, char __user *, name)
{
	return ksys_umount(name, 0);
}

#endif

static bool is_mnt_ns_file(struct dentry *dentry)
{
	/* Is this a proxy for a mount namespace? */
	return dentry->d_op == &ns_dentry_operations &&
	       dentry->d_fsdata == &mntns_operations;
}

static struct mnt_namespace *to_mnt_ns(struct ns_common *ns)
{
	return container_of(ns, struct mnt_namespace, ns);
}

struct ns_common *from_mnt_ns(struct mnt_namespace *mnt)
{
	return &mnt->ns;
}

static bool mnt_ns_loop(struct dentry *dentry)
{
	/* Could bind mounting the mount namespace inode cause a
	 * mount namespace loop?
	 */
	struct mnt_namespace *mnt_ns;
	if (!is_mnt_ns_file(dentry))
		return false;

	mnt_ns = to_mnt_ns(get_proc_ns(dentry->d_inode));
	return current->nsproxy->mnt_ns->seq >= mnt_ns->seq;
}

struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
					int flag)
{
	struct mount *res, *p, *q, *r, *parent;

	if (!(flag & CL_COPY_UNBINDABLE) && IS_MNT_UNBINDABLE(mnt))
		return ERR_PTR(-EINVAL);

	if (!(flag & CL_COPY_MNT_NS_FILE) && is_mnt_ns_file(dentry))
		return ERR_PTR(-EINVAL);

	res = q = clone_mnt(mnt, dentry, flag);
	if (IS_ERR(q))
		return q;

	q->mnt_mountpoint = mnt->mnt_mountpoint;

	p = mnt;
	list_for_each_entry(r, &mnt->mnt_mounts, mnt_child) {
		struct mount *s;
		if (!is_subdir(r->mnt_mountpoint, dentry))
			continue;

		for (s = r; s; s = next_mnt(s, r)) {
			if (!(flag & CL_COPY_UNBINDABLE) &&
			    IS_MNT_UNBINDABLE(s)) {
				if (s->mnt.mnt_flags & MNT_LOCKED) {
					/* Both unbindable and locked. */
					q = ERR_PTR(-EPERM);
					goto out;
				} else {
					s = skip_mnt_tree(s);
					continue;
				}
			}
			if (!(flag & CL_COPY_MNT_NS_FILE) &&
			    is_mnt_ns_file(s->mnt.mnt_root)) {
				s = skip_mnt_tree(s);
				continue;
			}
			while (p != s->mnt_parent) {
				p = p->mnt_parent;
				q = q->mnt_parent;
			}
			p = s;
			parent = q;
			q = clone_mnt(p, p->mnt.mnt_root, flag);
			if (IS_ERR(q))
				goto out;
			lock_mount_hash();
			list_add_tail(&q->mnt_list, &res->mnt_list);
			attach_mnt(q, parent, p->mnt_mp);
			unlock_mount_hash();
		}
	}
	return res;
out:
	if (res) {
		lock_mount_hash();
		umount_tree(res, UMOUNT_SYNC);
		unlock_mount_hash();
	}
	return q;
}
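
/* Caller should check returned pointer for errors */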
struct vfsmount *collect_mounts(const struct path *path)
{
	struct mount *tree;
	namespace_lock();
	if (!check_mnt(real_mount(path->mnt)))
		tree = ERR_PTR(-EINVAL);
	else
		tree = copy_tree(real_mount(path->mnt), path->dentry,
				 CL_COPY_ALL | CL_PRIVATE);
	namespace_unlock();
	if (IS_ERR(tree))
		return ERR_CAST(tree);
	return &tree->mnt;
}

static void free_mnt_ns(struct mnt_namespace *);
static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *, bool);

void dissolve_on_fput(struct vfsmount *mnt)
{
	struct mnt_namespace *ns;
	namespace_lock();
	lock_mount_hash();
	ns = real_mount(mnt)->mnt_ns;
	if (ns) {
		if (is_anon_ns(ns))
			umount_tree(real_mount(mnt), UMOUNT_CONNECTED);
		else
			ns = NULL;
	}
	unlock_mount_hash();
	namespace_unlock();
	if (ns)
		free_mnt_ns(ns);
}

void drop_collected_mounts(struct vfsmount *mnt)
{
	namespace_lock();
	lock_mount_hash();
	umount_tree(real_mount(mnt), 0);
	unlock_mount_hash();
	namespace_unlock();
}

static bool has_locked_children(struct mount *mnt, struct dentry *dentry)
{
	struct mount *child;

	list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
		if (!is_subdir(child->mnt_mountpoint, dentry))
			continue;

		if (child->mnt.mnt_flags & MNT_LOCKED)
			return true;
	}
	return false;
}
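
/**
 * clone_private_mount - create a private clone of a path
 * @path: path to clone
 *
 * This creates a new vfsmount, which will be the clone of @path.  The new
 * mount will not be attached anywhere and will not have a parent mount.
 *
 * Release with mntput().
 */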
struct vfsmount *clone_private_mount(const struct path *path)
{
	struct mount *old_mnt = real_mount(path->mnt);
	struct mount *new_mnt;

	down_read(&namespace_sem);
	if (IS_MNT_UNBINDABLE(old_mnt))
		goto invalid;

	if (!check_mnt(old_mnt))
		goto invalid;

	if (has_locked_children(old_mnt, path->dentry))
		goto invalid;

	new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE);
	up_read(&namespace_sem);

	if (IS_ERR(new_mnt))
		return ERR_CAST(new_mnt);

	/* Longterm mount to be removed by kern_unmount*() */
	new_mnt->mnt_ns = MNT_NS_INTERNAL;

	return &new_mnt->mnt;

invalid:
	up_read(&namespace_sem);
	return ERR_PTR(-EINVAL);
}
EXPORT_SYMBOL_GPL(clone_private_mount);

int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg,
		   struct vfsmount *root)
{
	struct mount *mnt;
	int res = f(root, arg);
	if (res)
		return res;
	list_for_each_entry(mnt, &real_mount(root)->mnt_list, mnt_list) {
		res = f(&mnt->mnt, arg);
		if (res)
			return res;
	}
	return 0;
}

static void lock_mnt_tree(struct mount *mnt)
{
	struct mount *p;

	for (p = mnt; p; p = next_mnt(p, mnt)) {
		int flags = p->mnt.mnt_flags;
		/* Don't allow unprivileged users to change mount flags */
		flags |= MNT_LOCK_ATIME;

		if (flags & MNT_READONLY)
			flags |= MNT_LOCK_READONLY;

		if (flags & MNT_NODEV)
			flags |= MNT_LOCK_NODEV;

		if (flags & MNT_NOSUID)
			flags |= MNT_LOCK_NOSUID;

		if (flags & MNT_NOEXEC)
			flags |= MNT_LOCK_NOEXEC;
		/* Don't allow unprivileged users to reveal what is under a mount */
		if (list_empty(&p->mnt_expire))
			flags |= MNT_LOCKED;
		p->mnt.mnt_flags = flags;
	}
}

static void cleanup_group_ids(struct mount *mnt, struct mount *end)
{
	struct mount *p;

	for (p = mnt; p != end; p = next_mnt(p, mnt)) {
		if (p->mnt_group_id && !IS_MNT_SHARED(p))
			mnt_release_group_id(p);
	}
}

static int invent_group_ids(struct mount *mnt, bool recurse)
{
	struct mount *p;

	for (p = mnt; p; p = recurse ? next_mnt(p, mnt) : NULL) {
		if (!p->mnt_group_id && !IS_MNT_SHARED(p)) {
			int err = mnt_alloc_group_id(p);
			if (err) {
				cleanup_group_ids(mnt, p);
				return err;
			}
		}
	}

	return 0;
}

int count_mounts(struct mnt_namespace *ns, struct mount *mnt)
{
	unsigned int max = READ_ONCE(sysctl_mount_max);
	unsigned int mounts = 0;
	struct mount *p;

	if (ns->mounts >= max)
		return -ENOSPC;
	max -= ns->mounts;
	if (ns->pending_mounts >= max)
		return -ENOSPC;
	max -= ns->pending_mounts;

	for (p = mnt; p; p = next_mnt(p, mnt))
		mounts++;

	if (mounts > max)
		return -ENOSPC;

	ns->pending_mounts += mounts;
	return 0;
}
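
/*
 * attach_recursive_mnt - attach a source mount tree at the given mountpoint
 *
 * When the destination mount is shared, the attached mounts are propagated
 * to all peers and slaves of the destination, and the propagated copies
 * take on the propagation type of the mount they land on.  When the source
 * is a freshly cloned tree (plain mount or bind), the copies keep no peer
 * relation to the source; when an existing tree is moved (MS_MOVE), its
 * peer/slave relationships are preserved.
 *
 * Must be called without spinlocks held, since this function can sleep
 * in allocations.
 */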
static int attach_recursive_mnt(struct mount *source_mnt,
			struct mount *dest_mnt,
			struct mountpoint *dest_mp,
			bool moving)
{
	struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns;
	HLIST_HEAD(tree_list);
	struct mnt_namespace *ns = dest_mnt->mnt_ns;
	struct mountpoint *smp;
	struct mount *child, *p;
	struct hlist_node *n;
	int err;

	/* Preallocate a mountpoint in case the new mounts need
	 * to be tucked under other mounts.
	 */
	smp = get_mountpoint(source_mnt->mnt.mnt_root);
	if (IS_ERR(smp))
		return PTR_ERR(smp);

	/* Is there space to add these mounts to the mount namespace? */
	if (!moving) {
		err = count_mounts(ns, source_mnt);
		if (err)
			goto out;
	}

	if (IS_MNT_SHARED(dest_mnt)) {
		err = invent_group_ids(source_mnt, true);
		if (err)
			goto out;
		err = propagate_mnt(dest_mnt, dest_mp, source_mnt, &tree_list);
		lock_mount_hash();
		if (err)
			goto out_cleanup_ids;
		for (p = source_mnt; p; p = next_mnt(p, source_mnt))
			set_mnt_shared(p);
	} else {
		lock_mount_hash();
	}
	if (moving) {
		unhash_mnt(source_mnt);
		attach_mnt(source_mnt, dest_mnt, dest_mp);
		touch_mnt_namespace(source_mnt->mnt_ns);
	} else {
		if (source_mnt->mnt_ns) {
			/* move from anon - the caller will destroy */
			list_del_init(&source_mnt->mnt_ns->list);
		}
		mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt);
		commit_tree(source_mnt);
	}

	hlist_for_each_entry_safe(child, n, &tree_list, mnt_hash) {
		struct mount *q;
		hlist_del_init(&child->mnt_hash);
		q = __lookup_mnt(&child->mnt_parent->mnt,
				 child->mnt_mountpoint);
		if (q)
			mnt_change_mountpoint(child, smp, q);
		/* Notice when we are propagating across user namespaces */
		if (child->mnt_parent->mnt_ns->user_ns != user_ns)
			lock_mnt_tree(child);
		child->mnt.mnt_flags &= ~MNT_LOCKED;
		commit_tree(child);
	}
	put_mountpoint(smp);
	unlock_mount_hash();

	return 0;

 out_cleanup_ids:
	while (!hlist_empty(&tree_list)) {
		child = hlist_entry(tree_list.first, struct mount, mnt_hash);
		child->mnt_parent->mnt_ns->pending_mounts = 0;
		umount_tree(child, UMOUNT_SYNC);
	}
	unlock_mount_hash();
	cleanup_group_ids(source_mnt, NULL);
 out:
	ns->pending_mounts = 0;

	read_seqlock_excl(&mount_lock);
	put_mountpoint(smp);
	read_sequnlock_excl(&mount_lock);

	return err;
}

static struct mountpoint *lock_mount(struct path *path)
{
	struct vfsmount *mnt;
	struct dentry *dentry = path->dentry;
retry:
	inode_lock(dentry->d_inode);
	if (unlikely(cant_mount(dentry))) {
		inode_unlock(dentry->d_inode);
		return ERR_PTR(-ENOENT);
	}
	namespace_lock();
	mnt = lookup_mnt(path);
	if (likely(!mnt)) {
		struct mountpoint *mp = get_mountpoint(dentry);
		if (IS_ERR(mp)) {
			namespace_unlock();
			inode_unlock(dentry->d_inode);
			return mp;
		}
		return mp;
	}
	namespace_unlock();
	inode_unlock(path->dentry->d_inode);
	path_put(path);
	path->mnt = mnt;
	dentry = path->dentry = dget(mnt->mnt_root);
	goto retry;
}

static void unlock_mount(struct mountpoint *where)
{
	struct dentry *dentry = where->m_dentry;

	read_seqlock_excl(&mount_lock);
	put_mountpoint(where);
	read_sequnlock_excl(&mount_lock);

	namespace_unlock();
	inode_unlock(dentry->d_inode);
}

static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp)
{
	if (mnt->mnt.mnt_sb->s_flags & SB_NOUSER)
		return -EINVAL;

	if (d_is_dir(mp->m_dentry) !=
	      d_is_dir(mnt->mnt.mnt_root))
		return -ENOTDIR;

	return attach_recursive_mnt(mnt, p, mp, false);
}
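
/*
 * Sanity check the flags to change_mnt_propagation.
 */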
static int flags_to_propagation_type(int ms_flags)
{
	int type = ms_flags & ~(MS_REC | MS_SILENT);

	/* Fail if any non-propagation flags are set */
	if (type & ~(MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
		return 0;
	/* Only one propagation flag should be set */
	if (!is_power_of_2(type))
		return 0;
	return type;
}
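
/*
 * recursively change the type of the mountpoint.
 */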
static int do_change_type(struct path *path, int ms_flags)
{
	struct mount *m;
	struct mount *mnt = real_mount(path->mnt);
	int recurse = ms_flags & MS_REC;
	int type;
	int err = 0;

	if (path->dentry != path->mnt->mnt_root)
		return -EINVAL;

	type = flags_to_propagation_type(ms_flags);
	if (!type)
		return -EINVAL;

	namespace_lock();
	if (type == MS_SHARED) {
		err = invent_group_ids(mnt, recurse);
		if (err)
			goto out_unlock;
	}

	lock_mount_hash();
	for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL))
		change_mnt_propagation(m, type);
	unlock_mount_hash();

 out_unlock:
	namespace_unlock();
	return err;
}

static struct mount *__do_loopback(struct path *old_path, int recurse)
{
	struct mount *mnt = ERR_PTR(-EINVAL), *old = real_mount(old_path->mnt);

	if (IS_MNT_UNBINDABLE(old))
		return mnt;

	if (!check_mnt(old) && old_path->dentry->d_op != &ns_dentry_operations)
		return mnt;

	if (!recurse && has_locked_children(old, old_path->dentry))
		return mnt;

	if (recurse)
		mnt = copy_tree(old, old_path->dentry, CL_COPY_MNT_NS_FILE);
	else
		mnt = clone_mnt(old, old_path->dentry, 0);

	if (!IS_ERR(mnt))
		mnt->mnt.mnt_flags &= ~MNT_LOCKED;

	return mnt;
}
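
/*
 * do loopback mount.
 */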
static int do_loopback(struct path *path, const char *old_name,
				int recurse)
{
	struct path old_path;
	struct mount *mnt = NULL, *parent;
	struct mountpoint *mp;
	int err;
	if (!old_name || !*old_name)
		return -EINVAL;
	err = kern_path(old_name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &old_path);
	if (err)
		return err;

	err = -EINVAL;
	if (mnt_ns_loop(old_path.dentry))
		goto out;

	mp = lock_mount(path);
	if (IS_ERR(mp)) {
		err = PTR_ERR(mp);
		goto out;
	}

	parent = real_mount(path->mnt);
	if (!check_mnt(parent))
		goto out2;

	mnt = __do_loopback(&old_path, recurse);
	if (IS_ERR(mnt)) {
		err = PTR_ERR(mnt);
		goto out2;
	}

	err = graft_tree(mnt, parent, mp);
	if (err) {
		lock_mount_hash();
		umount_tree(mnt, UMOUNT_SYNC);
		unlock_mount_hash();
	}
out2:
	unlock_mount(mp);
out:
	path_put(&old_path);
	return err;
}

static struct file *open_detached_copy(struct path *path, bool recursive)
{
	struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns;
	struct mnt_namespace *ns = alloc_mnt_ns(user_ns, true);
	struct mount *mnt, *p;
	struct file *file;

	if (IS_ERR(ns))
		return ERR_CAST(ns);

	namespace_lock();
	mnt = __do_loopback(path, recursive);
	if (IS_ERR(mnt)) {
		namespace_unlock();
		free_mnt_ns(ns);
		return ERR_CAST(mnt);
	}

	lock_mount_hash();
	for (p = mnt; p; p = next_mnt(p, mnt)) {
		p->mnt_ns = ns;
		ns->mounts++;
	}
	ns->root = mnt;
	list_add_tail(&ns->list, &mnt->mnt_list);
	mntget(&mnt->mnt);
	unlock_mount_hash();
	namespace_unlock();

	mntput(path->mnt);
	path->mnt = &mnt->mnt;
	file = dentry_open(path, O_PATH, current_cred());
	if (IS_ERR(file))
		dissolve_on_fput(path->mnt);
	else
		file->f_mode |= FMODE_NEED_UNMOUNT;
	return file;
}

SYSCALL_DEFINE3(open_tree, int, dfd, const char __user *, filename, unsigned, flags)
{
	struct file *file;
	struct path path;
	int lookup_flags = LOOKUP_AUTOMOUNT | LOOKUP_FOLLOW;
	bool detached = flags & OPEN_TREE_CLONE;
	int error;
	int fd;

	BUILD_BUG_ON(OPEN_TREE_CLOEXEC != O_CLOEXEC);

	if (flags & ~(AT_EMPTY_PATH | AT_NO_AUTOMOUNT | AT_RECURSIVE |
		      AT_SYMLINK_NOFOLLOW | OPEN_TREE_CLONE |
		      OPEN_TREE_CLOEXEC))
		return -EINVAL;

	if ((flags & (AT_RECURSIVE | OPEN_TREE_CLONE)) == AT_RECURSIVE)
		return -EINVAL;

	if (flags & AT_NO_AUTOMOUNT)
		lookup_flags &= ~LOOKUP_AUTOMOUNT;
	if (flags & AT_SYMLINK_NOFOLLOW)
		lookup_flags &= ~LOOKUP_FOLLOW;
	if (flags & AT_EMPTY_PATH)
		lookup_flags |= LOOKUP_EMPTY;

	if (detached && !may_mount())
		return -EPERM;

	fd = get_unused_fd_flags(flags & O_CLOEXEC);
	if (fd < 0)
		return fd;

	error = user_path_at(dfd, filename, lookup_flags, &path);
	if (unlikely(error)) {
		file = ERR_PTR(error);
	} else {
		if (detached)
			file = open_detached_copy(&path, flags & AT_RECURSIVE);
		else
			file = dentry_open(&path, O_PATH, current_cred());
		path_put(&path);
	}
	if (IS_ERR(file)) {
		put_unused_fd(fd);
		return PTR_ERR(file);
	}
	fd_install(fd, file);
	return fd;
}
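
/*
 * Don't allow locked mount flags to be cleared.
 *
 * No locks need to be held here while testing the various MNT_LOCK
 * flags because those flags can never be cleared once they are set.
 */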
static bool can_change_locked_flags(struct mount *mnt, unsigned int mnt_flags)
{
	unsigned int fl = mnt->mnt.mnt_flags;

	if ((fl & MNT_LOCK_READONLY) &&
	    !(mnt_flags & MNT_READONLY))
		return false;

	if ((fl & MNT_LOCK_NODEV) &&
	    !(mnt_flags & MNT_NODEV))
		return false;

	if ((fl & MNT_LOCK_NOSUID) &&
	    !(mnt_flags & MNT_NOSUID))
		return false;

	if ((fl & MNT_LOCK_NOEXEC) &&
	    !(mnt_flags & MNT_NOEXEC))
		return false;

	if ((fl & MNT_LOCK_ATIME) &&
	    ((fl & MNT_ATIME_MASK) != (mnt_flags & MNT_ATIME_MASK)))
		return false;

	return true;
}

static int change_mount_ro_state(struct mount *mnt, unsigned int mnt_flags)
{
	bool readonly_request = (mnt_flags & MNT_READONLY);

	if (readonly_request == __mnt_is_readonly(&mnt->mnt))
		return 0;

	if (readonly_request)
		return mnt_make_readonly(mnt);

	mnt->mnt.mnt_flags &= ~MNT_READONLY;
	return 0;
}

static void set_mount_attributes(struct mount *mnt, unsigned int mnt_flags)
{
	mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK;
	mnt->mnt.mnt_flags = mnt_flags;
	touch_mnt_namespace(mnt->mnt_ns);
}

static void mnt_warn_timestamp_expiry(struct path *mountpoint, struct vfsmount *mnt)
{
	struct super_block *sb = mnt->mnt_sb;

	if (!__mnt_is_readonly(mnt) &&
	   (!(sb->s_iflags & SB_I_TS_EXPIRY_WARNED)) &&
	   (ktime_get_real_seconds() + TIME_UPTIME_SEC_MAX > sb->s_time_max)) {
		char *buf = (char *)__get_free_page(GFP_KERNEL);
		char *mntpath = buf ? d_path(mountpoint, buf, PAGE_SIZE) : ERR_PTR(-ENOMEM);
		struct tm tm;

		time64_to_tm(sb->s_time_max, 0, &tm);

		pr_warn("%s filesystem being %s at %s supports timestamps until %04ld (0x%llx)\n",
			sb->s_type->name,
			is_mounted(mnt) ? "remounted" : "mounted",
			mntpath,
			tm.tm_year+1900, (unsigned long long)sb->s_time_max);

		free_page((unsigned long)buf);
		sb->s_iflags |= SB_I_TS_EXPIRY_WARNED;
	}
}
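
/*
 * Handle reconfiguration of the mountpoint only without alteration of the
 * superblock it refers to.  This is triggered by specifying MS_REMOUNT|MS_BIND
 * to mount(2).
 */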
static int do_reconfigure_mnt(struct path *path, unsigned int mnt_flags)
{
	struct super_block *sb = path->mnt->mnt_sb;
	struct mount *mnt = real_mount(path->mnt);
	int ret;

	if (!check_mnt(mnt))
		return -EINVAL;

	if (path->dentry != mnt->mnt.mnt_root)
		return -EINVAL;

	if (!can_change_locked_flags(mnt, mnt_flags))
		return -EPERM;

	/*
	 * We're only checking whether the superblock is read-only not
	 * changing it, so only take down_read(&sb->s_umount).
	 */
	down_read(&sb->s_umount);
	lock_mount_hash();
	ret = change_mount_ro_state(mnt, mnt_flags);
	if (ret == 0)
		set_mount_attributes(mnt, mnt_flags);
	unlock_mount_hash();
	up_read(&sb->s_umount);

	mnt_warn_timestamp_expiry(path, &mnt->mnt);

	return ret;
}
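
/*
 * change filesystem flags. dir should be a physical root of filesystem.
 * If you've mounted a non-root directory somewhere and want to do remount
 * on it - tough luck.
 */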
static int do_remount(struct path *path, int ms_flags, int sb_flags,
		      int mnt_flags, void *data)
{
	int err;
	struct super_block *sb = path->mnt->mnt_sb;
	struct mount *mnt = real_mount(path->mnt);
	struct fs_context *fc;

	if (!check_mnt(mnt))
		return -EINVAL;

	if (path->dentry != path->mnt->mnt_root)
		return -EINVAL;

	if (!can_change_locked_flags(mnt, mnt_flags))
		return -EPERM;

	fc = fs_context_for_reconfigure(path->dentry, sb_flags, MS_RMT_MASK);
	if (IS_ERR(fc))
		return PTR_ERR(fc);

	fc->oldapi = true;
	err = parse_monolithic_mount_data(fc, data);
	if (!err) {
		down_write(&sb->s_umount);
		err = -EPERM;
		if (ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) {
			err = reconfigure_super(fc);
			if (!err) {
				lock_mount_hash();
				set_mount_attributes(mnt, mnt_flags);
				unlock_mount_hash();
			}
		}
		up_write(&sb->s_umount);
	}

	mnt_warn_timestamp_expiry(path, &mnt->mnt);

	put_fs_context(fc);
	return err;
}

static inline int tree_contains_unbindable(struct mount *mnt)
{
	struct mount *p;
	for (p = mnt; p; p = next_mnt(p, mnt)) {
		if (IS_MNT_UNBINDABLE(p))
			return 1;
	}
	return 0;
}
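
/*
 * Check that there aren't references to earlier/same mount namespaces in the
 * specified subtree.  Such references can act as pins for mount namespaces
 * that aren't checked by the mount-cycle checking code, thereby allowing
 * cycles to be made.
 */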
static bool check_for_nsfs_mounts(struct mount *subtree)
{
	struct mount *p;
	bool ret = false;

	lock_mount_hash();
	for (p = subtree; p; p = next_mnt(p, subtree))
		if (mnt_ns_loop(p->mnt.mnt_root))
			goto out;

	ret = true;
out:
	unlock_mount_hash();
	return ret;
}
2750static int do_set_group(struct path *from_path, struct path *to_path)
2751{
2752 struct mount *from, *to;
2753 int err;
2754
2755 from = real_mount(from_path->mnt);
2756 to = real_mount(to_path->mnt);
2757
2758 namespace_lock();
2759
2760 err = -EINVAL;
 /* To and From must be mounted */
2762 if (!is_mounted(&from->mnt))
2763 goto out;
2764 if (!is_mounted(&to->mnt))
2765 goto out;
2766
2767 err = -EPERM;
 /* We should be allowed to modify mount namespaces of both mounts */
2769 if (!ns_capable(from->mnt_ns->user_ns, CAP_SYS_ADMIN))
2770 goto out;
2771 if (!ns_capable(to->mnt_ns->user_ns, CAP_SYS_ADMIN))
2772 goto out;
2773
2774 err = -EINVAL;
 /* To and From paths should be mount roots */
2776 if (from_path->dentry != from_path->mnt->mnt_root)
2777 goto out;
2778 if (to_path->dentry != to_path->mnt->mnt_root)
2779 goto out;
2780
 /* Setting sharing groups is only allowed across same superblock */
2782 if (from->mnt.mnt_sb != to->mnt.mnt_sb)
2783 goto out;
2784
 /* From mount root should be wider than To mount root */
2786 if (!is_subdir(to->mnt.mnt_root, from->mnt.mnt_root))
2787 goto out;
2788
 /* From mount should not have locked children in place of To's root */
2790 if (has_locked_children(from, to->mnt.mnt_root))
2791 goto out;
2792
 /* Setting sharing groups is only allowed on private mounts */
2794 if (IS_MNT_SHARED(to) || IS_MNT_SLAVE(to))
2795 goto out;
2796
 /* From mount should be either shared or slave */
2798 if (!IS_MNT_SHARED(from) && !IS_MNT_SLAVE(from))
2799 goto out;
2800
2801 if (IS_MNT_SLAVE(from)) {
2802 struct mount *m = from->mnt_master;
2803
2804 list_add(&to->mnt_slave, &m->mnt_slave_list);
2805 to->mnt_master = m;
2806 }
2807
2808 if (IS_MNT_SHARED(from)) {
2809 to->mnt_group_id = from->mnt_group_id;
2810 list_add(&to->mnt_share, &from->mnt_share);
2811 lock_mount_hash();
2812 set_mnt_shared(to);
2813 unlock_mount_hash();
2814 }
2815
2816 err = 0;
2817out:
2818 namespace_unlock();
2819 return err;
2820}
2821
2822static int do_move_mount(struct path *old_path, struct path *new_path)
2823{
2824 struct mnt_namespace *ns;
2825 struct mount *p;
2826 struct mount *old;
2827 struct mount *parent;
2828 struct mountpoint *mp, *old_mp;
2829 int err;
2830 bool attached;
2831
2832 mp = lock_mount(new_path);
2833 if (IS_ERR(mp))
2834 return PTR_ERR(mp);
2835
2836 old = real_mount(old_path->mnt);
2837 p = real_mount(new_path->mnt);
2838 parent = old->mnt_parent;
2839 attached = mnt_has_parent(old);
2840 old_mp = old->mnt_mp;
2841 ns = old->mnt_ns;
2842
2843 err = -EINVAL;
 /* The mountpoint must be in our namespace. */
2845 if (!check_mnt(p))
2846 goto out;
2847
 /* The thing moved must be mounted... */
2849 if (!is_mounted(&old->mnt))
2850 goto out;
2851
 /* ... and either ours or the root of an anonymous namespace */
2853 if (!(attached ? check_mnt(old) : is_anon_ns(ns)))
2854 goto out;
2855
2856 if (old->mnt.mnt_flags & MNT_LOCKED)
2857 goto out;
2858
2859 if (old_path->dentry != old_path->mnt->mnt_root)
2860 goto out;
2861
2862 if (d_is_dir(new_path->dentry) !=
2863 d_is_dir(old_path->dentry))
2864 goto out;
2865
 /*
 * Don't move a mount residing in a shared parent.
 */
2868 if (attached && IS_MNT_SHARED(parent))
2869 goto out;
2870
 /*
 * Don't move a mount tree containing unbindable mounts to a destination
 * mount which is shared.
 */
2874 if (IS_MNT_SHARED(p) && tree_contains_unbindable(old))
2875 goto out;
2876 err = -ELOOP;
2877 if (!check_for_nsfs_mounts(old))
2878 goto out;
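 /* Don't allow moving a mount into a subtree of itself. */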
2879 for (; mnt_has_parent(p); p = p->mnt_parent)
2880 if (p == old)
2881 goto out;
2882
2883 err = attach_recursive_mnt(old, real_mount(new_path->mnt), mp,
2884 attached);
2885 if (err)
2886 goto out;
2887
 /* if the mount is moved, it should no longer be automatically
 * expired */
2890 list_del_init(&old->mnt_expire);
2891 if (attached)
2892 put_mountpoint(old_mp);
2893out:
2894 unlock_mount(mp);
2895 if (!err) {
2896 if (attached)
2897 mntput_no_expire(parent);
2898 else
2899 free_mnt_ns(ns);
2900 }
2901 return err;
2902}
2903
2904static int do_move_mount_old(struct path *path, const char *old_name)
2905{
2906 struct path old_path;
2907 int err;
2908
2909 if (!old_name || !*old_name)
2910 return -EINVAL;
2911
2912 err = kern_path(old_name, LOOKUP_FOLLOW, &old_path);
2913 if (err)
2914 return err;
2915
2916 err = do_move_mount(&old_path, path);
2917 path_put(&old_path);
2918 return err;
2919}
2920
/*
 * add a mount into a namespace's mount tree
 */
2924static int do_add_mount(struct mount *newmnt, struct mountpoint *mp,
2925 const struct path *path, int mnt_flags)
2926{
2927 struct mount *parent = real_mount(path->mnt);
2928
2929 mnt_flags &= ~MNT_INTERNAL_FLAGS;
2930
2931 if (unlikely(!check_mnt(parent))) {
 /* that's acceptable only for automounts done in private ns */
2933 if (!(mnt_flags & MNT_SHRINKABLE))
2934 return -EINVAL;
 /* ... and for those we'd better have mountpoint still alive */
2936 if (!parent->mnt_ns)
2937 return -EINVAL;
2938 }
2939
2940
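 /* Refuse the same filesystem on the same mount point */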
2941 if (path->mnt->mnt_sb == newmnt->mnt.mnt_sb &&
2942 path->mnt->mnt_root == path->dentry)
2943 return -EBUSY;
2944
2945 if (d_is_symlink(newmnt->mnt.mnt_root))
2946 return -EINVAL;
2947
2948 newmnt->mnt.mnt_flags = mnt_flags;
2949 return graft_tree(newmnt, parent, mp);
2950}
2951
2952static bool mount_too_revealing(const struct super_block *sb, int *new_mnt_flags);
2953
/*
 * Create a new mount using a superblock configuration and request it
 * be added to the namespace tree.
 */
2958static int do_new_mount_fc(struct fs_context *fc, struct path *mountpoint,
2959 unsigned int mnt_flags)
2960{
2961 struct vfsmount *mnt;
2962 struct mountpoint *mp;
2963 struct super_block *sb = fc->root->d_sb;
2964 int error;
2965
2966 error = security_sb_kern_mount(sb);
2967 if (!error && mount_too_revealing(sb, &mnt_flags))
2968 error = -EPERM;
2969
2970 if (unlikely(error)) {
2971 fc_drop_locked(fc);
2972 return error;
2973 }
2974
2975 up_write(&sb->s_umount);
2976
2977 mnt = vfs_create_mount(fc);
2978 if (IS_ERR(mnt))
2979 return PTR_ERR(mnt);
2980
2981 mnt_warn_timestamp_expiry(mountpoint, mnt);
2982
2983 mp = lock_mount(mountpoint);
2984 if (IS_ERR(mp)) {
2985 mntput(mnt);
2986 return PTR_ERR(mp);
2987 }
2988 error = do_add_mount(real_mount(mnt), mp, mountpoint, mnt_flags);
2989 unlock_mount(mp);
2990 if (error < 0)
2991 mntput(mnt);
2992 return error;
2993}
2994
/*
 * create a new mount for userspace and request it to be added into the
 * namespace's tree
 */
2999static int do_new_mount(struct path *path, const char *fstype, int sb_flags,
3000 int mnt_flags, const char *name, void *data)
3001{
3002 struct file_system_type *type;
3003 struct fs_context *fc;
3004 const char *subtype = NULL;
3005 int err = 0;
3006
3007 if (!fstype)
3008 return -EINVAL;
3009
3010 type = get_fs_type(fstype);
3011 if (!type)
3012 return -ENODEV;
3013
3014 if (type->fs_flags & FS_HAS_SUBTYPE) {
3015 subtype = strchr(fstype, '.');
3016 if (subtype) {
3017 subtype++;
3018 if (!*subtype) {
3019 put_filesystem(type);
3020 return -EINVAL;
3021 }
3022 }
3023 }
3024
3025 fc = fs_context_for_mount(type, sb_flags);
3026 put_filesystem(type);
3027 if (IS_ERR(fc))
3028 return PTR_ERR(fc);
3029
3030 if (subtype)
3031 err = vfs_parse_fs_string(fc, "subtype",
3032 subtype, strlen(subtype));
3033 if (!err && name)
3034 err = vfs_parse_fs_string(fc, "source", name, strlen(name));
3035 if (!err)
3036 err = parse_monolithic_mount_data(fc, data);
3037 if (!err && !mount_capable(fc))
3038 err = -EPERM;
3039 if (!err)
3040 err = vfs_get_tree(fc);
3041 if (!err)
3042 err = do_new_mount_fc(fc, path, mnt_flags);
3043
3044 put_fs_context(fc);
3045 return err;
3046}
3047
3048int finish_automount(struct vfsmount *m, const struct path *path)
3049{
3050 struct dentry *dentry = path->dentry;
3051 struct mountpoint *mp;
3052 struct mount *mnt;
3053 int err;
3054
3055 if (!m)
3056 return 0;
3057 if (IS_ERR(m))
3058 return PTR_ERR(m);
3059
3060 mnt = real_mount(m);
 /* The new mount record should have at least 2 refs to prevent it being
 * expired before we get a chance to add it
 */
3064 BUG_ON(mnt_get_count(mnt) < 2);
3065
3066 if (m->mnt_sb == path->mnt->mnt_sb &&
3067 m->mnt_root == dentry) {
3068 err = -ELOOP;
3069 goto discard;
3070 }
3071
 /*
 * we don't want to use lock_mount() - in this case finding something
 * that overmounts our mountpoint means "quietly drop what we've
 * got", not "try to mount it on top".
 */
3077 inode_lock(dentry->d_inode);
3078 namespace_lock();
3079 if (unlikely(cant_mount(dentry))) {
3080 err = -ENOENT;
3081 goto discard_locked;
3082 }
3083 rcu_read_lock();
3084 if (unlikely(__lookup_mnt(path->mnt, dentry))) {
3085 rcu_read_unlock();
3086 err = 0;
3087 goto discard_locked;
3088 }
3089 rcu_read_unlock();
3090 mp = get_mountpoint(dentry);
3091 if (IS_ERR(mp)) {
3092 err = PTR_ERR(mp);
3093 goto discard_locked;
3094 }
3095
3096 err = do_add_mount(mnt, mp, path, path->mnt->mnt_flags | MNT_SHRINKABLE);
3097 unlock_mount(mp);
3098 if (unlikely(err))
3099 goto discard;
3100 mntput(m);
3101 return 0;
3102
3103discard_locked:
3104 namespace_unlock();
3105 inode_unlock(dentry->d_inode);
3106discard:
 /* remove m from any expiration list it may be on */
3108 if (!list_empty(&mnt->mnt_expire)) {
3109 namespace_lock();
3110 list_del_init(&mnt->mnt_expire);
3111 namespace_unlock();
3112 }
3113 mntput(m);
3114 mntput(m);
3115 return err;
3116}
3117
/**
 * mnt_set_expiry - Put a mount on an expiration list
 * @mnt: The mount to put on the list.
 * @expiry_list: The list to put the mount on.
 */
3123void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list)
3124{
3125 namespace_lock();
3126
3127 list_add_tail(&real_mount(mnt)->mnt_expire, expiry_list);
3128
3129 namespace_unlock();
3130}
3131EXPORT_SYMBOL(mnt_set_expiry);
3132
/*
 * process a list of expirable mountpoints with the intent of discarding any
 * mountpoints that aren't in use and haven't been touched since last we came
 * here
 */
3138void mark_mounts_for_expiry(struct list_head *mounts)
3139{
3140 struct mount *mnt, *next;
3141 LIST_HEAD(graveyard);
3142
3143 if (list_empty(mounts))
3144 return;
3145
3146 namespace_lock();
3147 lock_mount_hash();
3148
 /* extract from the expiration list every vfsmount that matches the
 * following criteria:
 * - only referenced by its parent vfsmount
 * - still marked for expiry (marked on the last call here; marks are
 *   cleared by mntput())
 */
3155 list_for_each_entry_safe(mnt, next, mounts, mnt_expire) {
3156 if (!xchg(&mnt->mnt_expiry_mark, 1) ||
3157 propagate_mount_busy(mnt, 1))
3158 continue;
3159 list_move(&mnt->mnt_expire, &graveyard);
3160 }
3161 while (!list_empty(&graveyard)) {
3162 mnt = list_first_entry(&graveyard, struct mount, mnt_expire);
3163 touch_mnt_namespace(mnt->mnt_ns);
3164 umount_tree(mnt, UMOUNT_PROPAGATE|UMOUNT_SYNC);
3165 }
3166 unlock_mount_hash();
3167 namespace_unlock();
3168}
3169
3170EXPORT_SYMBOL_GPL(mark_mounts_for_expiry);
3171
/*
 * Ripoff of 'select_parent()'
 *
 * search the list of submounts for a given mountpoint, and move any
 * shrinkable submounts to the 'graveyard' list.
 */
3178static int select_submounts(struct mount *parent, struct list_head *graveyard)
3179{
3180 struct mount *this_parent = parent;
3181 struct list_head *next;
3182 int found = 0;
3183
3184repeat:
3185 next = this_parent->mnt_mounts.next;
3186resume:
3187 while (next != &this_parent->mnt_mounts) {
3188 struct list_head *tmp = next;
3189 struct mount *mnt = list_entry(tmp, struct mount, mnt_child);
3190
3191 next = tmp->next;
3192 if (!(mnt->mnt.mnt_flags & MNT_SHRINKABLE))
3193 continue;
 /*
 * Descend a level if the mnt_mounts list is non-empty.
 */
3197 if (!list_empty(&mnt->mnt_mounts)) {
3198 this_parent = mnt;
3199 goto repeat;
3200 }
3201
3202 if (!propagate_mount_busy(mnt, 1)) {
3203 list_move_tail(&mnt->mnt_expire, graveyard);
3204 found++;
3205 }
3206 }
 /*
 * All done at this level ... ascend and resume the search
 */
3210 if (this_parent != parent) {
3211 next = this_parent->mnt_child.next;
3212 this_parent = this_parent->mnt_parent;
3213 goto resume;
3214 }
3215 return found;
3216}
3217
/*
 * process a list of expirable mountpoints with the intent of discarding any
 * submounts of a specific parent mountpoint
 *
 * mount_lock must be held for write
 */
3224static void shrink_submounts(struct mount *mnt)
3225{
3226 LIST_HEAD(graveyard);
3227 struct mount *m;
3228
 /* extract submounts of 'mnt' from the expiration list */
3230 while (select_submounts(mnt, &graveyard)) {
3231 while (!list_empty(&graveyard)) {
3232 m = list_first_entry(&graveyard, struct mount,
3233 mnt_expire);
3234 touch_mnt_namespace(m->mnt_ns);
3235 umount_tree(m, UMOUNT_PROPAGATE|UMOUNT_SYNC);
3236 }
3237 }
3238}
3239
3240static void *copy_mount_options(const void __user * data)
3241{
3242 char *copy;
3243 unsigned left, offset;
3244
3245 if (!data)
3246 return NULL;
3247
3248 copy = kmalloc(PAGE_SIZE, GFP_KERNEL);
3249 if (!copy)
3250 return ERR_PTR(-ENOMEM);
3251
3252 left = copy_from_user(copy, data, PAGE_SIZE);
3253
 /*
 * Not all architectures have an exact copy_from_user(). Resort to
 * byte at a time.
 */
3258 offset = PAGE_SIZE - left;
3259 while (left) {
3260 char c;
3261 if (get_user(c, (const char __user *)data + offset))
3262 break;
3263 copy[offset] = c;
3264 left--;
3265 offset++;
3266 }
3267
3268 if (left == PAGE_SIZE) {
3269 kfree(copy);
3270 return ERR_PTR(-EFAULT);
3271 }
3272
3273 return copy;
3274}
3275
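/* Duplicate a NUL-terminated string (fs type or device name) from userspace */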
3276static char *copy_mount_string(const void __user *data)
3277{
3278 return data ? strndup_user(data, PATH_MAX) : NULL;
3279}
3280
/*
 * Flags is a 32-bit value that allows up to 31 non-fs dependent flags to
 * be given to the mount() call (ie: read-only, no-dev, no-suid etc).
 *
 * data is a (void *) that can point to any structure up to
 * PAGE_SIZE-1 bytes, which can contain arbitrary fs-dependent
 * information (or be NULL).
 *
 * Pre-0.97 versions of mount() didn't have a flags word.
 * When the flags word was introduced its top half was required
 * to have the magic value 0xC0ED, and this remained so until 2.4.0-test9.
 * Therefore, if this magic number is present, it carries no information
 * and must be discarded.
 */
3295int path_mount(const char *dev_name, struct path *path,
3296 const char *type_page, unsigned long flags, void *data_page)
3297{
3298 unsigned int mnt_flags = 0, sb_flags;
3299 int ret;
3300
 /* Discard magic */
3302 if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
3303 flags &= ~MS_MGC_MSK;
3304
 /* Basic sanity checks */
3306 if (data_page)
3307 ((char *)data_page)[PAGE_SIZE - 1] = 0;
3308
3309 if (flags & MS_NOUSER)
3310 return -EINVAL;
3311
3312 ret = security_sb_mount(dev_name, path, type_page, flags, data_page);
3313 if (ret)
3314 return ret;
3315 if (!may_mount())
3316 return -EPERM;
3317 if (flags & SB_MANDLOCK)
3318 warn_mandlock();
3319
 /* Default to relatime unless overridden */
3321 if (!(flags & MS_NOATIME))
3322 mnt_flags |= MNT_RELATIME;
3323
 /* Separate the per-mountpoint flags */
3325 if (flags & MS_NOSUID)
3326 mnt_flags |= MNT_NOSUID;
3327 if (flags & MS_NODEV)
3328 mnt_flags |= MNT_NODEV;
3329 if (flags & MS_NOEXEC)
3330 mnt_flags |= MNT_NOEXEC;
3331 if (flags & MS_NOATIME)
3332 mnt_flags |= MNT_NOATIME;
3333 if (flags & MS_NODIRATIME)
3334 mnt_flags |= MNT_NODIRATIME;
3335 if (flags & MS_STRICTATIME)
3336 mnt_flags &= ~(MNT_RELATIME | MNT_NOATIME);
3337 if (flags & MS_RDONLY)
3338 mnt_flags |= MNT_READONLY;
3339 if (flags & MS_NOSYMFOLLOW)
3340 mnt_flags |= MNT_NOSYMFOLLOW;
3341
 /* The default atime for remount is preservation */
3343 if ((flags & MS_REMOUNT) &&
3344 ((flags & (MS_NOATIME | MS_NODIRATIME | MS_RELATIME |
3345 MS_STRICTATIME)) == 0)) {
3346 mnt_flags &= ~MNT_ATIME_MASK;
3347 mnt_flags |= path->mnt->mnt_flags & MNT_ATIME_MASK;
3348 }
3349
3350 sb_flags = flags & (SB_RDONLY |
3351 SB_SYNCHRONOUS |
3352 SB_MANDLOCK |
3353 SB_DIRSYNC |
3354 SB_SILENT |
3355 SB_POSIXACL |
3356 SB_LAZYTIME |
3357 SB_I_VERSION);
3358
3359 if ((flags & (MS_REMOUNT | MS_BIND)) == (MS_REMOUNT | MS_BIND))
3360 return do_reconfigure_mnt(path, mnt_flags);
3361 if (flags & MS_REMOUNT)
3362 return do_remount(path, flags, sb_flags, mnt_flags, data_page);
3363 if (flags & MS_BIND)
3364 return do_loopback(path, dev_name, flags & MS_REC);
3365 if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
3366 return do_change_type(path, flags);
3367 if (flags & MS_MOVE)
3368 return do_move_mount_old(path, dev_name);
3369
3370 return do_new_mount(path, type_page, sb_flags, mnt_flags, dev_name,
3371 data_page);
3372}
3373
3374long do_mount(const char *dev_name, const char __user *dir_name,
3375 const char *type_page, unsigned long flags, void *data_page)
3376{
3377 struct path path;
3378 int ret;
3379
3380 ret = user_path_at(AT_FDCWD, dir_name, LOOKUP_FOLLOW, &path);
3381 if (ret)
3382 return ret;
3383 ret = path_mount(dev_name, &path, type_page, flags, data_page);
3384 path_put(&path);
3385 return ret;
3386}
3387
3388static struct ucounts *inc_mnt_namespaces(struct user_namespace *ns)
3389{
3390 return inc_ucount(ns, current_euid(), UCOUNT_MNT_NAMESPACES);
3391}
3392
3393static void dec_mnt_namespaces(struct ucounts *ucounts)
3394{
3395 dec_ucount(ucounts, UCOUNT_MNT_NAMESPACES);
3396}
3397
3398static void free_mnt_ns(struct mnt_namespace *ns)
3399{
3400 if (!is_anon_ns(ns))
3401 ns_free_inum(&ns->ns);
3402 dec_mnt_namespaces(ns->ucounts);
3403 put_user_ns(ns->user_ns);
3404 kfree(ns);
3405}
3406
/*
 * Assign a sequence number so we can detect when we attempt to bind
 * mount a reference to an older mount namespace into the current
 * mount namespace, preventing reference counting loops.  A 64bit
 * number incrementing at 10Ghz will take 12,427 years to wrap which
 * is effectively never, so we can ignore the possibility.
 */
3414static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1);
3415
3416static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool anon)
3417{
3418 struct mnt_namespace *new_ns;
3419 struct ucounts *ucounts;
3420 int ret;
3421
3422 ucounts = inc_mnt_namespaces(user_ns);
3423 if (!ucounts)
3424 return ERR_PTR(-ENOSPC);
3425
3426 new_ns = kzalloc(sizeof(struct mnt_namespace), GFP_KERNEL_ACCOUNT);
3427 if (!new_ns) {
3428 dec_mnt_namespaces(ucounts);
3429 return ERR_PTR(-ENOMEM);
3430 }
3431 if (!anon) {
3432 ret = ns_alloc_inum(&new_ns->ns);
3433 if (ret) {
3434 kfree(new_ns);
3435 dec_mnt_namespaces(ucounts);
3436 return ERR_PTR(ret);
3437 }
3438 }
3439 new_ns->ns.ops = &mntns_operations;
3440 if (!anon)
3441 new_ns->seq = atomic64_add_return(1, &mnt_ns_seq);
3442 refcount_set(&new_ns->ns.count, 1);
3443 INIT_LIST_HEAD(&new_ns->list);
3444 init_waitqueue_head(&new_ns->poll);
3445 spin_lock_init(&new_ns->ns_lock);
3446 new_ns->user_ns = get_user_ns(user_ns);
3447 new_ns->ucounts = ucounts;
3448 return new_ns;
3449}
3450
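/*
 * Create a copy of the given mount namespace for a CLONE_NEWNS child,
 * repointing @new_fs's root and pwd into the copied tree where they
 * referred to the old one.
 */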
3451__latent_entropy
3452struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
3453 struct user_namespace *user_ns, struct fs_struct *new_fs)
3454{
3455 struct mnt_namespace *new_ns;
3456 struct vfsmount *rootmnt = NULL, *pwdmnt = NULL;
3457 struct mount *p, *q;
3458 struct mount *old;
3459 struct mount *new;
3460 int copy_flags;
3461
3462 BUG_ON(!ns);
3463
3464 if (likely(!(flags & CLONE_NEWNS))) {
3465 get_mnt_ns(ns);
3466 return ns;
3467 }
3468
3469 old = ns->root;
3470
3471 new_ns = alloc_mnt_ns(user_ns, false);
3472 if (IS_ERR(new_ns))
3473 return new_ns;
3474
3475 namespace_lock();
3476
3477 copy_flags = CL_COPY_UNBINDABLE | CL_EXPIRE;
3478 if (user_ns != ns->user_ns)
3479 copy_flags |= CL_SHARED_TO_SLAVE;
3480 new = copy_tree(old, old->mnt.mnt_root, copy_flags);
3481 if (IS_ERR(new)) {
3482 namespace_unlock();
3483 free_mnt_ns(new_ns);
3484 return ERR_CAST(new);
3485 }
3486 if (user_ns != ns->user_ns) {
3487 lock_mount_hash();
3488 lock_mnt_tree(new);
3489 unlock_mount_hash();
3490 }
3491 new_ns->root = new;
3492 list_add_tail(&new_ns->list, &new->mnt_list);
3493
 /*
 * Second pass: switch the tsk->fs->* elements and mark new vfsmounts
 * as belonging to new namespace.  We have already acquired a private
 * fs_struct, so tsk->fs->lock is not needed.
 */
3499 p = old;
3500 q = new;
3501 while (p) {
3502 q->mnt_ns = new_ns;
3503 new_ns->mounts++;
3504 if (new_fs) {
3505 if (&p->mnt == new_fs->root.mnt) {
3506 new_fs->root.mnt = mntget(&q->mnt);
3507 rootmnt = &p->mnt;
3508 }
3509 if (&p->mnt == new_fs->pwd.mnt) {
3510 new_fs->pwd.mnt = mntget(&q->mnt);
3511 pwdmnt = &p->mnt;
3512 }
3513 }
3514 p = next_mnt(p, old);
3515 q = next_mnt(q, new);
3516 if (!q)
3517 break;
3518 while (p->mnt.mnt_root != q->mnt.mnt_root)
3519 p = next_mnt(p, old);
3520 }
3521 namespace_unlock();
3522
3523 if (rootmnt)
3524 mntput(rootmnt);
3525 if (pwdmnt)
3526 mntput(pwdmnt);
3527
3528 return new_ns;
3529}
3530
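/*
 * Attach @m to a temporary anonymous namespace, look up @name beneath it,
 * and return the resulting dentry with an active reference on its
 * superblock (s_umount held for write).
 */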
3531struct dentry *mount_subtree(struct vfsmount *m, const char *name)
3532{
3533 struct mount *mnt = real_mount(m);
3534 struct mnt_namespace *ns;
3535 struct super_block *s;
3536 struct path path;
3537 int err;
3538
3539 ns = alloc_mnt_ns(&init_user_ns, true);
3540 if (IS_ERR(ns)) {
3541 mntput(m);
3542 return ERR_CAST(ns);
3543 }
3544 mnt->mnt_ns = ns;
3545 ns->root = mnt;
3546 ns->mounts++;
3547 list_add(&mnt->mnt_list, &ns->list);
3548
3549 err = vfs_path_lookup(m->mnt_root, m,
3550 name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &path);
3551
3552 put_mnt_ns(ns);
3553
3554 if (err)
3555 return ERR_PTR(err);
3556
 /* trade a vfsmount reference for active sb one */
3558 s = path.mnt->mnt_sb;
3559 atomic_inc(&s->s_active);
3560 mntput(path.mnt);
 /* lock the sucker */
3562 down_write(&s->s_umount);
 /* ... and return the root of (sub)tree on it */
3564 return path.dentry;
3565}
3566EXPORT_SYMBOL(mount_subtree);
3567
3568SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
3569 char __user *, type, unsigned long, flags, void __user *, data)
3570{
3571 int ret;
3572 char *kernel_type;
3573 char *kernel_dev;
3574 void *options;
3575
3576 kernel_type = copy_mount_string(type);
3577 ret = PTR_ERR(kernel_type);
3578 if (IS_ERR(kernel_type))
3579 goto out_type;
3580
3581 kernel_dev = copy_mount_string(dev_name);
3582 ret = PTR_ERR(kernel_dev);
3583 if (IS_ERR(kernel_dev))
3584 goto out_dev;
3585
3586 options = copy_mount_options(data);
3587 ret = PTR_ERR(options);
3588 if (IS_ERR(options))
3589 goto out_data;
3590
3591 ret = do_mount(kernel_dev, dir_name, kernel_type, flags, options);
3592
3593 kfree(options);
3594out_data:
3595 kfree(kernel_dev);
3596out_dev:
3597 kfree(kernel_type);
3598out_type:
3599 return ret;
3600}
3601
3602#define FSMOUNT_VALID_FLAGS \
3603 (MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID | MOUNT_ATTR_NODEV | \
3604 MOUNT_ATTR_NOEXEC | MOUNT_ATTR__ATIME | MOUNT_ATTR_NODIRATIME | \
3605 MOUNT_ATTR_NOSYMFOLLOW)
3606
3607#define MOUNT_SETATTR_VALID_FLAGS (FSMOUNT_VALID_FLAGS | MOUNT_ATTR_IDMAP)
3608
3609#define MOUNT_SETATTR_PROPAGATION_FLAGS \
3610 (MS_UNBINDABLE | MS_PRIVATE | MS_SLAVE | MS_SHARED)
3611
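/* Translate MOUNT_ATTR_* uapi flags into their internal MNT_* equivalents */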
3612static unsigned int attr_flags_to_mnt_flags(u64 attr_flags)
3613{
3614 unsigned int mnt_flags = 0;
3615
3616 if (attr_flags & MOUNT_ATTR_RDONLY)
3617 mnt_flags |= MNT_READONLY;
3618 if (attr_flags & MOUNT_ATTR_NOSUID)
3619 mnt_flags |= MNT_NOSUID;
3620 if (attr_flags & MOUNT_ATTR_NODEV)
3621 mnt_flags |= MNT_NODEV;
3622 if (attr_flags & MOUNT_ATTR_NOEXEC)
3623 mnt_flags |= MNT_NOEXEC;
3624 if (attr_flags & MOUNT_ATTR_NODIRATIME)
3625 mnt_flags |= MNT_NODIRATIME;
3626 if (attr_flags & MOUNT_ATTR_NOSYMFOLLOW)
3627 mnt_flags |= MNT_NOSYMFOLLOW;
3628
3629 return mnt_flags;
3630}
3631
/*
 * Create a kernel mount representation for a new, prepared superblock
 * (specified by fs_fd) and attach to an open_tree-like file descriptor.
 */
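/*
 * An illustrative userspace sequence (sketch only; error handling omitted,
 * and "ext4"/"/dev/sda1"/"/mnt" are placeholder values):
 *
 *	fd = fsopen("ext4", FSOPEN_CLOEXEC);
 *	fsconfig(fd, FSCONFIG_SET_STRING, "source", "/dev/sda1", 0);
 *	fsconfig(fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0);
 *	mfd = fsmount(fd, FSMOUNT_CLOEXEC, MOUNT_ATTR_NODEV);
 *	move_mount(mfd, "", AT_FDCWD, "/mnt", MOVE_MOUNT_F_EMPTY_PATH);
 */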
3636SYSCALL_DEFINE3(fsmount, int, fs_fd, unsigned int, flags,
3637 unsigned int, attr_flags)
3638{
3639 struct mnt_namespace *ns;
3640 struct fs_context *fc;
3641 struct file *file;
3642 struct path newmount;
3643 struct mount *mnt;
3644 struct fd f;
3645 unsigned int mnt_flags = 0;
3646 long ret;
3647
3648 if (!may_mount())
3649 return -EPERM;
3650
3651 if ((flags & ~(FSMOUNT_CLOEXEC)) != 0)
3652 return -EINVAL;
3653
3654 if (attr_flags & ~FSMOUNT_VALID_FLAGS)
3655 return -EINVAL;
3656
3657 mnt_flags = attr_flags_to_mnt_flags(attr_flags);
3658
3659 switch (attr_flags & MOUNT_ATTR__ATIME) {
3660 case MOUNT_ATTR_STRICTATIME:
3661 break;
3662 case MOUNT_ATTR_NOATIME:
3663 mnt_flags |= MNT_NOATIME;
3664 break;
3665 case MOUNT_ATTR_RELATIME:
3666 mnt_flags |= MNT_RELATIME;
3667 break;
3668 default:
3669 return -EINVAL;
3670 }
3671
3672 f = fdget(fs_fd);
3673 if (!f.file)
3674 return -EBADF;
3675
3676 ret = -EINVAL;
3677 if (f.file->f_op != &fscontext_fops)
3678 goto err_fsfd;
3679
3680 fc = f.file->private_data;
3681
3682 ret = mutex_lock_interruptible(&fc->uapi_mutex);
3683 if (ret < 0)
3684 goto err_fsfd;
3685
 /* There must be a valid superblock or we can't mount it */
3687 ret = -EINVAL;
3688 if (!fc->root)
3689 goto err_unlock;
3690
3691 ret = -EPERM;
3692 if (mount_too_revealing(fc->root->d_sb, &mnt_flags)) {
3693 pr_warn("VFS: Mount too revealing\n");
3694 goto err_unlock;
3695 }
3696
3697 ret = -EBUSY;
3698 if (fc->phase != FS_CONTEXT_AWAITING_MOUNT)
3699 goto err_unlock;
3700
3701 if (fc->sb_flags & SB_MANDLOCK)
3702 warn_mandlock();
3703
3704 newmount.mnt = vfs_create_mount(fc);
3705 if (IS_ERR(newmount.mnt)) {
3706 ret = PTR_ERR(newmount.mnt);
3707 goto err_unlock;
3708 }
3709 newmount.dentry = dget(fc->root);
3710 newmount.mnt->mnt_flags = mnt_flags;
3711
 /* We've done the mount bit - now move the file context into more or
 * less the same state as if we'd done an fspick().  We don't want to
 * do any memory allocation or anything like that at this point as we
 * don't want to have to handle any errors incurred.
 */
3717 vfs_clean_context(fc);
3718
3719 ns = alloc_mnt_ns(current->nsproxy->mnt_ns->user_ns, true);
3720 if (IS_ERR(ns)) {
3721 ret = PTR_ERR(ns);
3722 goto err_path;
3723 }
3724 mnt = real_mount(newmount.mnt);
3725 mnt->mnt_ns = ns;
3726 ns->root = mnt;
3727 ns->mounts = 1;
3728 list_add(&mnt->mnt_list, &ns->list);
3729 mntget(newmount.mnt);
3730
 /* Attach to an apparent O_PATH fd with a note that we need to unmount
 * it, not just simply put it.
 */
3734 file = dentry_open(&newmount, O_PATH, fc->cred);
3735 if (IS_ERR(file)) {
3736 dissolve_on_fput(newmount.mnt);
3737 ret = PTR_ERR(file);
3738 goto err_path;
3739 }
3740 file->f_mode |= FMODE_NEED_UNMOUNT;
3741
3742 ret = get_unused_fd_flags((flags & FSMOUNT_CLOEXEC) ? O_CLOEXEC : 0);
3743 if (ret >= 0)
3744 fd_install(ret, file);
3745 else
3746 fput(file);
3747
3748err_path:
3749 path_put(&newmount);
3750err_unlock:
3751 mutex_unlock(&fc->uapi_mutex);
3752err_fsfd:
3753 fdput(f);
3754 return ret;
3755}
3756
/*
 * Move a mount from one place to another.  In combination with
 * fsopen()/fsmount() this is used to install a new mount and in combination
 * with open_tree(OPEN_TREE_CLONE [| AT_RECURSIVE]) it can be used to copy
 * a mount subtree.
 *
 * Note the flags value is a combination of MOVE_MOUNT_* flags.
 */
3765SYSCALL_DEFINE5(move_mount,
3766 int, from_dfd, const char __user *, from_pathname,
3767 int, to_dfd, const char __user *, to_pathname,
3768 unsigned int, flags)
3769{
3770 struct path from_path, to_path;
3771 unsigned int lflags;
3772 int ret = 0;
3773
3774 if (!may_mount())
3775 return -EPERM;
3776
3777 if (flags & ~MOVE_MOUNT__MASK)
3778 return -EINVAL;
3779
 /* If someone gives a pathname, they aren't permitted to move
 * from an fd that requires unmount as we can't get at the flag
 * to clear it afterwards.
 */
3784 lflags = 0;
3785 if (flags & MOVE_MOUNT_F_SYMLINKS) lflags |= LOOKUP_FOLLOW;
3786 if (flags & MOVE_MOUNT_F_AUTOMOUNTS) lflags |= LOOKUP_AUTOMOUNT;
3787 if (flags & MOVE_MOUNT_F_EMPTY_PATH) lflags |= LOOKUP_EMPTY;
3788
3789 ret = user_path_at(from_dfd, from_pathname, lflags, &from_path);
3790 if (ret < 0)
3791 return ret;
3792
3793 lflags = 0;
3794 if (flags & MOVE_MOUNT_T_SYMLINKS) lflags |= LOOKUP_FOLLOW;
3795 if (flags & MOVE_MOUNT_T_AUTOMOUNTS) lflags |= LOOKUP_AUTOMOUNT;
3796 if (flags & MOVE_MOUNT_T_EMPTY_PATH) lflags |= LOOKUP_EMPTY;
3797
3798 ret = user_path_at(to_dfd, to_pathname, lflags, &to_path);
3799 if (ret < 0)
3800 goto out_from;
3801
3802 ret = security_move_mount(&from_path, &to_path);
3803 if (ret < 0)
3804 goto out_to;
3805
3806 if (flags & MOVE_MOUNT_SET_GROUP)
3807 ret = do_set_group(&from_path, &to_path);
3808 else
3809 ret = do_move_mount(&from_path, &to_path);
3810
3811out_to:
3812 path_put(&to_path);
3813out_from:
3814 path_put(&from_path);
3815 return ret;
3816}
3817
/*
 * Return true if path is reachable from root
 *
 * namespace_sem or mount_lock is held
 */
3823bool is_path_reachable(struct mount *mnt, struct dentry *dentry,
3824 const struct path *root)
3825{
3826 while (&mnt->mnt != root->mnt && mnt_has_parent(mnt)) {
3827 dentry = mnt->mnt_mountpoint;
3828 mnt = mnt->mnt_parent;
3829 }
3830 return &mnt->mnt == root->mnt && is_subdir(dentry, root->dentry);
3831}
3832
3833bool path_is_under(const struct path *path1, const struct path *path2)
3834{
3835 bool res;
3836 read_seqlock_excl(&mount_lock);
3837 res = is_path_reachable(real_mount(path1->mnt), path1->dentry, path2);
3838 read_sequnlock_excl(&mount_lock);
3839 return res;
3840}
3841EXPORT_SYMBOL(path_is_under);
3842
/*
 * pivot_root Semantics:
 * Moves the root file system of the current process to the directory put_old,
 * makes new_root as the new root file system of the current process, and sets
 * root/cwd of all processes which had them on the current root to new_root.
 *
 * Restrictions:
 * The new_root and put_old must be directories, and must not be on the
 * same file system as the current process root. The put_old must be
 * underneath new_root, i.e. adding a non-zero number of /.. to the string
 * pointed to by put_old must yield the same directory as new_root. No other
 * file system may be mounted on put_old. After all, new_root is a mountpoint.
 *
 * Also, the current root cannot be on the 'rootfs' (initial ramfs) filesystem.
 * See Documentation/filesystems/ramfs-rootfs-initramfs.rst for alternatives
 * in this situation.
 *
 * Notes:
 *  - we don't move root/cwd if they are not at the root (reason: if something
 *    cared enough to change them, it's probably wrong to force them elsewhere)
 *  - it's okay to pick a root that isn't the root of a file system, e.g.
 *    /nfs/my_root where /nfs is the mount point. It must be a mountpoint,
 *    though, so you may need to say mount --bind /nfs/my_root /nfs/my_root
 *    first.
 */
3868SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
3869 const char __user *, put_old)
3870{
3871 struct path new, old, root;
3872 struct mount *new_mnt, *root_mnt, *old_mnt, *root_parent, *ex_parent;
3873 struct mountpoint *old_mp, *root_mp;
3874 int error;
3875
3876 if (!may_mount())
3877 return -EPERM;
3878
3879 error = user_path_at(AT_FDCWD, new_root,
3880 LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &new);
3881 if (error)
3882 goto out0;
3883
3884 error = user_path_at(AT_FDCWD, put_old,
3885 LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &old);
3886 if (error)
3887 goto out1;
3888
3889 error = security_sb_pivotroot(&old, &new);
3890 if (error)
3891 goto out2;
3892
3893 get_fs_root(current->fs, &root);
3894 old_mp = lock_mount(&old);
3895 error = PTR_ERR(old_mp);
3896 if (IS_ERR(old_mp))
3897 goto out3;
3898
3899 error = -EINVAL;
3900 new_mnt = real_mount(new.mnt);
3901 root_mnt = real_mount(root.mnt);
3902 old_mnt = real_mount(old.mnt);
3903 ex_parent = new_mnt->mnt_parent;
3904 root_parent = root_mnt->mnt_parent;
3905 if (IS_MNT_SHARED(old_mnt) ||
3906 IS_MNT_SHARED(ex_parent) ||
3907 IS_MNT_SHARED(root_parent))
3908 goto out4;
3909 if (!check_mnt(root_mnt) || !check_mnt(new_mnt))
3910 goto out4;
3911 if (new_mnt->mnt.mnt_flags & MNT_LOCKED)
3912 goto out4;
3913 error = -ENOENT;
3914 if (d_unlinked(new.dentry))
3915 goto out4;
3916 error = -EBUSY;
3917 if (new_mnt == root_mnt || old_mnt == root_mnt)
3918 goto out4;
3919 error = -EINVAL;
3920 if (root.mnt->mnt_root != root.dentry)
3921 goto out4;
3922 if (!mnt_has_parent(root_mnt))
3923 goto out4;
3924 if (new.mnt->mnt_root != new.dentry)
3925 goto out4;
3926 if (!mnt_has_parent(new_mnt))
3927 goto out4;
 /* make sure we can reach put_old from new_root */
3929 if (!is_path_reachable(old_mnt, old.dentry, &new))
3930 goto out4;
 /* make certain new is below the root */
3932 if (!is_path_reachable(new_mnt, new.dentry, &root))
3933 goto out4;
3934 lock_mount_hash();
3935 umount_mnt(new_mnt);
3936 root_mp = unhash_mnt(root_mnt);
3937 if (root_mnt->mnt.mnt_flags & MNT_LOCKED) {
3938 new_mnt->mnt.mnt_flags |= MNT_LOCKED;
3939 root_mnt->mnt.mnt_flags &= ~MNT_LOCKED;
3940 }
 /* mount old root on put_old */
3942 attach_mnt(root_mnt, old_mnt, old_mp);
 /* mount new_root on / */
3944 attach_mnt(new_mnt, root_parent, root_mp);
3945 mnt_add_count(root_parent, -1);
3946 touch_mnt_namespace(current->nsproxy->mnt_ns);
 /* A moved mount should not expire automatically */
3948 list_del_init(&new_mnt->mnt_expire);
3949 put_mountpoint(root_mp);
3950 unlock_mount_hash();
3951 chroot_fs_refs(&root, &new);
3952 error = 0;
3953out4:
3954 unlock_mount(old_mp);
3955 if (!error)
3956 mntput_no_expire(ex_parent);
3957out3:
3958 path_put(&root);
3959out2:
3960 path_put(&old);
3961out1:
3962 path_put(&new);
3963out0:
3964 return error;
3965}
3966
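/* Compute a mount's new flags by applying @kattr's clear and set masks */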
3967static unsigned int recalc_flags(struct mount_kattr *kattr, struct mount *mnt)
3968{
3969 unsigned int flags = mnt->mnt.mnt_flags;
3970
 /* flags to clear */
3972 flags &= ~kattr->attr_clr;
 /* flags to raise */
3974 flags |= kattr->attr_set;
3975
3976 return flags;
3977}
3978
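/* Check whether @mnt is allowed to become an idmapped mount. */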
3979static int can_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt)
3980{
3981 struct vfsmount *m = &mnt->mnt;
3982 struct user_namespace *fs_userns = m->mnt_sb->s_user_ns;
3983
3984 if (!kattr->mnt_userns)
3985 return 0;
3986
 /*
 * Creating an idmapped mount with the filesystem wide idmapping
 * doesn't make sense so block that. We don't allow mushy semantics.
 */
3991 if (kattr->mnt_userns == fs_userns)
3992 return -EINVAL;
3993
 /*
 * Once a mount has been idmapped we don't allow it to change its
 * mapping. It makes things simpler and callers can just create
 * another bind-mount they can idmap if they want to.
 */
3999 if (is_idmapped_mnt(m))
4000 return -EPERM;
4001
 /* The underlying filesystem doesn't support idmapped mounts yet. */
4003 if (!(m->mnt_sb->s_type->fs_flags & FS_ALLOW_IDMAP))
4004 return -EINVAL;
4005
 /* We're not controlling the superblock. */
4007 if (!ns_capable(fs_userns, CAP_SYS_ADMIN))
4008 return -EPERM;
4009
 /* The mount has already been visible in the filesystem hierarchy. */
4011 if (!is_anon_ns(mnt->mnt_ns))
4012 return -EINVAL;
4013
4014 return 0;
4015}
4016
/**
 * mnt_allow_writers() - check whether the attribute change allows writers
 * @kattr: the new mount attributes
 * @mnt: the mount to which @kattr will be applied
 *
 * Check whether the new mount attributes in @kattr allow concurrent writers.
 *
 * Return: true if writers can keep going, false if they need to be held off.
 */
4026static inline bool mnt_allow_writers(const struct mount_kattr *kattr,
4027 const struct mount *mnt)
4028{
4029 return (!(kattr->attr_set & MNT_READONLY) ||
4030 (mnt->mnt.mnt_flags & MNT_READONLY)) &&
4031 !kattr->mnt_userns;
4032}
4033
4034static int mount_setattr_prepare(struct mount_kattr *kattr, struct mount *mnt)
4035{
4036 struct mount *m;
4037 int err;
4038
4039 for (m = mnt; m; m = next_mnt(m, mnt)) {
4040 if (!can_change_locked_flags(m, recalc_flags(kattr, m))) {
4041 err = -EPERM;
4042 break;
4043 }
4044
4045 err = can_idmap_mount(kattr, m);
4046 if (err)
4047 break;
4048
4049 if (!mnt_allow_writers(kattr, m)) {
4050 err = mnt_hold_writers(m);
4051 if (err)
4052 break;
4053 }
4054
4055 if (!kattr->recurse)
4056 return 0;
4057 }
4058
4059 if (err) {
4060 struct mount *p;
4061
 /*
 * If we had to call mnt_hold_writers() MNT_WRITE_HOLD will
 * be set in @mnt_flags. The loop unsets MNT_WRITE_HOLD for all
 * mounts and needs to take care to include the first mount.
 */
4067 for (p = mnt; p; p = next_mnt(p, mnt)) {
 /* If we had to hold writers unblock them. */
4069 if (p->mnt.mnt_flags & MNT_WRITE_HOLD)
4070 mnt_unhold_writers(p);
4071
 /*
 * We're done once the first mount we changed got
 * MNT_WRITE_HOLD unset.
 */
4076 if (p == m)
4077 break;
4078 }
4079 }
4080 return err;
4081}
4082
4083static void do_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt)
4084{
4085 struct user_namespace *mnt_userns, *old_mnt_userns;
4086
4087 if (!kattr->mnt_userns)
4088 return;
4089
 /*
 * We're the only ones able to change the mount's idmapping. So
 * mnt->mnt.mnt_userns is stable and we can retrieve it directly.
 */
4094 old_mnt_userns = mnt->mnt.mnt_userns;
4095
4096 mnt_userns = get_user_ns(kattr->mnt_userns);
 /* Pairs with smp_load_acquire() in mnt_user_ns(). */
4098 smp_store_release(&mnt->mnt.mnt_userns, mnt_userns);
4099
 /*
 * If this is an idmapped filesystem drop the reference we've taken
 * in vfs_create_mount() before.
 */
4104 if (!initial_idmapping(old_mnt_userns))
4105 put_user_ns(old_mnt_userns);
4106}
4107
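/* Second pass: commit the prepared attribute changes; this must not fail. */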
4108static void mount_setattr_commit(struct mount_kattr *kattr, struct mount *mnt)
4109{
4110 struct mount *m;
4111
4112 for (m = mnt; m; m = next_mnt(m, mnt)) {
4113 unsigned int flags;
4114
4115 do_idmap_mount(kattr, m);
4116 flags = recalc_flags(kattr, m);
4117 WRITE_ONCE(m->mnt.mnt_flags, flags);
4118
 /* If we had to hold writers unblock them. */
4120 if (m->mnt.mnt_flags & MNT_WRITE_HOLD)
4121 mnt_unhold_writers(m);
4122
4123 if (kattr->propagation)
4124 change_mnt_propagation(m, kattr->propagation);
4125 if (!kattr->recurse)
4126 break;
4127 }
4128 touch_mnt_namespace(mnt->mnt_ns);
4129}
4130
4131static int do_mount_setattr(struct path *path, struct mount_kattr *kattr)
4132{
4133 struct mount *mnt = real_mount(path->mnt);
4134 int err = 0;
4135
4136 if (path->dentry != mnt->mnt.mnt_root)
4137 return -EINVAL;
4138
4139 if (kattr->propagation) {
 /*
 * Only take namespace_lock() if we're actually changing
 * propagation.
 */
4144 namespace_lock();
4145 if (kattr->propagation == MS_SHARED) {
4146 err = invent_group_ids(mnt, kattr->recurse);
4147 if (err) {
4148 namespace_unlock();
4149 return err;
4150 }
4151 }
4152 }
4153
4154 err = -EINVAL;
4155 lock_mount_hash();
4156
 /* Ensure that this isn't anything purely vfs internal. */
4158 if (!is_mounted(&mnt->mnt))
4159 goto out;
4160
 /*
 * If this is an attached mount make sure it's located in the callers
 * mount namespace. If it's not don't let the caller interact with it.
 * If this is a detached mount make sure it has an anonymous mount
 * namespace attached to it, i.e. we've created it via OPEN_TREE_CLONE.
 */
4167 if (!(mnt_has_parent(mnt) ? check_mnt(mnt) : is_anon_ns(mnt->mnt_ns)))
4168 goto out;
4169
 /*
 * First, we get the mount tree in a shape where we can change mount
 * properties without failure. If we succeeded to do so we commit all
 * changes and if we failed we clean up.
 */
4175 err = mount_setattr_prepare(kattr, mnt);
4176 if (!err)
4177 mount_setattr_commit(kattr, mnt);
4178
4179out:
4180 unlock_mount_hash();
4181
4182 if (kattr->propagation) {
4183 namespace_unlock();
4184 if (err)
4185 cleanup_group_ids(mnt, NULL);
4186 }
4187
4188 return err;
4189}
4190
4191static int build_mount_idmapped(const struct mount_attr *attr, size_t usize,
4192 struct mount_kattr *kattr, unsigned int flags)
4193{
4194 int err = 0;
4195 struct ns_common *ns;
4196 struct user_namespace *mnt_userns;
4197 struct file *file;
4198
4199 if (!((attr->attr_set | attr->attr_clr) & MOUNT_ATTR_IDMAP))
4200 return 0;
4201
 /*
 * We currently do not support clearing an idmapped mount. If this ever
 * is a use-case we can revisit this but for now let's keep it simple
 * and not allow it.
 */
4207 if (attr->attr_clr & MOUNT_ATTR_IDMAP)
4208 return -EINVAL;
4209
4210 if (attr->userns_fd > INT_MAX)
4211 return -EINVAL;
4212
4213 file = fget(attr->userns_fd);
4214 if (!file)
4215 return -EBADF;
4216
4217 if (!proc_ns_file(file)) {
4218 err = -EINVAL;
4219 goto out_fput;
4220 }
4221
4222 ns = get_proc_ns(file_inode(file));
4223 if (ns->ops->type != CLONE_NEWUSER) {
4224 err = -EINVAL;
4225 goto out_fput;
4226 }
4227
 /*
 * The initial idmapping cannot be used to create an idmapped
 * mount. We use the initial idmapping as an indicator of a mount
 * that is not idmapped. It can simply be passed into helpers that
 * are aware of idmapped mounts as a convenient shortcut. A user
 * can just create a dedicated identity mapping to achieve the same
 * result.
 */
4236 mnt_userns = container_of(ns, struct user_namespace, ns);
4237 if (initial_idmapping(mnt_userns)) {
4238 err = -EPERM;
4239 goto out_fput;
4240 }
4241 kattr->mnt_userns = get_user_ns(mnt_userns);
4242
4243out_fput:
4244 fput(file);
4245 return err;
4246}
4247
4248static int build_mount_kattr(const struct mount_attr *attr, size_t usize,
4249 struct mount_kattr *kattr, unsigned int flags)
4250{
4251 unsigned int lookup_flags = LOOKUP_AUTOMOUNT | LOOKUP_FOLLOW;
4252
4253 if (flags & AT_NO_AUTOMOUNT)
4254 lookup_flags &= ~LOOKUP_AUTOMOUNT;
4255 if (flags & AT_SYMLINK_NOFOLLOW)
4256 lookup_flags &= ~LOOKUP_FOLLOW;
4257 if (flags & AT_EMPTY_PATH)
4258 lookup_flags |= LOOKUP_EMPTY;
4259
4260 *kattr = (struct mount_kattr) {
4261 .lookup_flags = lookup_flags,
4262 .recurse = !!(flags & AT_RECURSIVE),
4263 };
4264
4265 if (attr->propagation & ~MOUNT_SETATTR_PROPAGATION_FLAGS)
4266 return -EINVAL;
4267 if (hweight32(attr->propagation & MOUNT_SETATTR_PROPAGATION_FLAGS) > 1)
4268 return -EINVAL;
4269 kattr->propagation = attr->propagation;
4270
4271 if ((attr->attr_set | attr->attr_clr) & ~MOUNT_SETATTR_VALID_FLAGS)
4272 return -EINVAL;
4273
4274 kattr->attr_set = attr_flags_to_mnt_flags(attr->attr_set);
4275 kattr->attr_clr = attr_flags_to_mnt_flags(attr->attr_clr);
4276
 /*
 * Since the MOUNT_ATTR_<atime> values are an enum, not a bitmap,
 * users wanting to transition to a different atime setting cannot
 * simply specify the atime setting in @attr_set, but must also
 * specify MOUNT_ATTR__ATIME in the @attr_clr field.
 * So ensure that MOUNT_ATTR__ATIME can't be partially set in
 * @attr_clr and that @attr_set can't have any atime bits set if
 * MOUNT_ATTR__ATIME isn't set in @attr_clr.
 */
4286 if (attr->attr_clr & MOUNT_ATTR__ATIME) {
4287 if ((attr->attr_clr & MOUNT_ATTR__ATIME) != MOUNT_ATTR__ATIME)
4288 return -EINVAL;
4289
 /*
 * Clear all previous time settings as they are mutually
 * exclusive.
 */
4294 kattr->attr_clr |= MNT_RELATIME | MNT_NOATIME;
4295 switch (attr->attr_set & MOUNT_ATTR__ATIME) {
4296 case MOUNT_ATTR_RELATIME:
4297 kattr->attr_set |= MNT_RELATIME;
4298 break;
4299 case MOUNT_ATTR_NOATIME:
4300 kattr->attr_set |= MNT_NOATIME;
4301 break;
4302 case MOUNT_ATTR_STRICTATIME:
4303 break;
4304 default:
4305 return -EINVAL;
4306 }
4307 } else {
4308 if (attr->attr_set & MOUNT_ATTR__ATIME)
4309 return -EINVAL;
4310 }
4311
4312 return build_mount_idmapped(attr, usize, kattr, flags);
4313}
4314
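/* Drop the user namespace reference taken by build_mount_idmapped(). */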
4315static void finish_mount_kattr(struct mount_kattr *kattr)
4316{
4317 put_user_ns(kattr->mnt_userns);
4318 kattr->mnt_userns = NULL;
4319}
4320
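/*
 * Illustrative userspace sketch (error handling omitted; "/some/dir",
 * "/target" and userns_fd are placeholder values) of creating a detached,
 * idmapped bind mount with this syscall and then attaching it:
 *
 *	struct mount_attr attr = {
 *		.attr_set  = MOUNT_ATTR_IDMAP,
 *		.userns_fd = userns_fd,
 *	};
 *	fd = open_tree(AT_FDCWD, "/some/dir", OPEN_TREE_CLONE);
 *	mount_setattr(fd, "", AT_EMPTY_PATH, &attr, sizeof(attr));
 *	move_mount(fd, "", AT_FDCWD, "/target", MOVE_MOUNT_F_EMPTY_PATH);
 */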
4321SYSCALL_DEFINE5(mount_setattr, int, dfd, const char __user *, path,
4322 unsigned int, flags, struct mount_attr __user *, uattr,
4323 size_t, usize)
4324{
4325 int err;
4326 struct path target;
4327 struct mount_attr attr;
4328 struct mount_kattr kattr;
4329
4330 BUILD_BUG_ON(sizeof(struct mount_attr) != MOUNT_ATTR_SIZE_VER0);
4331
4332 if (flags & ~(AT_EMPTY_PATH |
4333 AT_RECURSIVE |
4334 AT_SYMLINK_NOFOLLOW |
4335 AT_NO_AUTOMOUNT))
4336 return -EINVAL;
4337
4338 if (unlikely(usize > PAGE_SIZE))
4339 return -E2BIG;
4340 if (unlikely(usize < MOUNT_ATTR_SIZE_VER0))
4341 return -EINVAL;
4342
4343 if (!may_mount())
4344 return -EPERM;
4345
4346 err = copy_struct_from_user(&attr, sizeof(attr), uattr, usize);
4347 if (err)
4348 return err;
4349
 /* Don't bother walking through the mounts if this is a nop. */
4351 if (attr.attr_set == 0 &&
4352 attr.attr_clr == 0 &&
4353 attr.propagation == 0)
4354 return 0;
4355
4356 err = build_mount_kattr(&attr, usize, &kattr, flags);
4357 if (err)
4358 return err;
4359
4360 err = user_path_at(dfd, path, kattr.lookup_flags, &target);
4361 if (!err) {
4362 err = do_mount_setattr(&target, &kattr);
4363 path_put(&target);
4364 }
4365 finish_mount_kattr(&kattr);
4366 return err;
4367}
4368
4369static void __init init_mount_tree(void)
4370{
4371 struct vfsmount *mnt;
4372 struct mount *m;
4373 struct mnt_namespace *ns;
4374 struct path root;
4375
4376 mnt = vfs_kern_mount(&rootfs_fs_type, 0, "rootfs", NULL);
4377 if (IS_ERR(mnt))
4378 panic("Can't create rootfs");
4379
4380 ns = alloc_mnt_ns(&init_user_ns, false);
4381 if (IS_ERR(ns))
4382 panic("Can't allocate initial namespace");
4383 m = real_mount(mnt);
4384 m->mnt_ns = ns;
4385 ns->root = m;
4386 ns->mounts = 1;
4387 list_add(&m->mnt_list, &ns->list);
4388 init_task.nsproxy->mnt_ns = ns;
4389 get_mnt_ns(ns);
4390
4391 root.mnt = mnt;
4392 root.dentry = mnt->mnt_root;
4393 mnt->mnt_flags |= MNT_LOCKED;
4394
4395 set_fs_pwd(current->fs, &root);
4396 set_fs_root(current->fs, &root);
4397}
4398
4399void __init mnt_init(void)
4400{
4401 int err;
4402
4403 mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct mount),
4404 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, NULL);
4405
4406 mount_hashtable = alloc_large_system_hash("Mount-cache",
4407 sizeof(struct hlist_head),
4408 mhash_entries, 19,
4409 HASH_ZERO,
4410 &m_hash_shift, &m_hash_mask, 0, 0);
4411 mountpoint_hashtable = alloc_large_system_hash("Mountpoint-cache",
4412 sizeof(struct hlist_head),
4413 mphash_entries, 19,
4414 HASH_ZERO,
4415 &mp_hash_shift, &mp_hash_mask, 0, 0);
4416
4417 if (!mount_hashtable || !mountpoint_hashtable)
4418 panic("Failed to allocate mount hash table\n");
4419
4420 kernfs_init();
4421
4422 err = sysfs_init();
4423 if (err)
4424 printk(KERN_WARNING "%s: sysfs_init error: %d\n",
4425 __func__, err);
4426 fs_kobj = kobject_create_and_add("fs", NULL);
4427 if (!fs_kobj)
4428 printk(KERN_WARNING "%s: kobj create error\n", __func__);
4429 shmem_init();
4430 init_rootfs();
4431 init_mount_tree();
4432}
4433
4434void put_mnt_ns(struct mnt_namespace *ns)
4435{
4436 if (!refcount_dec_and_test(&ns->ns.count))
4437 return;
4438 drop_collected_mounts(&ns->root->mnt);
4439 free_mnt_ns(ns);
4440}
4441
4442struct vfsmount *kern_mount(struct file_system_type *type)
4443{
4444 struct vfsmount *mnt;
4445 mnt = vfs_kern_mount(type, SB_KERNMOUNT, type->name, NULL);
4446 if (!IS_ERR(mnt)) {
 /*
 * it is a longterm mount, don't release mnt until
 * we unmount before file sys is unregistered
 */
4451 real_mount(mnt)->mnt_ns = MNT_NS_INTERNAL;
4452 }
4453 return mnt;
4454}
4455EXPORT_SYMBOL_GPL(kern_mount);
4456
4457void kern_unmount(struct vfsmount *mnt)
4458{
 /* release long term mount so mount point can be released */
4460 if (!IS_ERR_OR_NULL(mnt)) {
4461 real_mount(mnt)->mnt_ns = NULL;
4462 synchronize_rcu();
4463 mntput(mnt);
4464 }
4465}
4466EXPORT_SYMBOL(kern_unmount);
4467
4468void kern_unmount_array(struct vfsmount *mnt[], unsigned int num)
4469{
4470 unsigned int i;
4471
4472 for (i = 0; i < num; i++)
4473 if (mnt[i])
4474 real_mount(mnt[i])->mnt_ns = NULL;
4475 synchronize_rcu_expedited();
4476 for (i = 0; i < num; i++)
4477 mntput(mnt[i]);
4478}
4479EXPORT_SYMBOL(kern_unmount_array);
4480
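/* Is @mnt attached to the caller's mount namespace? */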
4481bool our_mnt(struct vfsmount *mnt)
4482{
4483 return check_mnt(real_mount(mnt));
4484}
4485
4486bool current_chrooted(void)
4487{
 /* Does the current process have a non-standard root? */
4489 struct path ns_root;
4490 struct path fs_root;
4491 bool chrooted;
4492
 /* Find the namespace root */
 ns_root.mnt = &current->nsproxy->mnt_ns->root->mnt;
4495 ns_root.dentry = ns_root.mnt->mnt_root;
4496 path_get(&ns_root);
4497 while (d_mountpoint(ns_root.dentry) && follow_down_one(&ns_root))
4498 ;
4499
4500 get_fs_root(current->fs, &fs_root);
4501
4502 chrooted = !path_equal(&fs_root, &ns_root);
4503
4504 path_put(&fs_root);
4505 path_put(&ns_root);
4506
4507 return chrooted;
4508}
4509
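/*
 * Is an instance of this filesystem, mounted with equally or more
 * restrictive options, already visible in the caller's mount namespace?
 */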
4510static bool mnt_already_visible(struct mnt_namespace *ns,
4511 const struct super_block *sb,
4512 int *new_mnt_flags)
4513{
4514 int new_flags = *new_mnt_flags;
4515 struct mount *mnt;
4516 bool visible = false;
4517
4518 down_read(&namespace_sem);
4519 lock_ns_list(ns);
4520 list_for_each_entry(mnt, &ns->list, mnt_list) {
4521 struct mount *child;
4522 int mnt_flags;
4523
4524 if (mnt_is_cursor(mnt))
4525 continue;
4526
4527 if (mnt->mnt.mnt_sb->s_type != sb->s_type)
4528 continue;
4529
 /* This mount is not fully visible if its root directory
 * is not the root directory of the filesystem.
 */
4533 if (mnt->mnt.mnt_root != mnt->mnt.mnt_sb->s_root)
4534 continue;
4535
 /* A local view of the mount flags */
4537 mnt_flags = mnt->mnt.mnt_flags;
4538
 /* Don't miss readonly hidden in the superblock flags */
4540 if (sb_rdonly(mnt->mnt.mnt_sb))
4541 mnt_flags |= MNT_LOCK_READONLY;
4542
 /* Verify the mount flags are equal to or more permissive
 * than the proposed new mount.
 */
4546 if ((mnt_flags & MNT_LOCK_READONLY) &&
4547 !(new_flags & MNT_READONLY))
4548 continue;
4549 if ((mnt_flags & MNT_LOCK_ATIME) &&
4550 ((mnt_flags & MNT_ATIME_MASK) != (new_flags & MNT_ATIME_MASK)))
4551 continue;
4552
 /* This mount is not fully visible if there are any
 * locked child mounts that cover anything except for
 * empty directories.
 */
4557 list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
4558 struct inode *inode = child->mnt_mountpoint->d_inode;
 /* Only worry about locked mounts */
4560 if (!(child->mnt.mnt_flags & MNT_LOCKED))
4561 continue;
 /* Is the directory permanently empty? */
4563 if (!is_empty_dir_inode(inode))
4564 goto next;
4565 }
 /* Preserve the locked attributes */
4567 *new_mnt_flags |= mnt_flags & (MNT_LOCK_READONLY | \
4568 MNT_LOCK_ATIME);
4569 visible = true;
4570 goto found;
4571 next: ;
4572 }
4573found:
4574 unlock_ns_list(ns);
4575 up_read(&namespace_sem);
4576 return visible;
4577}
4578
4579static bool mount_too_revealing(const struct super_block *sb, int *new_mnt_flags)
4580{
4581 const unsigned long required_iflags = SB_I_NOEXEC | SB_I_NODEV;
4582 struct mnt_namespace *ns = current->nsproxy->mnt_ns;
4583 unsigned long s_iflags;
4584
4585 if (ns->user_ns == &init_user_ns)
4586 return false;
4587
 /* Can the filesystem be mounted by an unprivileged user? */
4589 s_iflags = sb->s_iflags;
4590 if (!(s_iflags & SB_I_USERNS_VISIBLE))
4591 return false;
4592
4593 if ((s_iflags & required_iflags) != required_iflags) {
4594 WARN_ONCE(1, "Expected s_iflags to contain 0x%lx\n",
4595 required_iflags);
4596 return true;
4597 }
4598
4599 return !mnt_already_visible(ns, sb, new_mnt_flags);
4600}
4601
4602bool mnt_may_suid(struct vfsmount *mnt)
4603{
 /*
 * Foreign mounts (accessed via fchdir or through /proc
 * symlinks) are always treated as if they are nosuid.  This
 * prevents namespaces from trusting potentially unsafe
 * suid/sgid bits, file caps, or security labels that originate
 * in other namespaces.
 */
4611 return !(mnt->mnt_flags & MNT_NOSUID) && check_mnt(real_mount(mnt)) &&
4612 current_in_userns(mnt->mnt_sb->s_user_ns);
4613}
4614
4615static struct ns_common *mntns_get(struct task_struct *task)
4616{
4617 struct ns_common *ns = NULL;
4618 struct nsproxy *nsproxy;
4619
4620 task_lock(task);
4621 nsproxy = task->nsproxy;
4622 if (nsproxy) {
4623 ns = &nsproxy->mnt_ns->ns;
4624 get_mnt_ns(to_mnt_ns(ns));
4625 }
4626 task_unlock(task);
4627
4628 return ns;
4629}
4630
4631static void mntns_put(struct ns_common *ns)
4632{
4633 put_mnt_ns(to_mnt_ns(ns));
4634}
4635
4636static int mntns_install(struct nsset *nsset, struct ns_common *ns)
4637{
4638 struct nsproxy *nsproxy = nsset->nsproxy;
4639 struct fs_struct *fs = nsset->fs;
4640 struct mnt_namespace *mnt_ns = to_mnt_ns(ns), *old_mnt_ns;
4641 struct user_namespace *user_ns = nsset->cred->user_ns;
4642 struct path root;
4643 int err;
4644
4645 if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) ||
4646 !ns_capable(user_ns, CAP_SYS_CHROOT) ||
4647 !ns_capable(user_ns, CAP_SYS_ADMIN))
4648 return -EPERM;
4649
4650 if (is_anon_ns(mnt_ns))
4651 return -EINVAL;
4652
4653 if (fs->users != 1)
4654 return -EINVAL;
4655
4656 get_mnt_ns(mnt_ns);
4657 old_mnt_ns = nsproxy->mnt_ns;
4658 nsproxy->mnt_ns = mnt_ns;
4659
 /* Find the root */
4661 err = vfs_path_lookup(mnt_ns->root->mnt.mnt_root, &mnt_ns->root->mnt,
4662 "/", LOOKUP_DOWN, &root);
4663 if (err) {
 /* revert to old namespace */
4665 nsproxy->mnt_ns = old_mnt_ns;
4666 put_mnt_ns(mnt_ns);
4667 return err;
4668 }
4669
4670 put_mnt_ns(old_mnt_ns);
4671
 /* Update the pwd and root */
4673 set_fs_pwd(fs, &root);
4674 set_fs_root(fs, &root);
4675
4676 path_put(&root);
4677 return 0;
4678}
4679
4680static struct user_namespace *mntns_owner(struct ns_common *ns)
4681{
4682 return to_mnt_ns(ns)->user_ns;
4683}
4684
4685const struct proc_ns_operations mntns_operations = {
4686 .name = "mnt",
4687 .type = CLONE_NEWNS,
4688 .get = mntns_get,
4689 .put = mntns_put,
4690 .install = mntns_install,
4691 .owner = mntns_owner,
4692};
4693
4694#ifdef CONFIG_SYSCTL
4695static struct ctl_table fs_namespace_sysctls[] = {
4696 {
4697 .procname = "mount-max",
4698 .data = &sysctl_mount_max,
4699 .maxlen = sizeof(unsigned int),
4700 .mode = 0644,
4701 .proc_handler = proc_dointvec_minmax,
4702 .extra1 = SYSCTL_ONE,
4703 },
4704 { }
4705};
4706
4707static int __init init_fs_namespace_sysctls(void)
4708{
4709 register_sysctl_init("fs", fs_namespace_sysctls);
4710 return 0;
4711}
4712fs_initcall(init_fs_namespace_sysctls);
4713
4714#endif
4715