1
2
3
4
5
6
7
8
9
10
11#include <linux/syscalls.h>
12#include <linux/export.h>
13#include <linux/capability.h>
14#include <linux/mnt_namespace.h>
15#include <linux/user_namespace.h>
16#include <linux/namei.h>
17#include <linux/security.h>
18#include <linux/idr.h>
19#include <linux/init.h>
20#include <linux/fs_struct.h>
21#include <linux/fsnotify.h>
22#include <linux/uaccess.h>
23#include <linux/proc_ns.h>
24#include <linux/magic.h>
25#include <linux/bootmem.h>
26#include <linux/task_work.h>
27#include "pnode.h"
28#include "internal.h"
29
30static unsigned int m_hash_mask __read_mostly;
31static unsigned int m_hash_shift __read_mostly;
32static unsigned int mp_hash_mask __read_mostly;
33static unsigned int mp_hash_shift __read_mostly;
34
35static __initdata unsigned long mhash_entries;
36static int __init set_mhash_entries(char *str)
37{
38 if (!str)
39 return 0;
40 mhash_entries = simple_strtoul(str, &str, 0);
41 return 1;
42}
43__setup("mhash_entries=", set_mhash_entries);
44
45static __initdata unsigned long mphash_entries;
46static int __init set_mphash_entries(char *str)
47{
48 if (!str)
49 return 0;
50 mphash_entries = simple_strtoul(str, &str, 0);
51 return 1;
52}
53__setup("mphash_entries=", set_mphash_entries);
54
55static u64 event;
56static DEFINE_IDA(mnt_id_ida);
57static DEFINE_IDA(mnt_group_ida);
58static DEFINE_SPINLOCK(mnt_id_lock);
59static int mnt_id_start = 0;
60static int mnt_group_start = 1;
61
62static struct hlist_head *mount_hashtable __read_mostly;
63static struct hlist_head *mountpoint_hashtable __read_mostly;
64static struct kmem_cache *mnt_cache __read_mostly;
65static DECLARE_RWSEM(namespace_sem);
66
67
68struct kobject *fs_kobj;
69EXPORT_SYMBOL_GPL(fs_kobj);
70
71
72
73
74
75
76
77
78
79__cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock);
80
81static inline struct hlist_head *m_hash(struct vfsmount *mnt, struct dentry *dentry)
82{
83 unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES);
84 tmp += ((unsigned long)dentry / L1_CACHE_BYTES);
85 tmp = tmp + (tmp >> m_hash_shift);
86 return &mount_hashtable[tmp & m_hash_mask];
87}
88
89static inline struct hlist_head *mp_hash(struct dentry *dentry)
90{
91 unsigned long tmp = ((unsigned long)dentry / L1_CACHE_BYTES);
92 tmp = tmp + (tmp >> mp_hash_shift);
93 return &mountpoint_hashtable[tmp & mp_hash_mask];
94}
95
96
97
98
99
100static int mnt_alloc_id(struct mount *mnt)
101{
102 int res;
103
104retry:
105 ida_pre_get(&mnt_id_ida, GFP_KERNEL);
106 spin_lock(&mnt_id_lock);
107 res = ida_get_new_above(&mnt_id_ida, mnt_id_start, &mnt->mnt_id);
108 if (!res)
109 mnt_id_start = mnt->mnt_id + 1;
110 spin_unlock(&mnt_id_lock);
111 if (res == -EAGAIN)
112 goto retry;
113
114 return res;
115}
116
117static void mnt_free_id(struct mount *mnt)
118{
119 int id = mnt->mnt_id;
120 spin_lock(&mnt_id_lock);
121 ida_remove(&mnt_id_ida, id);
122 if (mnt_id_start > id)
123 mnt_id_start = id;
124 spin_unlock(&mnt_id_lock);
125}
126
127
128
129
130
131
132static int mnt_alloc_group_id(struct mount *mnt)
133{
134 int res;
135
136 if (!ida_pre_get(&mnt_group_ida, GFP_KERNEL))
137 return -ENOMEM;
138
139 res = ida_get_new_above(&mnt_group_ida,
140 mnt_group_start,
141 &mnt->mnt_group_id);
142 if (!res)
143 mnt_group_start = mnt->mnt_group_id + 1;
144
145 return res;
146}
147
148
149
150
151void mnt_release_group_id(struct mount *mnt)
152{
153 int id = mnt->mnt_group_id;
154 ida_remove(&mnt_group_ida, id);
155 if (mnt_group_start > id)
156 mnt_group_start = id;
157 mnt->mnt_group_id = 0;
158}
159
160
161
162
163static inline void mnt_add_count(struct mount *mnt, int n)
164{
165#ifdef CONFIG_SMP
166 this_cpu_add(mnt->mnt_pcp->mnt_count, n);
167#else
168 preempt_disable();
169 mnt->mnt_count += n;
170 preempt_enable();
171#endif
172}
173
174
175
176
177unsigned int mnt_get_count(struct mount *mnt)
178{
179#ifdef CONFIG_SMP
180 unsigned int count = 0;
181 int cpu;
182
183 for_each_possible_cpu(cpu) {
184 count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_count;
185 }
186
187 return count;
188#else
189 return mnt->mnt_count;
190#endif
191}
192
193static void drop_mountpoint(struct fs_pin *p)
194{
195 struct mount *m = container_of(p, struct mount, mnt_umount);
196 dput(m->mnt_ex_mountpoint);
197 pin_remove(p);
198 mntput(&m->mnt);
199}
200
201static struct mount *alloc_vfsmnt(const char *name)
202{
203 struct mount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
204 if (mnt) {
205 int err;
206
207 err = mnt_alloc_id(mnt);
208 if (err)
209 goto out_free_cache;
210
211 if (name) {
212 mnt->mnt_devname = kstrdup_const(name, GFP_KERNEL);
213 if (!mnt->mnt_devname)
214 goto out_free_id;
215 }
216
217#ifdef CONFIG_SMP
218 mnt->mnt_pcp = alloc_percpu(struct mnt_pcp);
219 if (!mnt->mnt_pcp)
220 goto out_free_devname;
221
222 this_cpu_add(mnt->mnt_pcp->mnt_count, 1);
223#else
224 mnt->mnt_count = 1;
225 mnt->mnt_writers = 0;
226#endif
227
228 INIT_HLIST_NODE(&mnt->mnt_hash);
229 INIT_LIST_HEAD(&mnt->mnt_child);
230 INIT_LIST_HEAD(&mnt->mnt_mounts);
231 INIT_LIST_HEAD(&mnt->mnt_list);
232 INIT_LIST_HEAD(&mnt->mnt_expire);
233 INIT_LIST_HEAD(&mnt->mnt_share);
234 INIT_LIST_HEAD(&mnt->mnt_slave_list);
235 INIT_LIST_HEAD(&mnt->mnt_slave);
236 INIT_HLIST_NODE(&mnt->mnt_mp_list);
237#ifdef CONFIG_FSNOTIFY
238 INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks);
239#endif
240 init_fs_pin(&mnt->mnt_umount, drop_mountpoint);
241 }
242 return mnt;
243
244#ifdef CONFIG_SMP
245out_free_devname:
246 kfree_const(mnt->mnt_devname);
247#endif
248out_free_id:
249 mnt_free_id(mnt);
250out_free_cache:
251 kmem_cache_free(mnt_cache, mnt);
252 return NULL;
253}
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274int __mnt_is_readonly(struct vfsmount *mnt)
275{
276 if (mnt->mnt_flags & MNT_READONLY)
277 return 1;
278 if (mnt->mnt_sb->s_flags & MS_RDONLY)
279 return 1;
280 return 0;
281}
282EXPORT_SYMBOL_GPL(__mnt_is_readonly);
283
284static inline void mnt_inc_writers(struct mount *mnt)
285{
286#ifdef CONFIG_SMP
287 this_cpu_inc(mnt->mnt_pcp->mnt_writers);
288#else
289 mnt->mnt_writers++;
290#endif
291}
292
293static inline void mnt_dec_writers(struct mount *mnt)
294{
295#ifdef CONFIG_SMP
296 this_cpu_dec(mnt->mnt_pcp->mnt_writers);
297#else
298 mnt->mnt_writers--;
299#endif
300}
301
302static unsigned int mnt_get_writers(struct mount *mnt)
303{
304#ifdef CONFIG_SMP
305 unsigned int count = 0;
306 int cpu;
307
308 for_each_possible_cpu(cpu) {
309 count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_writers;
310 }
311
312 return count;
313#else
314 return mnt->mnt_writers;
315#endif
316}
317
318static int mnt_is_readonly(struct vfsmount *mnt)
319{
320 if (mnt->mnt_sb->s_readonly_remount)
321 return 1;
322
323 smp_rmb();
324 return __mnt_is_readonly(mnt);
325}
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343int __mnt_want_write(struct vfsmount *m)
344{
345 struct mount *mnt = real_mount(m);
346 int ret = 0;
347
348 preempt_disable();
349 mnt_inc_writers(mnt);
350
351
352
353
354
355 smp_mb();
356 while (ACCESS_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD)
357 cpu_relax();
358
359
360
361
362
363 smp_rmb();
364 if (mnt_is_readonly(m)) {
365 mnt_dec_writers(mnt);
366 ret = -EROFS;
367 }
368 preempt_enable();
369
370 return ret;
371}
372
373
374
375
376
377
378
379
380
381
382int mnt_want_write(struct vfsmount *m)
383{
384 int ret;
385
386 sb_start_write(m->mnt_sb);
387 ret = __mnt_want_write(m);
388 if (ret)
389 sb_end_write(m->mnt_sb);
390 return ret;
391}
392EXPORT_SYMBOL_GPL(mnt_want_write);
393
394
395
396
397
398
399
400
401
402
403
404
405
406int mnt_clone_write(struct vfsmount *mnt)
407{
408
409 if (__mnt_is_readonly(mnt))
410 return -EROFS;
411 preempt_disable();
412 mnt_inc_writers(real_mount(mnt));
413 preempt_enable();
414 return 0;
415}
416EXPORT_SYMBOL_GPL(mnt_clone_write);
417
418
419
420
421
422
423
424
425int __mnt_want_write_file(struct file *file)
426{
427 if (!(file->f_mode & FMODE_WRITER))
428 return __mnt_want_write(file->f_path.mnt);
429 else
430 return mnt_clone_write(file->f_path.mnt);
431}
432
433
434
435
436
437
438
439
440int mnt_want_write_file(struct file *file)
441{
442 int ret;
443
444 sb_start_write(file->f_path.mnt->mnt_sb);
445 ret = __mnt_want_write_file(file);
446 if (ret)
447 sb_end_write(file->f_path.mnt->mnt_sb);
448 return ret;
449}
450EXPORT_SYMBOL_GPL(mnt_want_write_file);
451
452
453
454
455
456
457
458
459
/*
 * Drop one writer from the mount behind @mnt.  The decrement runs with
 * preemption disabled.
 */
void __mnt_drop_write(struct vfsmount *mnt)
{
	struct mount *m;

	preempt_disable();
	m = real_mount(mnt);
	mnt_dec_writers(m);
	preempt_enable();
}
466
467
468
469
470
471
472
473
474
475void mnt_drop_write(struct vfsmount *mnt)
476{
477 __mnt_drop_write(mnt);
478 sb_end_write(mnt->mnt_sb);
479}
480EXPORT_SYMBOL_GPL(mnt_drop_write);
481
482void __mnt_drop_write_file(struct file *file)
483{
484 __mnt_drop_write(file->f_path.mnt);
485}
486
487void mnt_drop_write_file(struct file *file)
488{
489 mnt_drop_write(file->f_path.mnt);
490}
491EXPORT_SYMBOL(mnt_drop_write_file);
492
493static int mnt_make_readonly(struct mount *mnt)
494{
495 int ret = 0;
496
497 lock_mount_hash();
498 mnt->mnt.mnt_flags |= MNT_WRITE_HOLD;
499
500
501
502
503 smp_mb();
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521 if (mnt_get_writers(mnt) > 0)
522 ret = -EBUSY;
523 else
524 mnt->mnt.mnt_flags |= MNT_READONLY;
525
526
527
528
529 smp_wmb();
530 mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
531 unlock_mount_hash();
532 return ret;
533}
534
535static void __mnt_unmake_readonly(struct mount *mnt)
536{
537 lock_mount_hash();
538 mnt->mnt.mnt_flags &= ~MNT_READONLY;
539 unlock_mount_hash();
540}
541
542int sb_prepare_remount_readonly(struct super_block *sb)
543{
544 struct mount *mnt;
545 int err = 0;
546
547
548 if (atomic_long_read(&sb->s_remove_count))
549 return -EBUSY;
550
551 lock_mount_hash();
552 list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) {
553 if (!(mnt->mnt.mnt_flags & MNT_READONLY)) {
554 mnt->mnt.mnt_flags |= MNT_WRITE_HOLD;
555 smp_mb();
556 if (mnt_get_writers(mnt) > 0) {
557 err = -EBUSY;
558 break;
559 }
560 }
561 }
562 if (!err && atomic_long_read(&sb->s_remove_count))
563 err = -EBUSY;
564
565 if (!err) {
566 sb->s_readonly_remount = 1;
567 smp_wmb();
568 }
569 list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) {
570 if (mnt->mnt.mnt_flags & MNT_WRITE_HOLD)
571 mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
572 }
573 unlock_mount_hash();
574
575 return err;
576}
577
578static void free_vfsmnt(struct mount *mnt)
579{
580 kfree_const(mnt->mnt_devname);
581#ifdef CONFIG_SMP
582 free_percpu(mnt->mnt_pcp);
583#endif
584 kmem_cache_free(mnt_cache, mnt);
585}
586
587static void delayed_free_vfsmnt(struct rcu_head *head)
588{
589 free_vfsmnt(container_of(head, struct mount, mnt_rcu));
590}
591
592
593int __legitimize_mnt(struct vfsmount *bastard, unsigned seq)
594{
595 struct mount *mnt;
596 if (read_seqretry(&mount_lock, seq))
597 return 1;
598 if (bastard == NULL)
599 return 0;
600 mnt = real_mount(bastard);
601 mnt_add_count(mnt, 1);
602 if (likely(!read_seqretry(&mount_lock, seq)))
603 return 0;
604 if (bastard->mnt_flags & MNT_SYNC_UMOUNT) {
605 mnt_add_count(mnt, -1);
606 return 1;
607 }
608 return -1;
609}
610
611
612bool legitimize_mnt(struct vfsmount *bastard, unsigned seq)
613{
614 int res = __legitimize_mnt(bastard, seq);
615 if (likely(!res))
616 return true;
617 if (unlikely(res < 0)) {
618 rcu_read_unlock();
619 mntput(bastard);
620 rcu_read_lock();
621 }
622 return false;
623}
624
625
626
627
628
629struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
630{
631 struct hlist_head *head = m_hash(mnt, dentry);
632 struct mount *p;
633
634 hlist_for_each_entry_rcu(p, head, mnt_hash)
635 if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry)
636 return p;
637 return NULL;
638}
639
640
641
642
643
644struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry)
645{
646 struct mount *p, *res = NULL;
647 p = __lookup_mnt(mnt, dentry);
648 if (!p)
649 goto out;
650 if (!(p->mnt.mnt_flags & MNT_UMOUNT))
651 res = p;
652 hlist_for_each_entry_continue(p, mnt_hash) {
653 if (&p->mnt_parent->mnt != mnt || p->mnt_mountpoint != dentry)
654 break;
655 if (!(p->mnt.mnt_flags & MNT_UMOUNT))
656 res = p;
657 }
658out:
659 return res;
660}
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678struct vfsmount *lookup_mnt(struct path *path)
679{
680 struct mount *child_mnt;
681 struct vfsmount *m;
682 unsigned seq;
683
684 rcu_read_lock();
685 do {
686 seq = read_seqbegin(&mount_lock);
687 child_mnt = __lookup_mnt(path->mnt, path->dentry);
688 m = child_mnt ? &child_mnt->mnt : NULL;
689 } while (!legitimize_mnt(m, seq));
690 rcu_read_unlock();
691 return m;
692}
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709bool __is_local_mountpoint(struct dentry *dentry)
710{
711 struct mnt_namespace *ns = current->nsproxy->mnt_ns;
712 struct mount *mnt;
713 bool is_covered = false;
714
715 if (!d_mountpoint(dentry))
716 goto out;
717
718 down_read(&namespace_sem);
719 list_for_each_entry(mnt, &ns->list, mnt_list) {
720 is_covered = (mnt->mnt_mountpoint == dentry);
721 if (is_covered)
722 break;
723 }
724 up_read(&namespace_sem);
725out:
726 return is_covered;
727}
728
729static struct mountpoint *lookup_mountpoint(struct dentry *dentry)
730{
731 struct hlist_head *chain = mp_hash(dentry);
732 struct mountpoint *mp;
733
734 hlist_for_each_entry(mp, chain, m_hash) {
735 if (mp->m_dentry == dentry) {
736
737 if (d_unlinked(dentry))
738 return ERR_PTR(-ENOENT);
739 mp->m_count++;
740 return mp;
741 }
742 }
743 return NULL;
744}
745
746static struct mountpoint *new_mountpoint(struct dentry *dentry)
747{
748 struct hlist_head *chain = mp_hash(dentry);
749 struct mountpoint *mp;
750 int ret;
751
752 mp = kmalloc(sizeof(struct mountpoint), GFP_KERNEL);
753 if (!mp)
754 return ERR_PTR(-ENOMEM);
755
756 ret = d_set_mounted(dentry);
757 if (ret) {
758 kfree(mp);
759 return ERR_PTR(ret);
760 }
761
762 mp->m_dentry = dentry;
763 mp->m_count = 1;
764 hlist_add_head(&mp->m_hash, chain);
765 INIT_HLIST_HEAD(&mp->m_list);
766 return mp;
767}
768
769static void put_mountpoint(struct mountpoint *mp)
770{
771 if (!--mp->m_count) {
772 struct dentry *dentry = mp->m_dentry;
773 BUG_ON(!hlist_empty(&mp->m_list));
774 spin_lock(&dentry->d_lock);
775 dentry->d_flags &= ~DCACHE_MOUNTED;
776 spin_unlock(&dentry->d_lock);
777 hlist_del(&mp->m_hash);
778 kfree(mp);
779 }
780}
781
782static inline int check_mnt(struct mount *mnt)
783{
784 return mnt->mnt_ns == current->nsproxy->mnt_ns;
785}
786
787
788
789
790static void touch_mnt_namespace(struct mnt_namespace *ns)
791{
792 if (ns) {
793 ns->event = ++event;
794 wake_up_interruptible(&ns->poll);
795 }
796}
797
798
799
800
801static void __touch_mnt_namespace(struct mnt_namespace *ns)
802{
803 if (ns && ns->event != event) {
804 ns->event = event;
805 wake_up_interruptible(&ns->poll);
806 }
807}
808
809
810
811
812static void unhash_mnt(struct mount *mnt)
813{
814 mnt->mnt_parent = mnt;
815 mnt->mnt_mountpoint = mnt->mnt.mnt_root;
816 list_del_init(&mnt->mnt_child);
817 hlist_del_init_rcu(&mnt->mnt_hash);
818 hlist_del_init(&mnt->mnt_mp_list);
819 put_mountpoint(mnt->mnt_mp);
820 mnt->mnt_mp = NULL;
821}
822
823
824
825
826static void detach_mnt(struct mount *mnt, struct path *old_path)
827{
828 old_path->dentry = mnt->mnt_mountpoint;
829 old_path->mnt = &mnt->mnt_parent->mnt;
830 unhash_mnt(mnt);
831}
832
833
834
835
836static void umount_mnt(struct mount *mnt)
837{
838
839 mnt->mnt_ex_mountpoint = mnt->mnt_mountpoint;
840 unhash_mnt(mnt);
841}
842
843
844
845
846void mnt_set_mountpoint(struct mount *mnt,
847 struct mountpoint *mp,
848 struct mount *child_mnt)
849{
850 mp->m_count++;
851 mnt_add_count(mnt, 1);
852 child_mnt->mnt_mountpoint = dget(mp->m_dentry);
853 child_mnt->mnt_parent = mnt;
854 child_mnt->mnt_mp = mp;
855 hlist_add_head(&child_mnt->mnt_mp_list, &mp->m_list);
856}
857
858
859
860
861static void attach_mnt(struct mount *mnt,
862 struct mount *parent,
863 struct mountpoint *mp)
864{
865 mnt_set_mountpoint(parent, mp, mnt);
866 hlist_add_head_rcu(&mnt->mnt_hash, m_hash(&parent->mnt, mp->m_dentry));
867 list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
868}
869
870static void attach_shadowed(struct mount *mnt,
871 struct mount *parent,
872 struct mount *shadows)
873{
874 if (shadows) {
875 hlist_add_behind_rcu(&mnt->mnt_hash, &shadows->mnt_hash);
876 list_add(&mnt->mnt_child, &shadows->mnt_child);
877 } else {
878 hlist_add_head_rcu(&mnt->mnt_hash,
879 m_hash(&parent->mnt, mnt->mnt_mountpoint));
880 list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
881 }
882}
883
884
885
886
887static void commit_tree(struct mount *mnt, struct mount *shadows)
888{
889 struct mount *parent = mnt->mnt_parent;
890 struct mount *m;
891 LIST_HEAD(head);
892 struct mnt_namespace *n = parent->mnt_ns;
893
894 BUG_ON(parent == mnt);
895
896 list_add_tail(&head, &mnt->mnt_list);
897 list_for_each_entry(m, &head, mnt_list)
898 m->mnt_ns = n;
899
900 list_splice(&head, n->list.prev);
901
902 attach_shadowed(mnt, parent, shadows);
903 touch_mnt_namespace(n);
904}
905
906static struct mount *next_mnt(struct mount *p, struct mount *root)
907{
908 struct list_head *next = p->mnt_mounts.next;
909 if (next == &p->mnt_mounts) {
910 while (1) {
911 if (p == root)
912 return NULL;
913 next = p->mnt_child.next;
914 if (next != &p->mnt_parent->mnt_mounts)
915 break;
916 p = p->mnt_parent;
917 }
918 }
919 return list_entry(next, struct mount, mnt_child);
920}
921
922static struct mount *skip_mnt_tree(struct mount *p)
923{
924 struct list_head *prev = p->mnt_mounts.prev;
925 while (prev != &p->mnt_mounts) {
926 p = list_entry(prev, struct mount, mnt_child);
927 prev = p->mnt_mounts.prev;
928 }
929 return p;
930}
931
932struct vfsmount *
933vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data)
934{
935 struct mount *mnt;
936 struct dentry *root;
937
938 if (!type)
939 return ERR_PTR(-ENODEV);
940
941 mnt = alloc_vfsmnt(name);
942 if (!mnt)
943 return ERR_PTR(-ENOMEM);
944
945 if (flags & MS_KERNMOUNT)
946 mnt->mnt.mnt_flags = MNT_INTERNAL;
947
948 root = mount_fs(type, flags, name, data);
949 if (IS_ERR(root)) {
950 mnt_free_id(mnt);
951 free_vfsmnt(mnt);
952 return ERR_CAST(root);
953 }
954
955 mnt->mnt.mnt_root = root;
956 mnt->mnt.mnt_sb = root->d_sb;
957 mnt->mnt_mountpoint = mnt->mnt.mnt_root;
958 mnt->mnt_parent = mnt;
959 lock_mount_hash();
960 list_add_tail(&mnt->mnt_instance, &root->d_sb->s_mounts);
961 unlock_mount_hash();
962 return &mnt->mnt;
963}
964EXPORT_SYMBOL_GPL(vfs_kern_mount);
965
966static struct mount *clone_mnt(struct mount *old, struct dentry *root,
967 int flag)
968{
969 struct super_block *sb = old->mnt.mnt_sb;
970 struct mount *mnt;
971 int err;
972
973 mnt = alloc_vfsmnt(old->mnt_devname);
974 if (!mnt)
975 return ERR_PTR(-ENOMEM);
976
977 if (flag & (CL_SLAVE | CL_PRIVATE | CL_SHARED_TO_SLAVE))
978 mnt->mnt_group_id = 0;
979 else
980 mnt->mnt_group_id = old->mnt_group_id;
981
982 if ((flag & CL_MAKE_SHARED) && !mnt->mnt_group_id) {
983 err = mnt_alloc_group_id(mnt);
984 if (err)
985 goto out_free;
986 }
987
988 mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~(MNT_WRITE_HOLD|MNT_MARKED);
989
990 if (flag & CL_UNPRIVILEGED) {
991 mnt->mnt.mnt_flags |= MNT_LOCK_ATIME;
992
993 if (mnt->mnt.mnt_flags & MNT_READONLY)
994 mnt->mnt.mnt_flags |= MNT_LOCK_READONLY;
995
996 if (mnt->mnt.mnt_flags & MNT_NODEV)
997 mnt->mnt.mnt_flags |= MNT_LOCK_NODEV;
998
999 if (mnt->mnt.mnt_flags & MNT_NOSUID)
1000 mnt->mnt.mnt_flags |= MNT_LOCK_NOSUID;
1001
1002 if (mnt->mnt.mnt_flags & MNT_NOEXEC)
1003 mnt->mnt.mnt_flags |= MNT_LOCK_NOEXEC;
1004 }
1005
1006
1007 if ((flag & CL_UNPRIVILEGED) &&
1008 (!(flag & CL_EXPIRE) || list_empty(&old->mnt_expire)))
1009 mnt->mnt.mnt_flags |= MNT_LOCKED;
1010
1011 atomic_inc(&sb->s_active);
1012 mnt->mnt.mnt_sb = sb;
1013 mnt->mnt.mnt_root = dget(root);
1014 mnt->mnt_mountpoint = mnt->mnt.mnt_root;
1015 mnt->mnt_parent = mnt;
1016 lock_mount_hash();
1017 list_add_tail(&mnt->mnt_instance, &sb->s_mounts);
1018 unlock_mount_hash();
1019
1020 if ((flag & CL_SLAVE) ||
1021 ((flag & CL_SHARED_TO_SLAVE) && IS_MNT_SHARED(old))) {
1022 list_add(&mnt->mnt_slave, &old->mnt_slave_list);
1023 mnt->mnt_master = old;
1024 CLEAR_MNT_SHARED(mnt);
1025 } else if (!(flag & CL_PRIVATE)) {
1026 if ((flag & CL_MAKE_SHARED) || IS_MNT_SHARED(old))
1027 list_add(&mnt->mnt_share, &old->mnt_share);
1028 if (IS_MNT_SLAVE(old))
1029 list_add(&mnt->mnt_slave, &old->mnt_slave);
1030 mnt->mnt_master = old->mnt_master;
1031 }
1032 if (flag & CL_MAKE_SHARED)
1033 set_mnt_shared(mnt);
1034
1035
1036
1037 if (flag & CL_EXPIRE) {
1038 if (!list_empty(&old->mnt_expire))
1039 list_add(&mnt->mnt_expire, &old->mnt_expire);
1040 }
1041
1042 return mnt;
1043
1044 out_free:
1045 mnt_free_id(mnt);
1046 free_vfsmnt(mnt);
1047 return ERR_PTR(err);
1048}
1049
1050static void cleanup_mnt(struct mount *mnt)
1051{
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062 WARN_ON(mnt_get_writers(mnt));
1063 if (unlikely(mnt->mnt_pins.first))
1064 mnt_pin_kill(mnt);
1065 fsnotify_vfsmount_delete(&mnt->mnt);
1066 dput(mnt->mnt.mnt_root);
1067 deactivate_super(mnt->mnt.mnt_sb);
1068 mnt_free_id(mnt);
1069 call_rcu(&mnt->mnt_rcu, delayed_free_vfsmnt);
1070}
1071
1072static void __cleanup_mnt(struct rcu_head *head)
1073{
1074 cleanup_mnt(container_of(head, struct mount, mnt_rcu));
1075}
1076
1077static LLIST_HEAD(delayed_mntput_list);
1078static void delayed_mntput(struct work_struct *unused)
1079{
1080 struct llist_node *node = llist_del_all(&delayed_mntput_list);
1081 struct llist_node *next;
1082
1083 for (; node; node = next) {
1084 next = llist_next(node);
1085 cleanup_mnt(llist_entry(node, struct mount, mnt_llist));
1086 }
1087}
1088static DECLARE_DELAYED_WORK(delayed_mntput_work, delayed_mntput);
1089
1090static void mntput_no_expire(struct mount *mnt)
1091{
1092 rcu_read_lock();
1093 mnt_add_count(mnt, -1);
1094 if (likely(mnt->mnt_ns)) {
1095 rcu_read_unlock();
1096 return;
1097 }
1098 lock_mount_hash();
1099 if (mnt_get_count(mnt)) {
1100 rcu_read_unlock();
1101 unlock_mount_hash();
1102 return;
1103 }
1104 if (unlikely(mnt->mnt.mnt_flags & MNT_DOOMED)) {
1105 rcu_read_unlock();
1106 unlock_mount_hash();
1107 return;
1108 }
1109 mnt->mnt.mnt_flags |= MNT_DOOMED;
1110 rcu_read_unlock();
1111
1112 list_del(&mnt->mnt_instance);
1113
1114 if (unlikely(!list_empty(&mnt->mnt_mounts))) {
1115 struct mount *p, *tmp;
1116 list_for_each_entry_safe(p, tmp, &mnt->mnt_mounts, mnt_child) {
1117 umount_mnt(p);
1118 }
1119 }
1120 unlock_mount_hash();
1121
1122 if (likely(!(mnt->mnt.mnt_flags & MNT_INTERNAL))) {
1123 struct task_struct *task = current;
1124 if (likely(!(task->flags & PF_KTHREAD))) {
1125 init_task_work(&mnt->mnt_rcu, __cleanup_mnt);
1126 if (!task_work_add(task, &mnt->mnt_rcu, true))
1127 return;
1128 }
1129 if (llist_add(&mnt->mnt_llist, &delayed_mntput_list))
1130 schedule_delayed_work(&delayed_mntput_work, 1);
1131 return;
1132 }
1133 cleanup_mnt(mnt);
1134}
1135
1136void mntput(struct vfsmount *mnt)
1137{
1138 if (mnt) {
1139 struct mount *m = real_mount(mnt);
1140
1141 if (unlikely(m->mnt_expiry_mark))
1142 m->mnt_expiry_mark = 0;
1143 mntput_no_expire(m);
1144 }
1145}
1146EXPORT_SYMBOL(mntput);
1147
1148struct vfsmount *mntget(struct vfsmount *mnt)
1149{
1150 if (mnt)
1151 mnt_add_count(real_mount(mnt), 1);
1152 return mnt;
1153}
1154EXPORT_SYMBOL(mntget);
1155
1156struct vfsmount *mnt_clone_internal(struct path *path)
1157{
1158 struct mount *p;
1159 p = clone_mnt(real_mount(path->mnt), path->dentry, CL_PRIVATE);
1160 if (IS_ERR(p))
1161 return ERR_CAST(p);
1162 p->mnt.mnt_flags |= MNT_INTERNAL;
1163 return &p->mnt;
1164}
1165
/*
 * Emit @s into @m through seq_escape(), escaping space, tab, newline and
 * backslash.
 */
static inline void mangle(struct seq_file *m, const char *s)
{
	static const char escaped[] = " \t\n\\";

	seq_escape(m, s, escaped);
}
1170
1171
1172
1173
1174
1175
1176
1177int generic_show_options(struct seq_file *m, struct dentry *root)
1178{
1179 const char *options;
1180
1181 rcu_read_lock();
1182 options = rcu_dereference(root->d_sb->s_options);
1183
1184 if (options != NULL && options[0]) {
1185 seq_putc(m, ',');
1186 mangle(m, options);
1187 }
1188 rcu_read_unlock();
1189
1190 return 0;
1191}
1192EXPORT_SYMBOL(generic_show_options);
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207void save_mount_options(struct super_block *sb, char *options)
1208{
1209 BUG_ON(sb->s_options);
1210 rcu_assign_pointer(sb->s_options, kstrdup(options, GFP_KERNEL));
1211}
1212EXPORT_SYMBOL(save_mount_options);
1213
1214void replace_mount_options(struct super_block *sb, char *options)
1215{
1216 char *old = sb->s_options;
1217 rcu_assign_pointer(sb->s_options, options);
1218 if (old) {
1219 synchronize_rcu();
1220 kfree(old);
1221 }
1222}
1223EXPORT_SYMBOL(replace_mount_options);
1224
1225#ifdef CONFIG_PROC_FS
1226
1227static void *m_start(struct seq_file *m, loff_t *pos)
1228{
1229 struct proc_mounts *p = m->private;
1230
1231 down_read(&namespace_sem);
1232 if (p->cached_event == p->ns->event) {
1233 void *v = p->cached_mount;
1234 if (*pos == p->cached_index)
1235 return v;
1236 if (*pos == p->cached_index + 1) {
1237 v = seq_list_next(v, &p->ns->list, &p->cached_index);
1238 return p->cached_mount = v;
1239 }
1240 }
1241
1242 p->cached_event = p->ns->event;
1243 p->cached_mount = seq_list_start(&p->ns->list, *pos);
1244 p->cached_index = *pos;
1245 return p->cached_mount;
1246}
1247
1248static void *m_next(struct seq_file *m, void *v, loff_t *pos)
1249{
1250 struct proc_mounts *p = m->private;
1251
1252 p->cached_mount = seq_list_next(v, &p->ns->list, pos);
1253 p->cached_index = *pos;
1254 return p->cached_mount;
1255}
1256
1257static void m_stop(struct seq_file *m, void *v)
1258{
1259 up_read(&namespace_sem);
1260}
1261
1262static int m_show(struct seq_file *m, void *v)
1263{
1264 struct proc_mounts *p = m->private;
1265 struct mount *r = list_entry(v, struct mount, mnt_list);
1266 return p->show(m, &r->mnt);
1267}
1268
1269const struct seq_operations mounts_op = {
1270 .start = m_start,
1271 .next = m_next,
1272 .stop = m_stop,
1273 .show = m_show,
1274};
1275#endif
1276
1277
1278
1279
1280
1281
1282
1283
1284
/*
 * Check whether the tree of mounts rooted at @m looks idle.
 *
 * Sums the reference counts of every mount in the tree under
 * lock_mount_hash() and allows two references per mount.  Returns 1 if
 * the total does not exceed that allowance, 0 otherwise.
 */
int may_umount_tree(struct vfsmount *m)
{
	struct mount *root = real_mount(m);
	struct mount *p;
	int minimum_refs = 0;
	int actual_refs = 0;

	BUG_ON(!m);

	lock_mount_hash();
	for (p = root; p; p = next_mnt(p, root)) {
		actual_refs += mnt_get_count(p);
		minimum_refs += 2;
	}
	unlock_mount_hash();

	return actual_refs <= minimum_refs;
}

EXPORT_SYMBOL(may_umount_tree);
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322int may_umount(struct vfsmount *mnt)
1323{
1324 int ret = 1;
1325 down_read(&namespace_sem);
1326 lock_mount_hash();
1327 if (propagate_mount_busy(real_mount(mnt), 2))
1328 ret = 0;
1329 unlock_mount_hash();
1330 up_read(&namespace_sem);
1331 return ret;
1332}
1333
1334EXPORT_SYMBOL(may_umount);
1335
1336static HLIST_HEAD(unmounted);
1337
1338static void namespace_unlock(void)
1339{
1340 struct hlist_head head;
1341
1342 hlist_move_list(&unmounted, &head);
1343
1344 up_write(&namespace_sem);
1345
1346 if (likely(hlist_empty(&head)))
1347 return;
1348
1349 synchronize_rcu();
1350
1351 group_pin_kill(&head);
1352}
1353
1354static inline void namespace_lock(void)
1355{
1356 down_write(&namespace_sem);
1357}
1358
/* Bit flags accepted by umount_tree() and disconnect_mount(). */
enum umount_tree_flags {
	UMOUNT_SYNC		= 1 << 0,
	UMOUNT_PROPAGATE	= 1 << 1,
	UMOUNT_CONNECTED	= 1 << 2,
};
1364
1365static bool disconnect_mount(struct mount *mnt, enum umount_tree_flags how)
1366{
1367
1368 if (how & UMOUNT_SYNC)
1369 return true;
1370
1371
1372 if (!mnt_has_parent(mnt))
1373 return true;
1374
1375
1376
1377
1378
1379 if (!(mnt->mnt_parent->mnt.mnt_flags & MNT_UMOUNT))
1380 return true;
1381
1382
1383 if (how & UMOUNT_CONNECTED)
1384 return false;
1385
1386
1387 if (IS_MNT_LOCKED(mnt))
1388 return false;
1389
1390
1391 return true;
1392}
1393
1394
1395
1396
1397
1398static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
1399{
1400 LIST_HEAD(tmp_list);
1401 struct mount *p;
1402
1403 if (how & UMOUNT_PROPAGATE)
1404 propagate_mount_unlock(mnt);
1405
1406
1407 for (p = mnt; p; p = next_mnt(p, mnt)) {
1408 p->mnt.mnt_flags |= MNT_UMOUNT;
1409 list_move(&p->mnt_list, &tmp_list);
1410 }
1411
1412
1413 list_for_each_entry(p, &tmp_list, mnt_list) {
1414 list_del_init(&p->mnt_child);
1415 }
1416
1417
1418 if (how & UMOUNT_PROPAGATE)
1419 propagate_umount(&tmp_list);
1420
1421 while (!list_empty(&tmp_list)) {
1422 bool disconnect;
1423 p = list_first_entry(&tmp_list, struct mount, mnt_list);
1424 list_del_init(&p->mnt_expire);
1425 list_del_init(&p->mnt_list);
1426 __touch_mnt_namespace(p->mnt_ns);
1427 p->mnt_ns = NULL;
1428 if (how & UMOUNT_SYNC)
1429 p->mnt.mnt_flags |= MNT_SYNC_UMOUNT;
1430
1431 disconnect = disconnect_mount(p, how);
1432
1433 pin_insert_group(&p->mnt_umount, &p->mnt_parent->mnt,
1434 disconnect ? &unmounted : NULL);
1435 if (mnt_has_parent(p)) {
1436 mnt_add_count(p->mnt_parent, -1);
1437 if (!disconnect) {
1438
1439 list_add_tail(&p->mnt_child, &p->mnt_parent->mnt_mounts);
1440 } else {
1441 umount_mnt(p);
1442 }
1443 }
1444 change_mnt_propagation(p, MS_PRIVATE);
1445 }
1446}
1447
1448static void shrink_submounts(struct mount *mnt);
1449
1450static int do_umount(struct mount *mnt, int flags)
1451{
1452 struct super_block *sb = mnt->mnt.mnt_sb;
1453 int retval;
1454
1455 retval = security_sb_umount(&mnt->mnt, flags);
1456 if (retval)
1457 return retval;
1458
1459
1460
1461
1462
1463
1464
1465 if (flags & MNT_EXPIRE) {
1466 if (&mnt->mnt == current->fs->root.mnt ||
1467 flags & (MNT_FORCE | MNT_DETACH))
1468 return -EINVAL;
1469
1470
1471
1472
1473
1474 lock_mount_hash();
1475 if (mnt_get_count(mnt) != 2) {
1476 unlock_mount_hash();
1477 return -EBUSY;
1478 }
1479 unlock_mount_hash();
1480
1481 if (!xchg(&mnt->mnt_expiry_mark, 1))
1482 return -EAGAIN;
1483 }
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495 if (flags & MNT_FORCE && sb->s_op->umount_begin) {
1496 sb->s_op->umount_begin(sb);
1497 }
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508 if (&mnt->mnt == current->fs->root.mnt && !(flags & MNT_DETACH)) {
1509
1510
1511
1512
1513 if (!capable(CAP_SYS_ADMIN))
1514 return -EPERM;
1515 down_write(&sb->s_umount);
1516 if (!(sb->s_flags & MS_RDONLY))
1517 retval = do_remount_sb(sb, MS_RDONLY, NULL, 0);
1518 up_write(&sb->s_umount);
1519 return retval;
1520 }
1521
1522 namespace_lock();
1523 lock_mount_hash();
1524 event++;
1525
1526 if (flags & MNT_DETACH) {
1527 if (!list_empty(&mnt->mnt_list))
1528 umount_tree(mnt, UMOUNT_PROPAGATE);
1529 retval = 0;
1530 } else {
1531 shrink_submounts(mnt);
1532 retval = -EBUSY;
1533 if (!propagate_mount_busy(mnt, 2)) {
1534 if (!list_empty(&mnt->mnt_list))
1535 umount_tree(mnt, UMOUNT_PROPAGATE|UMOUNT_SYNC);
1536 retval = 0;
1537 }
1538 }
1539 unlock_mount_hash();
1540 namespace_unlock();
1541 return retval;
1542}
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554void __detach_mounts(struct dentry *dentry)
1555{
1556 struct mountpoint *mp;
1557 struct mount *mnt;
1558
1559 namespace_lock();
1560 mp = lookup_mountpoint(dentry);
1561 if (IS_ERR_OR_NULL(mp))
1562 goto out_unlock;
1563
1564 lock_mount_hash();
1565 while (!hlist_empty(&mp->m_list)) {
1566 mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list);
1567 if (mnt->mnt.mnt_flags & MNT_UMOUNT) {
1568 hlist_add_head(&mnt->mnt_umount.s_list, &unmounted);
1569 umount_mnt(mnt);
1570 }
1571 else umount_tree(mnt, UMOUNT_CONNECTED);
1572 }
1573 unlock_mount_hash();
1574 put_mountpoint(mp);
1575out_unlock:
1576 namespace_unlock();
1577}
1578
1579
1580
1581
1582static inline bool may_mount(void)
1583{
1584 return ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN);
1585}
1586
1587static inline bool may_mandlock(void)
1588{
1589#ifndef CONFIG_MANDATORY_FILE_LOCKING
1590 return false;
1591#endif
1592 return capable(CAP_SYS_ADMIN);
1593}
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
1604{
1605 struct path path;
1606 struct mount *mnt;
1607 int retval;
1608 int lookup_flags = 0;
1609
1610 if (flags & ~(MNT_FORCE | MNT_DETACH | MNT_EXPIRE | UMOUNT_NOFOLLOW))
1611 return -EINVAL;
1612
1613 if (!may_mount())
1614 return -EPERM;
1615
1616 if (!(flags & UMOUNT_NOFOLLOW))
1617 lookup_flags |= LOOKUP_FOLLOW;
1618
1619 retval = user_path_mountpoint_at(AT_FDCWD, name, lookup_flags, &path);
1620 if (retval)
1621 goto out;
1622 mnt = real_mount(path.mnt);
1623 retval = -EINVAL;
1624 if (path.dentry != path.mnt->mnt_root)
1625 goto dput_and_out;
1626 if (!check_mnt(mnt))
1627 goto dput_and_out;
1628 if (mnt->mnt.mnt_flags & MNT_LOCKED)
1629 goto dput_and_out;
1630 retval = -EPERM;
1631 if (flags & MNT_FORCE && !capable(CAP_SYS_ADMIN))
1632 goto dput_and_out;
1633
1634 retval = do_umount(mnt, flags);
1635dput_and_out:
1636
1637 dput(path.dentry);
1638 mntput_no_expire(mnt);
1639out:
1640 return retval;
1641}
1642
1643#ifdef __ARCH_WANT_SYS_OLDUMOUNT
1644
1645
1646
1647
1648SYSCALL_DEFINE1(oldumount, char __user *, name)
1649{
1650 return sys_umount(name, 0);
1651}
1652
1653#endif
1654
1655static bool is_mnt_ns_file(struct dentry *dentry)
1656{
1657
1658 return dentry->d_op == &ns_dentry_operations &&
1659 dentry->d_fsdata == &mntns_operations;
1660}
1661
1662struct mnt_namespace *to_mnt_ns(struct ns_common *ns)
1663{
1664 return container_of(ns, struct mnt_namespace, ns);
1665}
1666
1667static bool mnt_ns_loop(struct dentry *dentry)
1668{
1669
1670
1671
1672 struct mnt_namespace *mnt_ns;
1673 if (!is_mnt_ns_file(dentry))
1674 return false;
1675
1676 mnt_ns = to_mnt_ns(get_proc_ns(dentry->d_inode));
1677 return current->nsproxy->mnt_ns->seq >= mnt_ns->seq;
1678}
1679
/*
 * copy_tree - clone @mnt together with the mounts stacked beneath it.
 *
 * @mnt:    top of the mount tree to copy
 * @dentry: dentry handed to clone_mnt() for the top-level clone; direct
 *          children of @mnt whose mountpoint is not a subdirectory of it
 *          are left out of the copy
 * @flag:   CL_* flags passed to every clone_mnt() call.  Without
 *          CL_COPY_UNBINDABLE, unbindable mounts (and their subtrees) are
 *          skipped; without CL_COPY_MNT_NS_FILE, mounts rooted at a mount
 *          namespace file are skipped likewise.
 *
 * Returns the root of the copy, or an ERR_PTR().  If a clone fails part
 * way through, the partial copy is torn down with umount_tree() and the
 * failing clone_mnt() result is returned.
 */
struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
					int flag)
{
	struct mount *res, *p, *q, *r, *parent;

	if (!(flag & CL_COPY_UNBINDABLE) && IS_MNT_UNBINDABLE(mnt))
		return ERR_PTR(-EINVAL);

	if (!(flag & CL_COPY_MNT_NS_FILE) && is_mnt_ns_file(dentry))
		return ERR_PTR(-EINVAL);

	res = q = clone_mnt(mnt, dentry, flag);
	if (IS_ERR(q))
		return q;

	q->mnt_mountpoint = mnt->mnt_mountpoint;

	/*
	 * p walks the source tree and q the copy; they are kept in step so
	 * that q is always the clone corresponding to p.
	 */
	p = mnt;
	list_for_each_entry(r, &mnt->mnt_mounts, mnt_child) {
		struct mount *s;
		if (!is_subdir(r->mnt_mountpoint, dentry))
			continue;

		for (s = r; s; s = next_mnt(s, r)) {
			struct mount *t = NULL;
			if (!(flag & CL_COPY_UNBINDABLE) &&
			    IS_MNT_UNBINDABLE(s)) {
				s = skip_mnt_tree(s);
				continue;
			}
			if (!(flag & CL_COPY_MNT_NS_FILE) &&
			    is_mnt_ns_file(s->mnt.mnt_root)) {
				s = skip_mnt_tree(s);
				continue;
			}
			/* Climb both trees until p is the parent of s. */
			while (p != s->mnt_parent) {
				p = p->mnt_parent;
				q = q->mnt_parent;
			}
			p = s;
			parent = q;
			q = clone_mnt(p, p->mnt.mnt_root, flag);
			if (IS_ERR(q))
				goto out;
			lock_mount_hash();
			list_add_tail(&q->mnt_list, &res->mnt_list);
			mnt_set_mountpoint(parent, p->mnt_mp, q);
			/*
			 * If the last child already attached to the new
			 * parent sits on the same mountpoint, pass it to
			 * attach_shadowed(); otherwise pass NULL.
			 */
			if (!list_empty(&parent->mnt_mounts)) {
				t = list_last_entry(&parent->mnt_mounts,
					struct mount, mnt_child);
				if (t->mnt_mp != p->mnt_mp)
					t = NULL;
			}
			attach_shadowed(q, parent, t);
			unlock_mount_hash();
		}
	}
	return res;
out:
	/* A clone failed: discard what has been built so far. */
	if (res) {
		lock_mount_hash();
		umount_tree(res, UMOUNT_SYNC);
		unlock_mount_hash();
	}
	return q;
}
1746
1747
1748
1749struct vfsmount *collect_mounts(struct path *path)
1750{
1751 struct mount *tree;
1752 namespace_lock();
1753 if (!check_mnt(real_mount(path->mnt)))
1754 tree = ERR_PTR(-EINVAL);
1755 else
1756 tree = copy_tree(real_mount(path->mnt), path->dentry,
1757 CL_COPY_ALL | CL_PRIVATE);
1758 namespace_unlock();
1759 if (IS_ERR(tree))
1760 return ERR_CAST(tree);
1761 return &tree->mnt;
1762}
1763
1764void drop_collected_mounts(struct vfsmount *mnt)
1765{
1766 namespace_lock();
1767 lock_mount_hash();
1768 umount_tree(real_mount(mnt), UMOUNT_SYNC);
1769 unlock_mount_hash();
1770 namespace_unlock();
1771}
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782struct vfsmount *clone_private_mount(struct path *path)
1783{
1784 struct mount *old_mnt = real_mount(path->mnt);
1785 struct mount *new_mnt;
1786
1787 if (IS_MNT_UNBINDABLE(old_mnt))
1788 return ERR_PTR(-EINVAL);
1789
1790 down_read(&namespace_sem);
1791 new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE);
1792 up_read(&namespace_sem);
1793 if (IS_ERR(new_mnt))
1794 return ERR_CAST(new_mnt);
1795
1796 return &new_mnt->mnt;
1797}
1798EXPORT_SYMBOL_GPL(clone_private_mount);
1799
1800int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg,
1801 struct vfsmount *root)
1802{
1803 struct mount *mnt;
1804 int res = f(root, arg);
1805 if (res)
1806 return res;
1807 list_for_each_entry(mnt, &real_mount(root)->mnt_list, mnt_list) {
1808 res = f(&mnt->mnt, arg);
1809 if (res)
1810 return res;
1811 }
1812 return 0;
1813}
1814
1815static void cleanup_group_ids(struct mount *mnt, struct mount *end)
1816{
1817 struct mount *p;
1818
1819 for (p = mnt; p != end; p = next_mnt(p, mnt)) {
1820 if (p->mnt_group_id && !IS_MNT_SHARED(p))
1821 mnt_release_group_id(p);
1822 }
1823}
1824
1825static int invent_group_ids(struct mount *mnt, bool recurse)
1826{
1827 struct mount *p;
1828
1829 for (p = mnt; p; p = recurse ? next_mnt(p, mnt) : NULL) {
1830 if (!p->mnt_group_id && !IS_MNT_SHARED(p)) {
1831 int err = mnt_alloc_group_id(p);
1832 if (err) {
1833 cleanup_group_ids(mnt, p);
1834 return err;
1835 }
1836 }
1837 }
1838
1839 return 0;
1840}
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
/*
 * attach_recursive_mnt - attach the tree rooted at @source_mnt onto
 * @dest_mp in @dest_mnt.
 *
 * @source_mnt:  root of the tree being attached
 * @dest_mnt:    mount that will become the parent
 * @dest_mp:     mountpoint inside @dest_mnt
 * @parent_path: if non-NULL, @source_mnt is first detached from its
 *               current location (a move) and the old parent is
 *               reported through it by detach_mnt(); if NULL,
 *               @source_mnt is attached as a new tree
 *
 * When @dest_mnt is shared, group ids are allocated for the source tree,
 * propagate_mnt() collects the propagated copies on a local list and
 * every mount in the source tree is marked shared.
 *
 * Returns 0 on success or a negative errno from invent_group_ids() or
 * propagate_mnt().
 */
static int attach_recursive_mnt(struct mount *source_mnt,
			struct mount *dest_mnt,
			struct mountpoint *dest_mp,
			struct path *parent_path)
{
	HLIST_HEAD(tree_list);
	struct mount *child, *p;
	struct hlist_node *n;
	int err;

	if (IS_MNT_SHARED(dest_mnt)) {
		err = invent_group_ids(source_mnt, true);
		if (err)
			goto out;
		err = propagate_mnt(dest_mnt, dest_mp, source_mnt, &tree_list);
		/*
		 * The hash lock is taken before testing err because the
		 * cleanup path below also runs under it.
		 */
		lock_mount_hash();
		if (err)
			goto out_cleanup_ids;
		for (p = source_mnt; p; p = next_mnt(p, source_mnt))
			set_mnt_shared(p);
	} else {
		lock_mount_hash();
	}
	if (parent_path) {
		/* Move: unhook from the old parent, hook onto the new one. */
		detach_mnt(source_mnt, parent_path);
		attach_mnt(source_mnt, dest_mnt, dest_mp);
		touch_mnt_namespace(source_mnt->mnt_ns);
	} else {
		mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt);
		commit_tree(source_mnt, NULL);
	}

	/* Commit each propagated copy collected by propagate_mnt(). */
	hlist_for_each_entry_safe(child, n, &tree_list, mnt_hash) {
		struct mount *q;
		hlist_del_init(&child->mnt_hash);
		q = __lookup_mnt_last(&child->mnt_parent->mnt,
				      child->mnt_mountpoint);
		commit_tree(child, q);
	}
	unlock_mount_hash();

	return 0;

 out_cleanup_ids:
	/* Propagation failed: discard the copies and release the group ids. */
	while (!hlist_empty(&tree_list)) {
		child = hlist_entry(tree_list.first, struct mount, mnt_hash);
		umount_tree(child, UMOUNT_SYNC);
	}
	unlock_mount_hash();
	cleanup_group_ids(source_mnt, NULL);
 out:
	return err;
}
1958
/*
 * lock_mount - lock the place where a mount is about to be attached.
 *
 * @path: location to mount on.  If something is already mounted there,
 *        @path is updated in place to the root of that mount and the
 *        procedure is retried, so on return @path names the topmost
 *        mount at that location.
 *
 * On success returns the mountpoint for path->dentry (found or newly
 * created) with the inode lock of that dentry and the namespace lock
 * both held; unlock_mount() releases them.  On failure returns an
 * ERR_PTR() with no locks held: -ENOENT if cant_mount() rejects the
 * dentry, or the error from new_mountpoint().
 */
static struct mountpoint *lock_mount(struct path *path)
{
	struct vfsmount *mnt;
	struct dentry *dentry = path->dentry;
retry:
	inode_lock(dentry->d_inode);
	if (unlikely(cant_mount(dentry))) {
		inode_unlock(dentry->d_inode);
		return ERR_PTR(-ENOENT);
	}
	namespace_lock();
	mnt = lookup_mnt(path);
	if (likely(!mnt)) {
		/* Nothing mounted here: find or create the mountpoint. */
		struct mountpoint *mp = lookup_mountpoint(dentry);
		if (!mp)
			mp = new_mountpoint(dentry);
		if (IS_ERR(mp)) {
			namespace_unlock();
			inode_unlock(dentry->d_inode);
			return mp;
		}
		/* Success: both locks stay held for the caller. */
		return mp;
	}
	/*
	 * Something is mounted on this spot.  Drop the locks, step into the
	 * root of that mount (lookup_mnt()'s reference is kept in path->mnt)
	 * and try again from there.
	 */
	namespace_unlock();
	inode_unlock(path->dentry->d_inode);
	path_put(path);
	path->mnt = mnt;
	dentry = path->dentry = dget(mnt->mnt_root);
	goto retry;
}
1989
1990static void unlock_mount(struct mountpoint *where)
1991{
1992 struct dentry *dentry = where->m_dentry;
1993 put_mountpoint(where);
1994 namespace_unlock();
1995 inode_unlock(dentry->d_inode);
1996}
1997
1998static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp)
1999{
2000 if (mnt->mnt.mnt_sb->s_flags & MS_NOUSER)
2001 return -EINVAL;
2002
2003 if (d_is_dir(mp->m_dentry) !=
2004 d_is_dir(mnt->mnt.mnt_root))
2005 return -ENOTDIR;
2006
2007 return attach_recursive_mnt(mnt, p, mp, NULL);
2008}
2009
2010
2011
2012
2013
2014static int flags_to_propagation_type(int flags)
2015{
2016 int type = flags & ~(MS_REC | MS_SILENT);
2017
2018
2019 if (type & ~(MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
2020 return 0;
2021
2022 if (!is_power_of_2(type))
2023 return 0;
2024 return type;
2025}
2026
2027
2028
2029
2030static int do_change_type(struct path *path, int flag)
2031{
2032 struct mount *m;
2033 struct mount *mnt = real_mount(path->mnt);
2034 int recurse = flag & MS_REC;
2035 int type;
2036 int err = 0;
2037
2038 if (path->dentry != path->mnt->mnt_root)
2039 return -EINVAL;
2040
2041 type = flags_to_propagation_type(flag);
2042 if (!type)
2043 return -EINVAL;
2044
2045 namespace_lock();
2046 if (type == MS_SHARED) {
2047 err = invent_group_ids(mnt, recurse);
2048 if (err)
2049 goto out_unlock;
2050 }
2051
2052 lock_mount_hash();
2053 for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL))
2054 change_mnt_propagation(m, type);
2055 unlock_mount_hash();
2056
2057 out_unlock:
2058 namespace_unlock();
2059 return err;
2060}
2061
2062static bool has_locked_children(struct mount *mnt, struct dentry *dentry)
2063{
2064 struct mount *child;
2065 list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
2066 if (!is_subdir(child->mnt_mountpoint, dentry))
2067 continue;
2068
2069 if (child->mnt.mnt_flags & MNT_LOCKED)
2070 return true;
2071 }
2072 return false;
2073}
2074
2075
2076
2077
/*
 * do_loopback - bind-mount @old_name onto @path.
 *
 * @path:     where the copy is to be attached
 * @old_name: kernel-space path of the source; NULL or empty is -EINVAL
 * @recurse:  non-zero copies the whole subtree with copy_tree(), zero
 *            clones only the single mount with clone_mnt()
 *
 * Returns 0 on success or a negative errno.  -EINVAL is returned when
 * mnt_ns_loop() rejects the source, when the source is unbindable, when
 * the destination fails check_mnt(), when the source fails check_mnt()
 * and is not a namespace-file dentry, or when a non-recursive bind would
 * leave locked child mounts behind.
 */
static int do_loopback(struct path *path, const char *old_name,
				int recurse)
{
	struct path old_path;
	struct mount *mnt = NULL, *old, *parent;
	struct mountpoint *mp;
	int err;
	if (!old_name || !*old_name)
		return -EINVAL;
	err = kern_path(old_name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &old_path);
	if (err)
		return err;

	err = -EINVAL;
	if (mnt_ns_loop(old_path.dentry))
		goto out;

	/* From here to out2 the destination is locked by lock_mount(). */
	mp = lock_mount(path);
	err = PTR_ERR(mp);
	if (IS_ERR(mp))
		goto out;

	old = real_mount(old_path.mnt);
	parent = real_mount(path->mnt);

	err = -EINVAL;
	if (IS_MNT_UNBINDABLE(old))
		goto out2;

	if (!check_mnt(parent))
		goto out2;

	if (!check_mnt(old) && old_path.dentry->d_op != &ns_dentry_operations)
		goto out2;

	if (!recurse && has_locked_children(old, old_path.dentry))
		goto out2;

	if (recurse)
		mnt = copy_tree(old, old_path.dentry, CL_COPY_MNT_NS_FILE);
	else
		mnt = clone_mnt(old, old_path.dentry, 0);

	if (IS_ERR(mnt)) {
		err = PTR_ERR(mnt);
		goto out2;
	}

	/* The root of the new copy is never left locked. */
	mnt->mnt.mnt_flags &= ~MNT_LOCKED;

	err = graft_tree(mnt, parent, mp);
	if (err) {
		/* Attaching failed: throw the copy away. */
		lock_mount_hash();
		umount_tree(mnt, UMOUNT_SYNC);
		unlock_mount_hash();
	}
out2:
	unlock_mount(mp);
out:
	path_put(&old_path);
	return err;
}
2140
2141static int change_mount_flags(struct vfsmount *mnt, int ms_flags)
2142{
2143 int error = 0;
2144 int readonly_request = 0;
2145
2146 if (ms_flags & MS_RDONLY)
2147 readonly_request = 1;
2148 if (readonly_request == __mnt_is_readonly(mnt))
2149 return 0;
2150
2151 if (readonly_request)
2152 error = mnt_make_readonly(real_mount(mnt));
2153 else
2154 __mnt_unmake_readonly(real_mount(mnt));
2155 return error;
2156}
2157
2158
2159
2160
2161
2162
/*
 * do_remount - change the flags of an existing mount.
 *
 * @path:      must be the root of a mount that passes check_mnt(),
 *             otherwise -EINVAL
 * @flags:     MS_* flags; with MS_BIND only the per-mount read-only
 *             state is changed, otherwise the superblock is remounted
 *             (which needs CAP_SYS_ADMIN)
 * @mnt_flags: requested MNT_* flags for the mount
 * @data:      filesystem-specific options, passed to the security hook
 *             and to do_remount_sb()
 *
 * Returns 0 on success, -EPERM if the request would clear a flag that is
 * locked on the mount or the caller lacks the capability, or the error
 * from the security hook or the remount itself.
 */
static int do_remount(struct path *path, int flags, int mnt_flags,
		      void *data)
{
	int err;
	struct super_block *sb = path->mnt->mnt_sb;
	struct mount *mnt = real_mount(path->mnt);

	if (!check_mnt(mnt))
		return -EINVAL;

	if (path->dentry != path->mnt->mnt_root)
		return -EINVAL;

	/*
	 * Each MNT_LOCK_* bit set on the mount forbids dropping the
	 * corresponding flag: refuse any request that tries to.
	 */
	if ((mnt->mnt.mnt_flags & MNT_LOCK_READONLY) &&
	    !(mnt_flags & MNT_READONLY)) {
		return -EPERM;
	}
	if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) &&
	    !(mnt_flags & MNT_NODEV)) {
		/*
		 * Exception: in a mount namespace not owned by init_user_ns,
		 * for a filesystem type without FS_USERNS_DEV_MOUNT, nodev
		 * is silently added back instead of failing the request.
		 */
		if ((mnt->mnt_ns->user_ns != &init_user_ns) &&
		    !(sb->s_type->fs_flags & FS_USERNS_DEV_MOUNT)) {
			mnt_flags |= MNT_NODEV;
		} else {
			return -EPERM;
		}
	}
	if ((mnt->mnt.mnt_flags & MNT_LOCK_NOSUID) &&
	    !(mnt_flags & MNT_NOSUID)) {
		return -EPERM;
	}
	if ((mnt->mnt.mnt_flags & MNT_LOCK_NOEXEC) &&
	    !(mnt_flags & MNT_NOEXEC)) {
		return -EPERM;
	}
	/* With locked atime settings, any change to the atime bits is refused. */
	if ((mnt->mnt.mnt_flags & MNT_LOCK_ATIME) &&
	    ((mnt->mnt.mnt_flags & MNT_ATIME_MASK) != (mnt_flags & MNT_ATIME_MASK))) {
		return -EPERM;
	}

	err = security_sb_remount(sb, data);
	if (err)
		return err;

	down_write(&sb->s_umount);
	if (flags & MS_BIND)
		err = change_mount_flags(path->mnt, flags);
	else if (!capable(CAP_SYS_ADMIN))
		err = -EPERM;
	else
		err = do_remount_sb(sb, flags, data, 0);
	if (!err) {
		lock_mount_hash();
		/* Keep every existing bit outside MNT_USER_SETTABLE_MASK. */
		mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK;
		mnt->mnt.mnt_flags = mnt_flags;
		touch_mnt_namespace(mnt->mnt_ns);
		unlock_mount_hash();
	}
	up_write(&sb->s_umount);
	return err;
}
2230
/*
 * tree_contains_unbindable - returns 1 if @mnt or any mount below it is
 * unbindable, 0 otherwise.
 */
static inline int tree_contains_unbindable(struct mount *mnt)
{
	struct mount *cur = mnt;

	while (cur) {
		if (IS_MNT_UNBINDABLE(cur))
			return 1;
		cur = next_mnt(cur, mnt);
	}
	return 0;
}
2240
/*
 * do_move_mount - move the mount at @old_name to @path.
 *
 * @path:     destination
 * @old_name: kernel-space path of the mount to move; NULL or empty is
 *            -EINVAL
 *
 * Returns 0 on success, -ELOOP if the destination lies inside the tree
 * being moved, -EINVAL if any of the sanity checks below fails, or the
 * error from the path lookup, lock_mount() or attach_recursive_mnt().
 */
static int do_move_mount(struct path *path, const char *old_name)
{
	struct path old_path, parent_path;
	struct mount *p;
	struct mount *old;
	struct mountpoint *mp;
	int err;
	if (!old_name || !*old_name)
		return -EINVAL;
	err = kern_path(old_name, LOOKUP_FOLLOW, &old_path);
	if (err)
		return err;

	mp = lock_mount(path);
	err = PTR_ERR(mp);
	if (IS_ERR(mp))
		goto out;

	old = real_mount(old_path.mnt);
	p = real_mount(path->mnt);

	err = -EINVAL;
	if (!check_mnt(p) || !check_mnt(old))
		goto out1;

	if (old->mnt.mnt_flags & MNT_LOCKED)
		goto out1;

	err = -EINVAL;
	/* The source must be the root of a mount ... */
	if (old_path.dentry != old_path.mnt->mnt_root)
		goto out1;

	/* ... and that mount must have a parent to be detached from. */
	if (!mnt_has_parent(old))
		goto out1;

	/* Directories go on directories, non-directories on non-directories. */
	if (d_is_dir(path->dentry) !=
	       d_is_dir(old_path.dentry))
		goto out1;
	/* A mount whose parent is shared cannot be moved. */
	if (IS_MNT_SHARED(old->mnt_parent))
		goto out1;
	/* A tree holding unbindable mounts cannot go under a shared mount. */
	if (IS_MNT_SHARED(p) && tree_contains_unbindable(old))
		goto out1;
	err = -ELOOP;
	/* Walk up from the destination: it must not be inside the source. */
	for (; mnt_has_parent(p); p = p->mnt_parent)
		if (p == old)
			goto out1;

	err = attach_recursive_mnt(old, real_mount(path->mnt), mp, &parent_path);
	if (err)
		goto out1;

	/* Take the moved mount off any expiry list it was on. */
	list_del_init(&old->mnt_expire);
out1:
	unlock_mount(mp);
out:
	/* parent_path is only filled in when the attach succeeded. */
	if (!err)
		path_put(&parent_path);
	path_put(&old_path);
	return err;
}
2310
2311static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype)
2312{
2313 int err;
2314 const char *subtype = strchr(fstype, '.');
2315 if (subtype) {
2316 subtype++;
2317 err = -EINVAL;
2318 if (!subtype[0])
2319 goto err;
2320 } else
2321 subtype = "";
2322
2323 mnt->mnt_sb->s_subtype = kstrdup(subtype, GFP_KERNEL);
2324 err = -ENOMEM;
2325 if (!mnt->mnt_sb->s_subtype)
2326 goto err;
2327 return mnt;
2328
2329 err:
2330 mntput(mnt);
2331 return ERR_PTR(err);
2332}
2333
2334
2335
2336
2337static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags)
2338{
2339 struct mountpoint *mp;
2340 struct mount *parent;
2341 int err;
2342
2343 mnt_flags &= ~MNT_INTERNAL_FLAGS;
2344
2345 mp = lock_mount(path);
2346 if (IS_ERR(mp))
2347 return PTR_ERR(mp);
2348
2349 parent = real_mount(path->mnt);
2350 err = -EINVAL;
2351 if (unlikely(!check_mnt(parent))) {
2352
2353 if (!(mnt_flags & MNT_SHRINKABLE))
2354 goto unlock;
2355
2356 if (!parent->mnt_ns)
2357 goto unlock;
2358 }
2359
2360
2361 err = -EBUSY;
2362 if (path->mnt->mnt_sb == newmnt->mnt.mnt_sb &&
2363 path->mnt->mnt_root == path->dentry)
2364 goto unlock;
2365
2366 err = -EINVAL;
2367 if (d_is_symlink(newmnt->mnt.mnt_root))
2368 goto unlock;
2369
2370 newmnt->mnt.mnt_flags = mnt_flags;
2371 err = graft_tree(newmnt, parent, mp);
2372
2373unlock:
2374 unlock_mount(mp);
2375 return err;
2376}
2377
2378static bool fs_fully_visible(struct file_system_type *fs_type, int *new_mnt_flags);
2379
2380
2381
2382
2383
2384static int do_new_mount(struct path *path, const char *fstype, int flags,
2385 int mnt_flags, const char *name, void *data)
2386{
2387 struct file_system_type *type;
2388 struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns;
2389 struct vfsmount *mnt;
2390 int err;
2391
2392 if (!fstype)
2393 return -EINVAL;
2394
2395 type = get_fs_type(fstype);
2396 if (!type)
2397 return -ENODEV;
2398
2399 if (user_ns != &init_user_ns) {
2400 if (!(type->fs_flags & FS_USERNS_MOUNT)) {
2401 put_filesystem(type);
2402 return -EPERM;
2403 }
2404
2405
2406
2407 if (!(type->fs_flags & FS_USERNS_DEV_MOUNT)) {
2408 flags |= MS_NODEV;
2409 mnt_flags |= MNT_NODEV | MNT_LOCK_NODEV;
2410 }
2411 if (type->fs_flags & FS_USERNS_VISIBLE) {
2412 if (!fs_fully_visible(type, &mnt_flags))
2413 return -EPERM;
2414 }
2415 }
2416
2417 mnt = vfs_kern_mount(type, flags, name, data);
2418 if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) &&
2419 !mnt->mnt_sb->s_subtype)
2420 mnt = fs_set_subtype(mnt, fstype);
2421
2422 put_filesystem(type);
2423 if (IS_ERR(mnt))
2424 return PTR_ERR(mnt);
2425
2426 err = do_add_mount(real_mount(mnt), path, mnt_flags);
2427 if (err)
2428 mntput(mnt);
2429 return err;
2430}
2431
2432int finish_automount(struct vfsmount *m, struct path *path)
2433{
2434 struct mount *mnt = real_mount(m);
2435 int err;
2436
2437
2438
2439 BUG_ON(mnt_get_count(mnt) < 2);
2440
2441 if (m->mnt_sb == path->mnt->mnt_sb &&
2442 m->mnt_root == path->dentry) {
2443 err = -ELOOP;
2444 goto fail;
2445 }
2446
2447 err = do_add_mount(mnt, path, path->mnt->mnt_flags | MNT_SHRINKABLE);
2448 if (!err)
2449 return 0;
2450fail:
2451
2452 if (!list_empty(&mnt->mnt_expire)) {
2453 namespace_lock();
2454 list_del_init(&mnt->mnt_expire);
2455 namespace_unlock();
2456 }
2457 mntput(m);
2458 mntput(m);
2459 return err;
2460}
2461
2462
2463
2464
2465
2466
2467void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list)
2468{
2469 namespace_lock();
2470
2471 list_add_tail(&real_mount(mnt)->mnt_expire, expiry_list);
2472
2473 namespace_unlock();
2474}
2475EXPORT_SYMBOL(mnt_set_expiry);
2476
2477
2478
2479
2480
2481
2482void mark_mounts_for_expiry(struct list_head *mounts)
2483{
2484 struct mount *mnt, *next;
2485 LIST_HEAD(graveyard);
2486
2487 if (list_empty(mounts))
2488 return;
2489
2490 namespace_lock();
2491 lock_mount_hash();
2492
2493
2494
2495
2496
2497
2498
2499 list_for_each_entry_safe(mnt, next, mounts, mnt_expire) {
2500 if (!xchg(&mnt->mnt_expiry_mark, 1) ||
2501 propagate_mount_busy(mnt, 1))
2502 continue;
2503 list_move(&mnt->mnt_expire, &graveyard);
2504 }
2505 while (!list_empty(&graveyard)) {
2506 mnt = list_first_entry(&graveyard, struct mount, mnt_expire);
2507 touch_mnt_namespace(mnt->mnt_ns);
2508 umount_tree(mnt, UMOUNT_PROPAGATE|UMOUNT_SYNC);
2509 }
2510 unlock_mount_hash();
2511 namespace_unlock();
2512}
2513
2514EXPORT_SYMBOL_GPL(mark_mounts_for_expiry);
2515
2516
2517
2518
2519
2520
2521
/*
 * select_submounts - collect idle MNT_SHRINKABLE submounts of @parent.
 *
 * @parent:    mount whose descendants are examined
 * @graveyard: list that selected mounts are moved onto (via mnt_expire)
 *
 * Walks the tree below @parent iteratively.  Only mounts flagged
 * MNT_SHRINKABLE are considered; one that has children of its own is
 * descended into rather than selected, and a childless one is selected
 * if propagate_mount_busy() says it is not busy.
 *
 * Returns the number of mounts moved to @graveyard.
 */
static int select_submounts(struct mount *parent, struct list_head *graveyard)
{
	struct mount *this_parent = parent;
	struct list_head *next;
	int found = 0;

repeat:
	/* Start scanning the children of this_parent. */
	next = this_parent->mnt_mounts.next;
resume:
	while (next != &this_parent->mnt_mounts) {
		struct list_head *tmp = next;
		struct mount *mnt = list_entry(tmp, struct mount, mnt_child);

		/* Advance first: mnt may be moved off to the graveyard below. */
		next = tmp->next;
		if (!(mnt->mnt.mnt_flags & MNT_SHRINKABLE))
			continue;
		/* Descend a level if this mount has children. */
		if (!list_empty(&mnt->mnt_mounts)) {
			this_parent = mnt;
			goto repeat;
		}

		if (!propagate_mount_busy(mnt, 1)) {
			list_move_tail(&mnt->mnt_expire, graveyard);
			found++;
		}
	}
	/* Finished this level: go back up and continue with the next sibling. */
	if (this_parent != parent) {
		next = this_parent->mnt_child.next;
		this_parent = this_parent->mnt_parent;
		goto resume;
	}
	return found;
}
2561
2562
2563
2564
2565
2566
2567
2568static void shrink_submounts(struct mount *mnt)
2569{
2570 LIST_HEAD(graveyard);
2571 struct mount *m;
2572
2573
2574 while (select_submounts(mnt, &graveyard)) {
2575 while (!list_empty(&graveyard)) {
2576 m = list_first_entry(&graveyard, struct mount,
2577 mnt_expire);
2578 touch_mnt_namespace(m->mnt_ns);
2579 umount_tree(m, UMOUNT_PROPAGATE|UMOUNT_SYNC);
2580 }
2581 }
2582}
2583
2584
2585
2586
2587
2588
2589
2590static long exact_copy_from_user(void *to, const void __user * from,
2591 unsigned long n)
2592{
2593 char *t = to;
2594 const char __user *f = from;
2595 char c;
2596
2597 if (!access_ok(VERIFY_READ, from, n))
2598 return n;
2599
2600 while (n) {
2601 if (__get_user(c, f)) {
2602 memset(t, 0, n);
2603 break;
2604 }
2605 *t++ = c;
2606 f++;
2607 n--;
2608 }
2609 return n;
2610}
2611
2612void *copy_mount_options(const void __user * data)
2613{
2614 int i;
2615 unsigned long size;
2616 char *copy;
2617
2618 if (!data)
2619 return NULL;
2620
2621 copy = kmalloc(PAGE_SIZE, GFP_KERNEL);
2622 if (!copy)
2623 return ERR_PTR(-ENOMEM);
2624
2625
2626
2627
2628
2629
2630 size = TASK_SIZE - (unsigned long)data;
2631 if (size > PAGE_SIZE)
2632 size = PAGE_SIZE;
2633
2634 i = size - exact_copy_from_user(copy, data, size);
2635 if (!i) {
2636 kfree(copy);
2637 return ERR_PTR(-EFAULT);
2638 }
2639 if (i != PAGE_SIZE)
2640 memset(copy + i, 0, PAGE_SIZE - i);
2641 return copy;
2642}
2643
2644char *copy_mount_string(const void __user *data)
2645{
2646 return data ? strndup_user(data, PAGE_SIZE) : NULL;
2647}
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
/*
 * do_mount - common back end of mount(2).
 *
 * @dev_name:  kernel copy of the device/source name
 * @dir_name:  user-space path of the mount point
 * @type_page: kernel copy of the filesystem type name
 * @flags:     MS_* flags as passed by the caller
 * @data_page: kernel copy of the option page, or NULL
 *
 * Translates the per-mount MS_* flags into MNT_* flags and then
 * dispatches, in this order of precedence, to remount, bind mount,
 * propagation-type change, move, or new mount.
 *
 * Returns 0 on success or a negative errno.
 */
long do_mount(const char *dev_name, const char __user *dir_name,
		const char *type_page, unsigned long flags, void *data_page)
{
	struct path path;
	int retval = 0;
	int mnt_flags = 0;

	/* Strip the magic value if the top bits of @flags carry it. */
	if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
		flags &= ~MS_MGC_MSK;

	/* Make sure the option page is NUL-terminated. */
	if (data_page)
		((char *)data_page)[PAGE_SIZE - 1] = 0;

	/* Resolve the mount point. */
	retval = user_path(dir_name, &path);
	if (retval)
		return retval;

	retval = security_sb_mount(dev_name, &path,
				   type_page, flags, data_page);
	if (!retval && !may_mount())
		retval = -EPERM;
	if (!retval && (flags & MS_MANDLOCK) && !may_mandlock())
		retval = -EPERM;
	if (retval)
		goto dput_out;

	/* relatime is the default unless noatime was requested. */
	if (!(flags & MS_NOATIME))
		mnt_flags |= MNT_RELATIME;

	/* Translate the per-mountpoint MS_* flags into MNT_* flags. */
	if (flags & MS_NOSUID)
		mnt_flags |= MNT_NOSUID;
	if (flags & MS_NODEV)
		mnt_flags |= MNT_NODEV;
	if (flags & MS_NOEXEC)
		mnt_flags |= MNT_NOEXEC;
	if (flags & MS_NOATIME)
		mnt_flags |= MNT_NOATIME;
	if (flags & MS_NODIRATIME)
		mnt_flags |= MNT_NODIRATIME;
	if (flags & MS_STRICTATIME)
		mnt_flags &= ~(MNT_RELATIME | MNT_NOATIME);
	if (flags & MS_RDONLY)
		mnt_flags |= MNT_READONLY;

	/* A remount that names no atime option keeps the mount's current ones. */
	if ((flags & MS_REMOUNT) &&
	    ((flags & (MS_NOATIME | MS_NODIRATIME | MS_RELATIME |
		       MS_STRICTATIME)) == 0)) {
		mnt_flags &= ~MNT_ATIME_MASK;
		mnt_flags |= path.mnt->mnt_flags & MNT_ATIME_MASK;
	}

	/* Drop the bits now carried in mnt_flags, plus kernel-internal ones. */
	flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN |
		   MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT |
		   MS_STRICTATIME);

	if (flags & MS_REMOUNT)
		retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags,
				    data_page);
	else if (flags & MS_BIND)
		retval = do_loopback(&path, dev_name, flags & MS_REC);
	else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
		retval = do_change_type(&path, flags);
	else if (flags & MS_MOVE)
		retval = do_move_mount(&path, dev_name);
	else
		retval = do_new_mount(&path, type_page, flags, mnt_flags,
				      dev_name, data_page);
dput_out:
	path_put(&path);
	return retval;
}
2740
2741static void free_mnt_ns(struct mnt_namespace *ns)
2742{
2743 ns_free_inum(&ns->ns);
2744 put_user_ns(ns->user_ns);
2745 kfree(ns);
2746}
2747
2748
2749
2750
2751
2752
2753
2754
2755static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1);
2756
2757static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
2758{
2759 struct mnt_namespace *new_ns;
2760 int ret;
2761
2762 new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
2763 if (!new_ns)
2764 return ERR_PTR(-ENOMEM);
2765 ret = ns_alloc_inum(&new_ns->ns);
2766 if (ret) {
2767 kfree(new_ns);
2768 return ERR_PTR(ret);
2769 }
2770 new_ns->ns.ops = &mntns_operations;
2771 new_ns->seq = atomic64_add_return(1, &mnt_ns_seq);
2772 atomic_set(&new_ns->count, 1);
2773 new_ns->root = NULL;
2774 INIT_LIST_HEAD(&new_ns->list);
2775 init_waitqueue_head(&new_ns->poll);
2776 new_ns->event = 0;
2777 new_ns->user_ns = get_user_ns(user_ns);
2778 return new_ns;
2779}
2780
/*
 * copy_mnt_ns - give a new task its mount namespace.
 *
 * @flags:   clone flags; without CLONE_NEWNS the existing namespace @ns
 *           is shared (its reference count is bumped) and returned
 * @ns:      the namespace of the parent; must not be NULL
 * @user_ns: user namespace that will own a newly created namespace
 * @new_fs:  if non-NULL, the fs_struct whose root and pwd mounts are
 *           switched over to the corresponding mounts in the copy
 *
 * Returns the namespace to use, or an ERR_PTR() if allocating the new
 * namespace or copying the mount tree fails.
 */
struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
		struct user_namespace *user_ns, struct fs_struct *new_fs)
{
	struct mnt_namespace *new_ns;
	struct vfsmount *rootmnt = NULL, *pwdmnt = NULL;
	struct mount *p, *q;
	struct mount *old;
	struct mount *new;
	int copy_flags;

	BUG_ON(!ns);

	if (likely(!(flags & CLONE_NEWNS))) {
		get_mnt_ns(ns);
		return ns;
	}

	old = ns->root;

	new_ns = alloc_mnt_ns(user_ns);
	if (IS_ERR(new_ns))
		return new_ns;

	namespace_lock();
	/* Copy the whole tree, including unbindable mounts. */
	copy_flags = CL_COPY_UNBINDABLE | CL_EXPIRE;
	/* A different owning user namespace gets a restricted copy. */
	if (user_ns != ns->user_ns)
		copy_flags |= CL_SHARED_TO_SLAVE | CL_UNPRIVILEGED;
	new = copy_tree(old, old->mnt.mnt_root, copy_flags);
	if (IS_ERR(new)) {
		namespace_unlock();
		free_mnt_ns(new_ns);
		return ERR_CAST(new);
	}
	new_ns->root = new;
	list_add_tail(&new_ns->list, &new->mnt_list);

	/*
	 * Walk the old tree (p) and the copy (q) in parallel: tag every new
	 * mount with the new namespace and, when new_fs is given, repoint
	 * its root and pwd at the matching mount in the copy.
	 */
	p = old;
	q = new;
	while (p) {
		q->mnt_ns = new_ns;
		if (new_fs) {
			if (&p->mnt == new_fs->root.mnt) {
				new_fs->root.mnt = mntget(&q->mnt);
				rootmnt = &p->mnt;
			}
			if (&p->mnt == new_fs->pwd.mnt) {
				new_fs->pwd.mnt = mntget(&q->mnt);
				pwdmnt = &p->mnt;
			}
		}
		p = next_mnt(p, old);
		q = next_mnt(q, new);
		if (!q)
			break;
		/* Skip old mounts that have no counterpart in the copy. */
		while (p->mnt.mnt_root != q->mnt.mnt_root)
			p = next_mnt(p, old);
	}
	namespace_unlock();

	/* Drop the references on the old mounts that new_fs no longer uses. */
	if (rootmnt)
		mntput(rootmnt);
	if (pwdmnt)
		mntput(pwdmnt);

	return new_ns;
}
2853
2854
2855
2856
2857
2858static struct mnt_namespace *create_mnt_ns(struct vfsmount *m)
2859{
2860 struct mnt_namespace *new_ns = alloc_mnt_ns(&init_user_ns);
2861 if (!IS_ERR(new_ns)) {
2862 struct mount *mnt = real_mount(m);
2863 mnt->mnt_ns = new_ns;
2864 new_ns->root = mnt;
2865 list_add(&mnt->mnt_list, &new_ns->list);
2866 } else {
2867 mntput(m);
2868 }
2869 return new_ns;
2870}
2871
2872struct dentry *mount_subtree(struct vfsmount *mnt, const char *name)
2873{
2874 struct mnt_namespace *ns;
2875 struct super_block *s;
2876 struct path path;
2877 int err;
2878
2879 ns = create_mnt_ns(mnt);
2880 if (IS_ERR(ns))
2881 return ERR_CAST(ns);
2882
2883 err = vfs_path_lookup(mnt->mnt_root, mnt,
2884 name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &path);
2885
2886 put_mnt_ns(ns);
2887
2888 if (err)
2889 return ERR_PTR(err);
2890
2891
2892 s = path.mnt->mnt_sb;
2893 atomic_inc(&s->s_active);
2894 mntput(path.mnt);
2895
2896 down_write(&s->s_umount);
2897
2898 return path.dentry;
2899}
2900EXPORT_SYMBOL(mount_subtree);
2901
2902SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
2903 char __user *, type, unsigned long, flags, void __user *, data)
2904{
2905 int ret;
2906 char *kernel_type;
2907 char *kernel_dev;
2908 void *options;
2909
2910 kernel_type = copy_mount_string(type);
2911 ret = PTR_ERR(kernel_type);
2912 if (IS_ERR(kernel_type))
2913 goto out_type;
2914
2915 kernel_dev = copy_mount_string(dev_name);
2916 ret = PTR_ERR(kernel_dev);
2917 if (IS_ERR(kernel_dev))
2918 goto out_dev;
2919
2920 options = copy_mount_options(data);
2921 ret = PTR_ERR(options);
2922 if (IS_ERR(options))
2923 goto out_data;
2924
2925 ret = do_mount(kernel_dev, dir_name, kernel_type, flags, options);
2926
2927 kfree(options);
2928out_data:
2929 kfree(kernel_dev);
2930out_dev:
2931 kfree(kernel_type);
2932out_type:
2933 return ret;
2934}
2935
2936
2937
2938
2939
2940
2941bool is_path_reachable(struct mount *mnt, struct dentry *dentry,
2942 const struct path *root)
2943{
2944 while (&mnt->mnt != root->mnt && mnt_has_parent(mnt)) {
2945 dentry = mnt->mnt_mountpoint;
2946 mnt = mnt->mnt_parent;
2947 }
2948 return &mnt->mnt == root->mnt && is_subdir(dentry, root->dentry);
2949}
2950
2951bool path_is_under(struct path *path1, struct path *path2)
2952{
2953 bool res;
2954 read_seqlock_excl(&mount_lock);
2955 res = is_path_reachable(real_mount(path1->mnt), path1->dentry, path2);
2956 read_sequnlock_excl(&mount_lock);
2957 return res;
2958}
2959EXPORT_SYMBOL(path_is_under);
2960
2961
2962
2963
2964
2965
2966
2967
2968
2969
2970
2971
2972
2973
2974
2975
2976
2977
2978
2979
2980
2981
2982
2983
2984
2985
/*
 * pivot_root(2) entry point.
 *
 * @new_root: user-space path of the directory to become the new root
 * @put_old:  user-space path of the directory where the current root
 *            mount is to be re-attached
 *
 * Swaps the caller's root mount with the mount at @new_root: the old
 * root is attached at @put_old and the new root takes the old root's
 * place under the old root's parent.
 *
 * Returns 0 on success; -EPERM if the caller may not mount; -EINVAL,
 * -ENOENT or -EBUSY if one of the checks below fails; or the error from
 * path lookup, the security hook or lock_mount().
 */
SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
		const char __user *, put_old)
{
	struct path new, old, parent_path, root_parent, root;
	struct mount *new_mnt, *root_mnt, *old_mnt;
	struct mountpoint *old_mp, *root_mp;
	int error;

	if (!may_mount())
		return -EPERM;

	error = user_path_dir(new_root, &new);
	if (error)
		goto out0;

	error = user_path_dir(put_old, &old);
	if (error)
		goto out1;

	error = security_sb_pivotroot(&old, &new);
	if (error)
		goto out2;

	get_fs_root(current->fs, &root);
	old_mp = lock_mount(&old);
	error = PTR_ERR(old_mp);
	if (IS_ERR(old_mp))
		goto out3;

	error = -EINVAL;
	new_mnt = real_mount(new.mnt);
	root_mnt = real_mount(root.mnt);
	old_mnt = real_mount(old.mnt);
	/* None of the mounts that get re-parented may sit in a shared tree. */
	if (IS_MNT_SHARED(old_mnt) ||
		IS_MNT_SHARED(new_mnt->mnt_parent) ||
		IS_MNT_SHARED(root_mnt->mnt_parent))
		goto out4;
	if (!check_mnt(root_mnt) || !check_mnt(new_mnt))
		goto out4;
	if (new_mnt->mnt.mnt_flags & MNT_LOCKED)
		goto out4;
	error = -ENOENT;
	if (d_unlinked(new.dentry))
		goto out4;
	error = -EBUSY;
	/* The new root and put_old must not be on the current root mount. */
	if (new_mnt == root_mnt || old_mnt == root_mnt)
		goto out4;
	error = -EINVAL;
	/* The current root must be the root of a mount that has a parent. */
	if (root.mnt->mnt_root != root.dentry)
		goto out4;
	if (!mnt_has_parent(root_mnt))
		goto out4;
	root_mp = root_mnt->mnt_mp;
	/* Likewise for the new root. */
	if (new.mnt->mnt_root != new.dentry)
		goto out4;
	if (!mnt_has_parent(new_mnt))
		goto out4;
	/* put_old must be reachable from the new root ... */
	if (!is_path_reachable(old_mnt, old.dentry, &new))
		goto out4;
	/* ... and the new root must be reachable from the current root. */
	if (!is_path_reachable(new_mnt, new.dentry, &root))
		goto out4;
	/* Pin root_mp; the matching put_mountpoint() follows the re-attach. */
	root_mp->m_count++;
	lock_mount_hash();
	detach_mnt(new_mnt, &parent_path);
	detach_mnt(root_mnt, &root_parent);
	/* Hand a lock on the old root mount over to the new root mount. */
	if (root_mnt->mnt.mnt_flags & MNT_LOCKED) {
		new_mnt->mnt.mnt_flags |= MNT_LOCKED;
		root_mnt->mnt.mnt_flags &= ~MNT_LOCKED;
	}
	/* Mount the old root on put_old. */
	attach_mnt(root_mnt, old_mnt, old_mp);
	/* Mount the new root where the old root used to be. */
	attach_mnt(new_mnt, real_mount(root_parent.mnt), root_mp);
	touch_mnt_namespace(current->nsproxy->mnt_ns);
	/* Take the new root off any expiry list it was on. */
	list_del_init(&new_mnt->mnt_expire);
	unlock_mount_hash();
	chroot_fs_refs(&root, &new);
	put_mountpoint(root_mp);
	error = 0;
out4:
	unlock_mount(old_mp);
	/* The two paths filled in by detach_mnt() exist only on success. */
	if (!error) {
		path_put(&root_parent);
		path_put(&parent_path);
	}
out3:
	path_put(&root);
out2:
	path_put(&old);
out1:
	path_put(&new);
out0:
	return error;
}
3083
3084static void __init init_mount_tree(void)
3085{
3086 struct vfsmount *mnt;
3087 struct mnt_namespace *ns;
3088 struct path root;
3089 struct file_system_type *type;
3090
3091 type = get_fs_type("rootfs");
3092 if (!type)
3093 panic("Can't find rootfs type");
3094 mnt = vfs_kern_mount(type, 0, "rootfs", NULL);
3095 put_filesystem(type);
3096 if (IS_ERR(mnt))
3097 panic("Can't create rootfs");
3098
3099 ns = create_mnt_ns(mnt);
3100 if (IS_ERR(ns))
3101 panic("Can't allocate initial namespace");
3102
3103 init_task.nsproxy->mnt_ns = ns;
3104 get_mnt_ns(ns);
3105
3106 root.mnt = mnt;
3107 root.dentry = mnt->mnt_root;
3108 mnt->mnt_flags |= MNT_LOCKED;
3109
3110 set_fs_pwd(current->fs, &root);
3111 set_fs_root(current->fs, &root);
3112}
3113
3114void __init mnt_init(void)
3115{
3116 unsigned u;
3117 int err;
3118
3119 mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct mount),
3120 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
3121
3122 mount_hashtable = alloc_large_system_hash("Mount-cache",
3123 sizeof(struct hlist_head),
3124 mhash_entries, 19,
3125 0,
3126 &m_hash_shift, &m_hash_mask, 0, 0);
3127 mountpoint_hashtable = alloc_large_system_hash("Mountpoint-cache",
3128 sizeof(struct hlist_head),
3129 mphash_entries, 19,
3130 0,
3131 &mp_hash_shift, &mp_hash_mask, 0, 0);
3132
3133 if (!mount_hashtable || !mountpoint_hashtable)
3134 panic("Failed to allocate mount hash table\n");
3135
3136 for (u = 0; u <= m_hash_mask; u++)
3137 INIT_HLIST_HEAD(&mount_hashtable[u]);
3138 for (u = 0; u <= mp_hash_mask; u++)
3139 INIT_HLIST_HEAD(&mountpoint_hashtable[u]);
3140
3141 kernfs_init();
3142
3143 err = sysfs_init();
3144 if (err)
3145 printk(KERN_WARNING "%s: sysfs_init error: %d\n",
3146 __func__, err);
3147 fs_kobj = kobject_create_and_add("fs", NULL);
3148 if (!fs_kobj)
3149 printk(KERN_WARNING "%s: kobj create error\n", __func__);
3150 init_rootfs();
3151 init_mount_tree();
3152}
3153
3154void put_mnt_ns(struct mnt_namespace *ns)
3155{
3156 if (!atomic_dec_and_test(&ns->count))
3157 return;
3158 drop_collected_mounts(&ns->root->mnt);
3159 free_mnt_ns(ns);
3160}
3161
3162struct vfsmount *kern_mount_data(struct file_system_type *type, void *data)
3163{
3164 struct vfsmount *mnt;
3165 mnt = vfs_kern_mount(type, MS_KERNMOUNT, type->name, data);
3166 if (!IS_ERR(mnt)) {
3167
3168
3169
3170
3171 real_mount(mnt)->mnt_ns = MNT_NS_INTERNAL;
3172 }
3173 return mnt;
3174}
3175EXPORT_SYMBOL_GPL(kern_mount_data);
3176
3177void kern_unmount(struct vfsmount *mnt)
3178{
3179
3180 if (!IS_ERR_OR_NULL(mnt)) {
3181 real_mount(mnt)->mnt_ns = NULL;
3182 synchronize_rcu();
3183 mntput(mnt);
3184 }
3185}
3186EXPORT_SYMBOL(kern_unmount);
3187
3188bool our_mnt(struct vfsmount *mnt)
3189{
3190 return check_mnt(real_mount(mnt));
3191}
3192
3193bool current_chrooted(void)
3194{
3195
3196 struct path ns_root;
3197 struct path fs_root;
3198 bool chrooted;
3199
3200
3201 ns_root.mnt = ¤t->nsproxy->mnt_ns->root->mnt;
3202 ns_root.dentry = ns_root.mnt->mnt_root;
3203 path_get(&ns_root);
3204 while (d_mountpoint(ns_root.dentry) && follow_down_one(&ns_root))
3205 ;
3206
3207 get_fs_root(current->fs, &fs_root);
3208
3209 chrooted = !path_equal(&fs_root, &ns_root);
3210
3211 path_put(&fs_root);
3212 path_put(&ns_root);
3213
3214 return chrooted;
3215}
3216
3217static bool fs_fully_visible(struct file_system_type *type, int *new_mnt_flags)
3218{
3219 struct mnt_namespace *ns = current->nsproxy->mnt_ns;
3220 int new_flags = *new_mnt_flags;
3221 struct mount *mnt;
3222 bool visible = false;
3223
3224 if (unlikely(!ns))
3225 return false;
3226
3227 down_read(&namespace_sem);
3228 list_for_each_entry(mnt, &ns->list, mnt_list) {
3229 struct mount *child;
3230 int mnt_flags;
3231
3232 if (mnt->mnt.mnt_sb->s_type != type)
3233 continue;
3234
3235
3236
3237
3238 if (mnt->mnt.mnt_root != mnt->mnt.mnt_sb->s_root)
3239 continue;
3240
3241
3242
3243
3244 mnt_flags = mnt->mnt.mnt_flags;
3245 if (mnt->mnt.mnt_sb->s_iflags & SB_I_NOEXEC)
3246 mnt_flags &= ~(MNT_LOCK_NOSUID | MNT_LOCK_NOEXEC);
3247
3248
3249
3250
3251 if ((mnt_flags & MNT_LOCK_READONLY) &&
3252 !(new_flags & MNT_READONLY))
3253 continue;
3254 if ((mnt_flags & MNT_LOCK_NODEV) &&
3255 !(new_flags & MNT_NODEV))
3256 continue;
3257 if ((mnt_flags & MNT_LOCK_NOSUID) &&
3258 !(new_flags & MNT_NOSUID))
3259 continue;
3260 if ((mnt_flags & MNT_LOCK_NOEXEC) &&
3261 !(new_flags & MNT_NOEXEC))
3262 continue;
3263 if ((mnt_flags & MNT_LOCK_ATIME) &&
3264 ((mnt_flags & MNT_ATIME_MASK) != (new_flags & MNT_ATIME_MASK)))
3265 continue;
3266
3267
3268
3269
3270
3271 list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
3272 struct inode *inode = child->mnt_mountpoint->d_inode;
3273
3274 if (!(mnt_flags & MNT_LOCKED))
3275 continue;
3276
3277 if (!is_empty_dir_inode(inode))
3278 goto next;
3279 }
3280
3281 *new_mnt_flags |= mnt_flags & (MNT_LOCK_READONLY | \
3282 MNT_LOCK_NODEV | \
3283 MNT_LOCK_NOSUID | \
3284 MNT_LOCK_NOEXEC | \
3285 MNT_LOCK_ATIME);
3286 visible = true;
3287 goto found;
3288 next: ;
3289 }
3290found:
3291 up_read(&namespace_sem);
3292 return visible;
3293}
3294
3295static struct ns_common *mntns_get(struct task_struct *task)
3296{
3297 struct ns_common *ns = NULL;
3298 struct nsproxy *nsproxy;
3299
3300 task_lock(task);
3301 nsproxy = task->nsproxy;
3302 if (nsproxy) {
3303 ns = &nsproxy->mnt_ns->ns;
3304 get_mnt_ns(to_mnt_ns(ns));
3305 }
3306 task_unlock(task);
3307
3308 return ns;
3309}
3310
/*
 * proc_ns ->put hook: drop one reference on the mount namespace embedding
 * @ns.
 */
static void mntns_put(struct ns_common *ns)
{
	struct mnt_namespace *mnt_ns = to_mnt_ns(ns);

	put_mnt_ns(mnt_ns);
}
3315
3316static int mntns_install(struct nsproxy *nsproxy, struct ns_common *ns)
3317{
3318 struct fs_struct *fs = current->fs;
3319 struct mnt_namespace *mnt_ns = to_mnt_ns(ns);
3320 struct path root;
3321
3322 if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) ||
3323 !ns_capable(current_user_ns(), CAP_SYS_CHROOT) ||
3324 !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
3325 return -EPERM;
3326
3327 if (fs->users != 1)
3328 return -EINVAL;
3329
3330 get_mnt_ns(mnt_ns);
3331 put_mnt_ns(nsproxy->mnt_ns);
3332 nsproxy->mnt_ns = mnt_ns;
3333
3334
3335 root.mnt = &mnt_ns->root->mnt;
3336 root.dentry = mnt_ns->root->mnt.mnt_root;
3337 path_get(&root);
3338 while(d_mountpoint(root.dentry) && follow_down_one(&root))
3339 ;
3340
3341
3342 set_fs_pwd(fs, &root);
3343 set_fs_root(fs, &root);
3344
3345 path_put(&root);
3346 return 0;
3347}
3348
3349const struct proc_ns_operations mntns_operations = {
3350 .name = "mnt",
3351 .type = CLONE_NEWNS,
3352 .get = mntns_get,
3353 .put = mntns_put,
3354 .install = mntns_install,
3355};
3356