/*
 *  linux/fs/namespace.c
 *
 * (C) Copyright Al Viro 2000, 2001
 *	Released under GPL v2.
 *
 * Based on code from fs/super.c, copyright Linus Torvalds and others.
 * Heavily rewritten.
 */
#include <linux/syscalls.h>
#include <linux/export.h>
#include <linux/capability.h>
#include <linux/mnt_namespace.h>
#include <linux/user_namespace.h>
#include <linux/namei.h>
#include <linux/security.h>
#include <linux/idr.h>
#include <linux/acct.h>
#include <linux/ramfs.h>
#include <linux/fs_struct.h>
#include <linux/fsnotify.h>
#include <linux/uaccess.h>
#include <linux/proc_ns.h>
#include <linux/magic.h>
#include "pnode.h"
#include "internal.h"

#define HASH_SHIFT ilog2(PAGE_SIZE / sizeof(struct list_head))
#define HASH_SIZE (1UL << HASH_SHIFT)

static int event;
static DEFINE_IDA(mnt_id_ida);
static DEFINE_IDA(mnt_group_ida);
static DEFINE_SPINLOCK(mnt_id_lock);
static int mnt_id_start = 0;
static int mnt_group_start = 1;

static struct list_head *mount_hashtable __read_mostly;
static struct list_head *mountpoint_hashtable __read_mostly;
static struct kmem_cache *mnt_cache __read_mostly;
static struct rw_semaphore namespace_sem;

struct kobject *fs_kobj;
EXPORT_SYMBOL_GPL(fs_kobj);
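/*
 * vfsmount_lock is a big reader/writer lock: it is taken for read to
 * keep the mount hash and mount tree topology stable (mountpoint
 * lookups, walking back up the tree), and for write whenever mounts are
 * attached, detached or have their flags or counters modified.
 */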
DEFINE_BRLOCK(vfsmount_lock);

static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
{
	unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES);
	tmp += ((unsigned long)dentry / L1_CACHE_BYTES);
	tmp = tmp + (tmp >> HASH_SHIFT);
	return tmp & (HASH_SIZE - 1);
}

#define MNT_WRITER_UNDERFLOW_LIMIT -(1<<16)
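/*
 * Mount ID allocation is serialized by namespace_sem, but mnt_id_lock
 * is still needed to serialize against freeing in mnt_free_id().
 */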
static int mnt_alloc_id(struct mount *mnt)
{
	int res;

retry:
	ida_pre_get(&mnt_id_ida, GFP_KERNEL);
	spin_lock(&mnt_id_lock);
	res = ida_get_new_above(&mnt_id_ida, mnt_id_start, &mnt->mnt_id);
	if (!res)
		mnt_id_start = mnt->mnt_id + 1;
	spin_unlock(&mnt_id_lock);
	if (res == -EAGAIN)
		goto retry;

	return res;
}

static void mnt_free_id(struct mount *mnt)
{
	int id = mnt->mnt_id;
	spin_lock(&mnt_id_lock);
	ida_remove(&mnt_id_ida, id);
	if (mnt_id_start > id)
		mnt_id_start = id;
	spin_unlock(&mnt_id_lock);
}
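/*
 * Allocate a new peer group ID.
 *
 * mnt_group_ida is protected from concurrent access by namespace_sem.
 */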
static int mnt_alloc_group_id(struct mount *mnt)
{
	int res;

	if (!ida_pre_get(&mnt_group_ida, GFP_KERNEL))
		return -ENOMEM;

	res = ida_get_new_above(&mnt_group_ida,
				mnt_group_start,
				&mnt->mnt_group_id);
	if (!res)
		mnt_group_start = mnt->mnt_group_id + 1;

	return res;
}
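/*
 * Release a peer group ID.
 */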
void mnt_release_group_id(struct mount *mnt)
{
	int id = mnt->mnt_group_id;
	ida_remove(&mnt_group_ida, id);
	if (mnt_group_start > id)
		mnt_group_start = id;
	mnt->mnt_group_id = 0;
}
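/*
 * vfsmount lock must be held for read
 */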
static inline void mnt_add_count(struct mount *mnt, int n)
{
#ifdef CONFIG_SMP
	this_cpu_add(mnt->mnt_pcp->mnt_count, n);
#else
	preempt_disable();
	mnt->mnt_count += n;
	preempt_enable();
#endif
}
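/*
 * vfsmount lock must be held for write
 */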
unsigned int mnt_get_count(struct mount *mnt)
{
#ifdef CONFIG_SMP
	unsigned int count = 0;
	int cpu;

	for_each_possible_cpu(cpu) {
		count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_count;
	}

	return count;
#else
	return mnt->mnt_count;
#endif
}

static struct mount *alloc_vfsmnt(const char *name)
{
	struct mount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
	if (mnt) {
		int err;

		err = mnt_alloc_id(mnt);
		if (err)
			goto out_free_cache;

		if (name) {
			mnt->mnt_devname = kstrdup(name, GFP_KERNEL);
			if (!mnt->mnt_devname)
				goto out_free_id;
		}

#ifdef CONFIG_SMP
		mnt->mnt_pcp = alloc_percpu(struct mnt_pcp);
		if (!mnt->mnt_pcp)
			goto out_free_devname;

		this_cpu_add(mnt->mnt_pcp->mnt_count, 1);
#else
		mnt->mnt_count = 1;
		mnt->mnt_writers = 0;
#endif

		INIT_LIST_HEAD(&mnt->mnt_hash);
		INIT_LIST_HEAD(&mnt->mnt_child);
		INIT_LIST_HEAD(&mnt->mnt_mounts);
		INIT_LIST_HEAD(&mnt->mnt_list);
		INIT_LIST_HEAD(&mnt->mnt_expire);
		INIT_LIST_HEAD(&mnt->mnt_share);
		INIT_LIST_HEAD(&mnt->mnt_slave_list);
		INIT_LIST_HEAD(&mnt->mnt_slave);
#ifdef CONFIG_FSNOTIFY
		INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks);
#endif
	}
	return mnt;

#ifdef CONFIG_SMP
out_free_devname:
	kfree(mnt->mnt_devname);
#endif
out_free_id:
	mnt_free_id(mnt);
out_free_cache:
	kmem_cache_free(mnt_cache, mnt);
	return NULL;
}
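/*
 * Most r/o checks on a fs are for operations that take discrete
 * amounts of time, like a write() or unlink().  We must keep track of
 * when those operations start (for permission checks) and when they
 * end, so that we can determine when writes are able to occur to a
 * filesystem.
 */
/*
 * __mnt_is_readonly: check whether a mount is read-only
 * @mnt: the mount to check for its write status
 *
 * This shouldn't be used directly outside of the VFS.  It does not
 * guarantee that the returned value will remain valid once the caller
 * looks at it.
 */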
int __mnt_is_readonly(struct vfsmount *mnt)
{
	if (mnt->mnt_flags & MNT_READONLY)
		return 1;
	if (mnt->mnt_sb->s_flags & MS_RDONLY)
		return 1;
	return 0;
}
EXPORT_SYMBOL_GPL(__mnt_is_readonly);

static inline void mnt_inc_writers(struct mount *mnt)
{
#ifdef CONFIG_SMP
	this_cpu_inc(mnt->mnt_pcp->mnt_writers);
#else
	mnt->mnt_writers++;
#endif
}

static inline void mnt_dec_writers(struct mount *mnt)
{
#ifdef CONFIG_SMP
	this_cpu_dec(mnt->mnt_pcp->mnt_writers);
#else
	mnt->mnt_writers--;
#endif
}

static unsigned int mnt_get_writers(struct mount *mnt)
{
#ifdef CONFIG_SMP
	unsigned int count = 0;
	int cpu;

	for_each_possible_cpu(cpu) {
		count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_writers;
	}

	return count;
#else
	return mnt->mnt_writers;
#endif
}

static int mnt_is_readonly(struct vfsmount *mnt)
{
	if (mnt->mnt_sb->s_readonly_remount)
		return 1;
	/* Order wrt setting s_flags/s_readonly_remount in do_remount() */
	smp_rmb();
	return __mnt_is_readonly(mnt);
}
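/*
 * __mnt_want_write - get write access to a mount without freeze protection
 * @m: the mount on which to take a write
 *
 * This tells the low-level filesystem that a write is about to be
 * performed to it, and makes sure that writes are allowed (the mount is
 * read-write) before returning success.  This operation does not protect
 * against the filesystem being frozen.  When the write operation is
 * finished, __mnt_drop_write() must be called.  This is effectively a
 * refcount.
 */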
int __mnt_want_write(struct vfsmount *m)
{
	struct mount *mnt = real_mount(m);
	int ret = 0;

	preempt_disable();
	mnt_inc_writers(mnt);
	/*
	 * The store to mnt_inc_writers must be visible before we pass
	 * MNT_WRITE_HOLD loop below, so that the slowpath can see our
	 * incremented count after it has set MNT_WRITE_HOLD.
	 */
	smp_mb();
	while (ACCESS_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD)
		cpu_relax();
	/*
	 * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will
	 * be set to match its requirements. So we must not load that until
	 * MNT_WRITE_HOLD is cleared.
	 */
	smp_rmb();
	if (mnt_is_readonly(m)) {
		mnt_dec_writers(mnt);
		ret = -EROFS;
	}
	preempt_enable();

	return ret;
}
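/**
 * mnt_want_write - get write access to a mount
 * @m: the mount on which to take a write
 *
 * This tells the low-level filesystem that a write is about to be
 * performed to it, and makes sure that writes are allowed (mount is
 * read-write, filesystem is not frozen) before returning success.  When
 * the write operation is finished, mnt_drop_write() must be called.
 * This is effectively a refcount.
 */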
int mnt_want_write(struct vfsmount *m)
{
	int ret;

	sb_start_write(m->mnt_sb);
	ret = __mnt_want_write(m);
	if (ret)
		sb_end_write(m->mnt_sb);
	return ret;
}
EXPORT_SYMBOL_GPL(mnt_want_write);
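/**
 * mnt_clone_write - get write access to a mount
 * @mnt: the mount on which to take a write
 *
 * This is effectively like mnt_want_write, except it must only be used
 * to take an extra write reference on a mountpoint that we already know
 * has a write reference on it.  This allows some optimisation.
 *
 * After finished, mnt_drop_write must be called as usual to drop the
 * reference.
 */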
int mnt_clone_write(struct vfsmount *mnt)
{
	/* superblock may be r/o */
	if (__mnt_is_readonly(mnt))
		return -EROFS;
	preempt_disable();
	mnt_inc_writers(real_mount(mnt));
	preempt_enable();
	return 0;
}
EXPORT_SYMBOL_GPL(mnt_clone_write);
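/**
 * __mnt_want_write_file - get write access to a file's mount
 * @file: the file who's mount on which to take a write
 *
 * This is like __mnt_want_write, but it takes a file and can do some
 * optimisations if the file is open for write already.
 */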
int __mnt_want_write_file(struct file *file)
{
	struct inode *inode = file_inode(file);

	if (!(file->f_mode & FMODE_WRITE) || special_file(inode->i_mode))
		return __mnt_want_write(file->f_path.mnt);
	else
		return mnt_clone_write(file->f_path.mnt);
}
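/**
 * mnt_want_write_file - get write access to a file's mount
 * @file: the file who's mount on which to take a write
 *
 * This is like mnt_want_write, but it takes a file and can do some
 * optimisations if the file is open for write already.
 */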
int mnt_want_write_file(struct file *file)
{
	int ret;

	sb_start_write(file->f_path.mnt->mnt_sb);
	ret = __mnt_want_write_file(file);
	if (ret)
		sb_end_write(file->f_path.mnt->mnt_sb);
	return ret;
}
EXPORT_SYMBOL_GPL(mnt_want_write_file);
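/**
 * __mnt_drop_write - give up write access to a mount
 * @mnt: the mount on which to give up write access
 *
 * Tells the low-level filesystem that we are done performing writes to
 * it.  Must be matched with __mnt_want_write() call above.
 */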
void __mnt_drop_write(struct vfsmount *mnt)
{
	preempt_disable();
	mnt_dec_writers(real_mount(mnt));
	preempt_enable();
}
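/**
 * mnt_drop_write - give up write access to a mount
 * @mnt: the mount on which to give up write access
 *
 * Tells the low-level filesystem that we are done performing writes to
 * it and also allows the filesystem to be frozen again.  Must be matched
 * with mnt_want_write() call above.
 */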
void mnt_drop_write(struct vfsmount *mnt)
{
	__mnt_drop_write(mnt);
	sb_end_write(mnt->mnt_sb);
}
EXPORT_SYMBOL_GPL(mnt_drop_write);

void __mnt_drop_write_file(struct file *file)
{
	__mnt_drop_write(file->f_path.mnt);
}

void mnt_drop_write_file(struct file *file)
{
	mnt_drop_write(file->f_path.mnt);
}
EXPORT_SYMBOL(mnt_drop_write_file);

static int mnt_make_readonly(struct mount *mnt)
{
	int ret = 0;

	br_write_lock(&vfsmount_lock);
	mnt->mnt.mnt_flags |= MNT_WRITE_HOLD;
	/*
	 * After storing MNT_WRITE_HOLD, we'll read the counters. This store
	 * should be visible before we do.
	 */
	smp_mb();

	/*
	 * With writers on hold, if this value is zero, then there are
	 * definitely no active writers (although held writers may subsequently
	 * increment the count, they'll have to wait, and decrement it after
	 * seeing MNT_READONLY).
	 *
	 * It is OK to have counter incremented on one CPU and decremented on
	 * another: the sum will add up correctly. The danger would be when we
	 * sum up each counter, if we read a counter before it is incremented,
	 * but then read another CPU's count which it has been subsequently
	 * decremented from -- we would see more decrements than we should.
	 * MNT_WRITE_HOLD protects against this scenario, because
	 * mnt_want_write first increments count, then smp_mb, then spins on
	 * MNT_WRITE_HOLD, so it can't be decremented by another CPU while
	 * we're counting up here.
	 */
	if (mnt_get_writers(mnt) > 0)
		ret = -EBUSY;
	else
		mnt->mnt.mnt_flags |= MNT_READONLY;
	/*
	 * MNT_READONLY must become visible before ~MNT_WRITE_HOLD, so writers
	 * that become unheld will see MNT_READONLY.
	 */
	smp_wmb();
	mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
	br_write_unlock(&vfsmount_lock);
	return ret;
}

static void __mnt_unmake_readonly(struct mount *mnt)
{
	br_write_lock(&vfsmount_lock);
	mnt->mnt.mnt_flags &= ~MNT_READONLY;
	br_write_unlock(&vfsmount_lock);
}

int sb_prepare_remount_readonly(struct super_block *sb)
{
	struct mount *mnt;
	int err = 0;

	/* Racy optimization.  Recheck the counter under MNT_WRITE_HOLD */
	if (atomic_long_read(&sb->s_remove_count))
		return -EBUSY;

	br_write_lock(&vfsmount_lock);
	list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) {
		if (!(mnt->mnt.mnt_flags & MNT_READONLY)) {
			mnt->mnt.mnt_flags |= MNT_WRITE_HOLD;
			smp_mb();
			if (mnt_get_writers(mnt) > 0) {
				err = -EBUSY;
				break;
			}
		}
	}
	if (!err && atomic_long_read(&sb->s_remove_count))
		err = -EBUSY;

	if (!err) {
		sb->s_readonly_remount = 1;
		smp_wmb();
	}
	list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) {
		if (mnt->mnt.mnt_flags & MNT_WRITE_HOLD)
			mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
	}
	br_write_unlock(&vfsmount_lock);

	return err;
}

static void free_vfsmnt(struct mount *mnt)
{
	kfree(mnt->mnt_devname);
	mnt_free_id(mnt);
#ifdef CONFIG_SMP
	free_percpu(mnt->mnt_pcp);
#endif
	kmem_cache_free(mnt_cache, mnt);
}
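/*
 * find the first or last mount at @dentry on vfsmount @mnt depending on
 * @dir.  If @dir is set return the first mount else return the last mount.
 * vfsmount_lock must be held for read or write.
 */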
struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry,
			      int dir)
{
	struct list_head *head = mount_hashtable + hash(mnt, dentry);
	struct list_head *tmp = head;
	struct mount *p, *found = NULL;

	for (;;) {
		tmp = dir ? tmp->next : tmp->prev;
		p = NULL;
		if (tmp == head)
			break;
		p = list_entry(tmp, struct mount, mnt_hash);
		if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry) {
			found = p;
			break;
		}
	}
	return found;
}
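/*
 * lookup_mnt - Return the first child mount mounted at path
 *
 * "First" means first mounted chronologically.  If you create the
 * following mounts:
 *
 * mount /dev/sda1 /mnt
 * mount /dev/sda2 /mnt
 * mount /dev/sda3 /mnt
 *
 * Then lookup_mnt() on the base /mnt dentry in the root mount will
 * return successively the root dentry and vfsmount of /dev/sda1, then
 * /dev/sda2, then /dev/sda3, then NULL.
 *
 * lookup_mnt takes a reference to the found vfsmount.
 */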
struct vfsmount *lookup_mnt(struct path *path)
{
	struct mount *child_mnt;

	br_read_lock(&vfsmount_lock);
	child_mnt = __lookup_mnt(path->mnt, path->dentry, 1);
	if (child_mnt) {
		mnt_add_count(child_mnt, 1);
		br_read_unlock(&vfsmount_lock);
		return &child_mnt->mnt;
	} else {
		br_read_unlock(&vfsmount_lock);
		return NULL;
	}
}

static struct mountpoint *new_mountpoint(struct dentry *dentry)
{
	struct list_head *chain = mountpoint_hashtable + hash(NULL, dentry);
	struct mountpoint *mp;

	list_for_each_entry(mp, chain, m_hash) {
		if (mp->m_dentry == dentry) {
			/* might be worth a WARN_ON() */
			if (d_unlinked(dentry))
				return ERR_PTR(-ENOENT);
			mp->m_count++;
			return mp;
		}
	}

	mp = kmalloc(sizeof(struct mountpoint), GFP_KERNEL);
	if (!mp)
		return ERR_PTR(-ENOMEM);

	spin_lock(&dentry->d_lock);
	if (d_unlinked(dentry)) {
		spin_unlock(&dentry->d_lock);
		kfree(mp);
		return ERR_PTR(-ENOENT);
	}
	dentry->d_flags |= DCACHE_MOUNTED;
	spin_unlock(&dentry->d_lock);
	mp->m_dentry = dentry;
	mp->m_count = 1;
	list_add(&mp->m_hash, chain);
	return mp;
}

static void put_mountpoint(struct mountpoint *mp)
{
	if (!--mp->m_count) {
		struct dentry *dentry = mp->m_dentry;
		spin_lock(&dentry->d_lock);
		dentry->d_flags &= ~DCACHE_MOUNTED;
		spin_unlock(&dentry->d_lock);
		list_del(&mp->m_hash);
		kfree(mp);
	}
}

static inline int check_mnt(struct mount *mnt)
{
	return mnt->mnt_ns == current->nsproxy->mnt_ns;
}
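/*
 * vfsmount lock must be held for write
 */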
static void touch_mnt_namespace(struct mnt_namespace *ns)
{
	if (ns) {
		ns->event = ++event;
		wake_up_interruptible(&ns->poll);
	}
}
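/*
 * vfsmount lock must be held for write
 */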
static void __touch_mnt_namespace(struct mnt_namespace *ns)
{
	if (ns && ns->event != event) {
		ns->event = event;
		wake_up_interruptible(&ns->poll);
	}
}
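/*
 * vfsmount lock must be held for write
 */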
static void detach_mnt(struct mount *mnt, struct path *old_path)
{
	old_path->dentry = mnt->mnt_mountpoint;
	old_path->mnt = &mnt->mnt_parent->mnt;
	mnt->mnt_parent = mnt;
	mnt->mnt_mountpoint = mnt->mnt.mnt_root;
	list_del_init(&mnt->mnt_child);
	list_del_init(&mnt->mnt_hash);
	put_mountpoint(mnt->mnt_mp);
	mnt->mnt_mp = NULL;
}
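/*
 * vfsmount lock must be held for write
 */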
void mnt_set_mountpoint(struct mount *mnt,
			struct mountpoint *mp,
			struct mount *child_mnt)
{
	mp->m_count++;
	mnt_add_count(mnt, 1);	/* essentially, that's mntget */
	child_mnt->mnt_mountpoint = dget(mp->m_dentry);
	child_mnt->mnt_parent = mnt;
	child_mnt->mnt_mp = mp;
}
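/*
 * vfsmount lock must be held for write
 */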
static void attach_mnt(struct mount *mnt,
			struct mount *parent,
			struct mountpoint *mp)
{
	mnt_set_mountpoint(parent, mp, mnt);
	list_add_tail(&mnt->mnt_hash, mount_hashtable +
			hash(&parent->mnt, mp->m_dentry));
	list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
}
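/*
 * vfsmount lock must be held for write
 */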
static void commit_tree(struct mount *mnt)
{
	struct mount *parent = mnt->mnt_parent;
	struct mount *m;
	LIST_HEAD(head);
	struct mnt_namespace *n = parent->mnt_ns;

	BUG_ON(parent == mnt);

	list_add_tail(&head, &mnt->mnt_list);
	list_for_each_entry(m, &head, mnt_list)
		m->mnt_ns = n;

	list_splice(&head, n->list.prev);

	list_add_tail(&mnt->mnt_hash, mount_hashtable +
				hash(&parent->mnt, mnt->mnt_mountpoint));
	list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
	touch_mnt_namespace(n);
}

static struct mount *next_mnt(struct mount *p, struct mount *root)
{
	struct list_head *next = p->mnt_mounts.next;
	if (next == &p->mnt_mounts) {
		while (1) {
			if (p == root)
				return NULL;
			next = p->mnt_child.next;
			if (next != &p->mnt_parent->mnt_mounts)
				break;
			p = p->mnt_parent;
		}
	}
	return list_entry(next, struct mount, mnt_child);
}

static struct mount *skip_mnt_tree(struct mount *p)
{
	struct list_head *prev = p->mnt_mounts.prev;
	while (prev != &p->mnt_mounts) {
		p = list_entry(prev, struct mount, mnt_child);
		prev = p->mnt_mounts.prev;
	}
	return p;
}

struct vfsmount *
vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data)
{
	struct mount *mnt;
	struct dentry *root;

	if (!type)
		return ERR_PTR(-ENODEV);

	mnt = alloc_vfsmnt(name);
	if (!mnt)
		return ERR_PTR(-ENOMEM);

	if (flags & MS_KERNMOUNT)
		mnt->mnt.mnt_flags = MNT_INTERNAL;

	root = mount_fs(type, flags, name, data);
	if (IS_ERR(root)) {
		free_vfsmnt(mnt);
		return ERR_CAST(root);
	}

	mnt->mnt.mnt_root = root;
	mnt->mnt.mnt_sb = root->d_sb;
	mnt->mnt_mountpoint = mnt->mnt.mnt_root;
	mnt->mnt_parent = mnt;
	br_write_lock(&vfsmount_lock);
	list_add_tail(&mnt->mnt_instance, &root->d_sb->s_mounts);
	br_write_unlock(&vfsmount_lock);
	return &mnt->mnt;
}
EXPORT_SYMBOL_GPL(vfs_kern_mount);

static struct mount *clone_mnt(struct mount *old, struct dentry *root,
					int flag)
{
	struct super_block *sb = old->mnt.mnt_sb;
	struct mount *mnt;
	int err;

	mnt = alloc_vfsmnt(old->mnt_devname);
	if (!mnt)
		return ERR_PTR(-ENOMEM);

	if (flag & (CL_SLAVE | CL_PRIVATE | CL_SHARED_TO_SLAVE))
		mnt->mnt_group_id = 0; /* not a peer of original */
	else
		mnt->mnt_group_id = old->mnt_group_id;

	if ((flag & CL_MAKE_SHARED) && !mnt->mnt_group_id) {
		err = mnt_alloc_group_id(mnt);
		if (err)
			goto out_free;
	}

	mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~MNT_WRITE_HOLD;

	if ((flag & CL_UNPRIVILEGED) && (mnt->mnt.mnt_flags & MNT_READONLY))
		mnt->mnt.mnt_flags |= MNT_LOCK_READONLY;

	atomic_inc(&sb->s_active);
	mnt->mnt.mnt_sb = sb;
	mnt->mnt.mnt_root = dget(root);
	mnt->mnt_mountpoint = mnt->mnt.mnt_root;
	mnt->mnt_parent = mnt;
	br_write_lock(&vfsmount_lock);
	list_add_tail(&mnt->mnt_instance, &sb->s_mounts);
	br_write_unlock(&vfsmount_lock);

	if ((flag & CL_SLAVE) ||
	    ((flag & CL_SHARED_TO_SLAVE) && IS_MNT_SHARED(old))) {
		list_add(&mnt->mnt_slave, &old->mnt_slave_list);
		mnt->mnt_master = old;
		CLEAR_MNT_SHARED(mnt);
	} else if (!(flag & CL_PRIVATE)) {
		if ((flag & CL_MAKE_SHARED) || IS_MNT_SHARED(old))
			list_add(&mnt->mnt_share, &old->mnt_share);
		if (IS_MNT_SLAVE(old))
			list_add(&mnt->mnt_slave, &old->mnt_slave);
		mnt->mnt_master = old->mnt_master;
	}
	if (flag & CL_MAKE_SHARED)
		set_mnt_shared(mnt);

	/* stick the duplicate mount on the same expiry list
	 * as the parent if appropriate */
	if (flag & CL_EXPIRE) {
		if (!list_empty(&old->mnt_expire))
			list_add(&mnt->mnt_expire, &old->mnt_expire);
	}

	return mnt;

 out_free:
	free_vfsmnt(mnt);
	return ERR_PTR(err);
}

static inline void mntfree(struct mount *mnt)
{
	struct vfsmount *m = &mnt->mnt;
	struct super_block *sb = m->mnt_sb;

	/*
	 * A leftover writer count probably indicates that somebody messed
	 * up a mnt_want/drop_write() pair.  If this happens, the
	 * filesystem was probably unable to make r/w->r/o transitions.
	 */
	/*
	 * The locking used to deal with mnt_count decrement provides barriers,
	 * so mnt_get_writers() below is safe.
	 */
	WARN_ON(mnt_get_writers(mnt));
	fsnotify_vfsmount_delete(m);
	dput(m->mnt_root);
	free_vfsmnt(mnt);
	deactivate_super(sb);
}

static void mntput_no_expire(struct mount *mnt)
{
put_again:
#ifdef CONFIG_SMP
	br_read_lock(&vfsmount_lock);
	if (likely(mnt->mnt_ns)) {
		/* shouldn't be the last one */
		mnt_add_count(mnt, -1);
		br_read_unlock(&vfsmount_lock);
		return;
	}
	br_read_unlock(&vfsmount_lock);

	br_write_lock(&vfsmount_lock);
	mnt_add_count(mnt, -1);
	if (mnt_get_count(mnt)) {
		br_write_unlock(&vfsmount_lock);
		return;
	}
#else
	mnt_add_count(mnt, -1);
	if (likely(mnt_get_count(mnt)))
		return;
	br_write_lock(&vfsmount_lock);
#endif
	if (unlikely(mnt->mnt_pinned)) {
		mnt_add_count(mnt, mnt->mnt_pinned + 1);
		mnt->mnt_pinned = 0;
		br_write_unlock(&vfsmount_lock);
		acct_auto_close_mnt(&mnt->mnt);
		goto put_again;
	}

	list_del(&mnt->mnt_instance);
	br_write_unlock(&vfsmount_lock);
	mntfree(mnt);
}

void mntput(struct vfsmount *mnt)
{
	if (mnt) {
		struct mount *m = real_mount(mnt);
		/* avoid cacheline pingpong, hope gcc doesn't get "smart" */
		if (unlikely(m->mnt_expiry_mark))
			m->mnt_expiry_mark = 0;
		mntput_no_expire(m);
	}
}
EXPORT_SYMBOL(mntput);

struct vfsmount *mntget(struct vfsmount *mnt)
{
	if (mnt)
		mnt_add_count(real_mount(mnt), 1);
	return mnt;
}
EXPORT_SYMBOL(mntget);

void mnt_pin(struct vfsmount *mnt)
{
	br_write_lock(&vfsmount_lock);
	real_mount(mnt)->mnt_pinned++;
	br_write_unlock(&vfsmount_lock);
}
EXPORT_SYMBOL(mnt_pin);

void mnt_unpin(struct vfsmount *m)
{
	struct mount *mnt = real_mount(m);
	br_write_lock(&vfsmount_lock);
	if (mnt->mnt_pinned) {
		mnt_add_count(mnt, 1);
		mnt->mnt_pinned--;
	}
	br_write_unlock(&vfsmount_lock);
}
EXPORT_SYMBOL(mnt_unpin);

static inline void mangle(struct seq_file *m, const char *s)
{
	seq_escape(m, s, " \t\n\\");
}
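/*
 * Simple .show_options callback for filesystems which don't want to
 * implement more complex mount option showing.
 *
 * See also save_mount_options().
 */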
int generic_show_options(struct seq_file *m, struct dentry *root)
{
	const char *options;

	rcu_read_lock();
	options = rcu_dereference(root->d_sb->s_options);

	if (options != NULL && options[0]) {
		seq_putc(m, ',');
		mangle(m, options);
	}
	rcu_read_unlock();

	return 0;
}
EXPORT_SYMBOL(generic_show_options);
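/*
 * If filesystem uses generic_show_options(), this function should be
 * called from the fill_super() callback.
 *
 * The s_options pointer is protected by RCU: readers in
 * generic_show_options() take rcu_read_lock(), and writers publish a
 * new string with rcu_assign_pointer() and free the old one only after
 * synchronize_rcu() (see replace_mount_options()).
 */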
void save_mount_options(struct super_block *sb, char *options)
{
	BUG_ON(sb->s_options);
	rcu_assign_pointer(sb->s_options, kstrdup(options, GFP_KERNEL));
}
EXPORT_SYMBOL(save_mount_options);

void replace_mount_options(struct super_block *sb, char *options)
{
	char *old = sb->s_options;
	rcu_assign_pointer(sb->s_options, options);
	if (old) {
		synchronize_rcu();
		kfree(old);
	}
}
EXPORT_SYMBOL(replace_mount_options);

#ifdef CONFIG_PROC_FS
/* iterator; we want it to have access to namespace_sem, thus here... */
static void *m_start(struct seq_file *m, loff_t *pos)
{
	struct proc_mounts *p = proc_mounts(m);

	down_read(&namespace_sem);
	return seq_list_start(&p->ns->list, *pos);
}

static void *m_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct proc_mounts *p = proc_mounts(m);

	return seq_list_next(v, &p->ns->list, pos);
}

static void m_stop(struct seq_file *m, void *v)
{
	up_read(&namespace_sem);
}

static int m_show(struct seq_file *m, void *v)
{
	struct proc_mounts *p = proc_mounts(m);
	struct mount *r = list_entry(v, struct mount, mnt_list);
	return p->show(m, &r->mnt);
}

const struct seq_operations mounts_op = {
	.start	= m_start,
	.next	= m_next,
	.stop	= m_stop,
	.show	= m_show,
};
#endif  /* CONFIG_PROC_FS */
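/**
 * may_umount_tree - check if a mount tree is busy
 * @m: root of mount tree
 *
 * This is called to check if a tree of mounts has any
 * open files, pwds, chroots or sub mounts that are
 * busy.
 */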
int may_umount_tree(struct vfsmount *m)
{
	struct mount *mnt = real_mount(m);
	int actual_refs = 0;
	int minimum_refs = 0;
	struct mount *p;
	BUG_ON(!m);

	/* write lock needed for mnt_get_count */
	br_write_lock(&vfsmount_lock);
	for (p = mnt; p; p = next_mnt(p, mnt)) {
		actual_refs += mnt_get_count(p);
		minimum_refs += 2;
	}
	br_write_unlock(&vfsmount_lock);

	if (actual_refs > minimum_refs)
		return 0;

	return 1;
}

EXPORT_SYMBOL(may_umount_tree);
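/**
 * may_umount - check if a mount may be unmounted
 * @mnt: mount to check for busyness
 *
 * This is called to check if a mount point has any
 * open files, pwds, chroots or sub mounts. If the
 * mount has sub mounts this will return busy
 * regardless of whether the sub mounts are busy.
 *
 * Doesn't take quota and stuff into account. IOW, in some cases it will
 * give false negatives. The main reason why it's here is that we need
 * a non-destructive way to look for easily umountable filesystems.
 */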
int may_umount(struct vfsmount *mnt)
{
	int ret = 1;
	down_read(&namespace_sem);
	br_write_lock(&vfsmount_lock);
	if (propagate_mount_busy(real_mount(mnt), 2))
		ret = 0;
	br_write_unlock(&vfsmount_lock);
	up_read(&namespace_sem);
	return ret;
}

EXPORT_SYMBOL(may_umount);

static LIST_HEAD(unmounted);	/* protected by namespace_sem */

static void namespace_unlock(void)
{
	struct mount *mnt;
	LIST_HEAD(head);

	if (likely(list_empty(&unmounted))) {
		up_write(&namespace_sem);
		return;
	}

	list_splice_init(&unmounted, &head);
	up_write(&namespace_sem);

	while (!list_empty(&head)) {
		mnt = list_first_entry(&head, struct mount, mnt_hash);
		list_del_init(&mnt->mnt_hash);
		if (mnt_has_parent(mnt)) {
			struct dentry *dentry;
			struct mount *m;

			br_write_lock(&vfsmount_lock);
			dentry = mnt->mnt_mountpoint;
			m = mnt->mnt_parent;
			mnt->mnt_mountpoint = mnt->mnt.mnt_root;
			mnt->mnt_parent = mnt;
			m->mnt_ghosts--;
			br_write_unlock(&vfsmount_lock);
			dput(dentry);
			mntput(&m->mnt);
		}
		mntput(&mnt->mnt);
	}
}

static inline void namespace_lock(void)
{
	down_write(&namespace_sem);
}
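/*
 * vfsmount lock must be held for write
 * namespace_sem must be held for write
 */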
void umount_tree(struct mount *mnt, int propagate)
{
	LIST_HEAD(tmp_list);
	struct mount *p;

	for (p = mnt; p; p = next_mnt(p, mnt))
		list_move(&p->mnt_hash, &tmp_list);

	if (propagate)
		propagate_umount(&tmp_list);

	list_for_each_entry(p, &tmp_list, mnt_hash) {
		list_del_init(&p->mnt_expire);
		list_del_init(&p->mnt_list);
		__touch_mnt_namespace(p->mnt_ns);
		p->mnt_ns = NULL;
		list_del_init(&p->mnt_child);
		if (mnt_has_parent(p)) {
			p->mnt_parent->mnt_ghosts++;
			put_mountpoint(p->mnt_mp);
			p->mnt_mp = NULL;
		}
		change_mnt_propagation(p, MS_PRIVATE);
	}
	list_splice(&tmp_list, &unmounted);
}

static void shrink_submounts(struct mount *mnt);

static int do_umount(struct mount *mnt, int flags)
{
	struct super_block *sb = mnt->mnt.mnt_sb;
	int retval;

	retval = security_sb_umount(&mnt->mnt, flags);
	if (retval)
		return retval;

	/*
	 * Allow userspace to request a mountpoint be expired rather than
	 * unmounting unconditionally. Unmount only happens if:
	 *  (1) the mark is already set (the mark is cleared by mntput())
	 *  (2) the usage count == 1 [parent vfsmount] + 1 [sys_umount]
	 */
	if (flags & MNT_EXPIRE) {
		if (&mnt->mnt == current->fs->root.mnt ||
		    flags & (MNT_FORCE | MNT_DETACH))
			return -EINVAL;

		/*
		 * probably don't strictly need the lock here if we examined
		 * all race cases, but it's a slowpath.
		 */
		br_write_lock(&vfsmount_lock);
		if (mnt_get_count(mnt) != 2) {
			br_write_unlock(&vfsmount_lock);
			return -EBUSY;
		}
		br_write_unlock(&vfsmount_lock);

		if (!xchg(&mnt->mnt_expiry_mark, 1))
			return -EAGAIN;
	}

	/*
	 * If we may have to abort operations to get out of this
	 * mount, and they will themselves hold resources we must
	 * allow the fs to do things. In the Unix tradition of
	 * 'Gee thats tricky, lets do it in userspace' the umount_begin
	 * op is given a chance to abort operations on behalf of the
	 * user; it may not complete everything on the first run through,
	 * but that's for the mount program to worry about.
	 */
	if (flags & MNT_FORCE && sb->s_op->umount_begin) {
		sb->s_op->umount_begin(sb);
	}

	/*
	 * No sense to grab the lock for this test, but test itself looks
	 * somewhat bogus. Suggestions for better replacement?
	 * Ho-hum... In principle, we might treat that as umount + switch
	 * to rootfs. GC would eventually take care of the old vfsmount.
	 * Actually it makes sense, especially if rootfs would contain a
	 * /reboot - static binary that would close all descriptors and
	 * call reboot(9). Then init(8) could umount root and exec /reboot.
	 */
	if (&mnt->mnt == current->fs->root.mnt && !(flags & MNT_DETACH)) {
		/*
		 * Special case for "unmounting" root ...
		 * we just try to remount it readonly.
		 */
		down_write(&sb->s_umount);
		if (!(sb->s_flags & MS_RDONLY))
			retval = do_remount_sb(sb, MS_RDONLY, NULL, 0);
		up_write(&sb->s_umount);
		return retval;
	}

	namespace_lock();
	br_write_lock(&vfsmount_lock);
	event++;

	if (!(flags & MNT_DETACH))
		shrink_submounts(mnt);

	retval = -EBUSY;
	if (flags & MNT_DETACH || !propagate_mount_busy(mnt, 2)) {
		if (!list_empty(&mnt->mnt_list))
			umount_tree(mnt, 1);
		retval = 0;
	}
	br_write_unlock(&vfsmount_lock);
	namespace_unlock();
	return retval;
}
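/*
 * Is the caller allowed to modify his namespace?
 */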
static inline bool may_mount(void)
{
	return ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN);
}
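/*
 * Now umount can handle mount points as well as block devices.
 * This is important for filesystems which use unnamed block devices.
 *
 * We now support a flag for forced unmount like the other 'big iron'
 * unixes.  Our API is identical to OSF/1 to avoid making a mess of AIX.
 */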
SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
{
	struct path path;
	struct mount *mnt;
	int retval;
	int lookup_flags = 0;

	if (flags & ~(MNT_FORCE | MNT_DETACH | MNT_EXPIRE | UMOUNT_NOFOLLOW))
		return -EINVAL;

	if (!may_mount())
		return -EPERM;

	if (!(flags & UMOUNT_NOFOLLOW))
		lookup_flags |= LOOKUP_FOLLOW;

	retval = user_path_at(AT_FDCWD, name, lookup_flags, &path);
	if (retval)
		goto out;
	mnt = real_mount(path.mnt);
	retval = -EINVAL;
	if (path.dentry != path.mnt->mnt_root)
		goto dput_and_out;
	if (!check_mnt(mnt))
		goto dput_and_out;

	retval = do_umount(mnt, flags);
dput_and_out:
	/* we mustn't call path_put() as that would clear mnt_expiry_mark */
	dput(path.dentry);
	mntput_no_expire(mnt);
out:
	return retval;
}

#ifdef __ARCH_WANT_SYS_OLDUMOUNT
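/*
 *	The 2.0 compatible umount. No flags.
 */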
SYSCALL_DEFINE1(oldumount, char __user *, name)
{
	return sys_umount(name, 0);
}

#endif

static bool mnt_ns_loop(struct path *path)
{
	/* Could bind mounting the mount namespace inode cause a
	 * mount namespace loop?
	 */
	struct inode *inode = path->dentry->d_inode;
	struct proc_ns *ei;
	struct mnt_namespace *mnt_ns;

	if (!proc_ns_inode(inode))
		return false;

	ei = get_proc_ns(inode);
	if (ei->ns_ops != &mntns_operations)
		return false;

	mnt_ns = ei->ns;
	return current->nsproxy->mnt_ns->seq >= mnt_ns->seq;
}

struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
					int flag)
{
	struct mount *res, *p, *q, *r, *parent;

	if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(mnt))
		return ERR_PTR(-EINVAL);

	res = q = clone_mnt(mnt, dentry, flag);
	if (IS_ERR(q))
		return q;

	q->mnt_mountpoint = mnt->mnt_mountpoint;

	p = mnt;
	list_for_each_entry(r, &mnt->mnt_mounts, mnt_child) {
		struct mount *s;
		if (!is_subdir(r->mnt_mountpoint, dentry))
			continue;

		for (s = r; s; s = next_mnt(s, r)) {
			if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(s)) {
				s = skip_mnt_tree(s);
				continue;
			}
			while (p != s->mnt_parent) {
				p = p->mnt_parent;
				q = q->mnt_parent;
			}
			p = s;
			parent = q;
			q = clone_mnt(p, p->mnt.mnt_root, flag);
			if (IS_ERR(q))
				goto out;
			br_write_lock(&vfsmount_lock);
			list_add_tail(&q->mnt_list, &res->mnt_list);
			attach_mnt(q, parent, p->mnt_mp);
			br_write_unlock(&vfsmount_lock);
		}
	}
	return res;
out:
	if (res) {
		br_write_lock(&vfsmount_lock);
		umount_tree(res, 0);
		br_write_unlock(&vfsmount_lock);
	}
	return q;
}
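/* Caller should check returned pointer for errors */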
struct vfsmount *collect_mounts(struct path *path)
{
	struct mount *tree;
	namespace_lock();
	tree = copy_tree(real_mount(path->mnt), path->dentry,
			 CL_COPY_ALL | CL_PRIVATE);
	namespace_unlock();
	if (IS_ERR(tree))
		return ERR_CAST(tree);
	return &tree->mnt;
}

void drop_collected_mounts(struct vfsmount *mnt)
{
	namespace_lock();
	br_write_lock(&vfsmount_lock);
	umount_tree(real_mount(mnt), 0);
	br_write_unlock(&vfsmount_lock);
	namespace_unlock();
}

int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg,
		   struct vfsmount *root)
{
	struct mount *mnt;
	int res = f(root, arg);
	if (res)
		return res;
	list_for_each_entry(mnt, &real_mount(root)->mnt_list, mnt_list) {
		res = f(&mnt->mnt, arg);
		if (res)
			return res;
	}
	return 0;
}

static void cleanup_group_ids(struct mount *mnt, struct mount *end)
{
	struct mount *p;

	for (p = mnt; p != end; p = next_mnt(p, mnt)) {
		if (p->mnt_group_id && !IS_MNT_SHARED(p))
			mnt_release_group_id(p);
	}
}

static int invent_group_ids(struct mount *mnt, bool recurse)
{
	struct mount *p;

	for (p = mnt; p; p = recurse ? next_mnt(p, mnt) : NULL) {
		if (!p->mnt_group_id && !IS_MNT_SHARED(p)) {
			int err = mnt_alloc_group_id(p);
			if (err) {
				cleanup_group_ids(mnt, p);
				return err;
			}
		}
	}

	return 0;
}
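/*
 *  @source_mnt : mount tree to be attached
 *  @dest_mnt   : mount on which @source_mnt will be attached
 *  @dest_mp    : the mountpoint @source_mnt will be attached at
 *  @parent_path: if non-null, detach the source_mnt from its parent and
 *                store the parent mount and mountpoint dentry there
 *                (done when source_mnt is moved)
 *
 * When the destination mount is shared, the attached tree (and any
 * copies created by propagation) must be propagated to every mount in
 * the destination's propagation tree and marked shared; a private or
 * slave source becomes shared where the destination requires it, while
 * unbindable sources cannot be attached to a shared destination at all.
 */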
static int attach_recursive_mnt(struct mount *source_mnt,
			struct mount *dest_mnt,
			struct mountpoint *dest_mp,
			struct path *parent_path)
{
	LIST_HEAD(tree_list);
	struct mount *child, *p;
	int err;

	if (IS_MNT_SHARED(dest_mnt)) {
		err = invent_group_ids(source_mnt, true);
		if (err)
			goto out;
	}
	err = propagate_mnt(dest_mnt, dest_mp, source_mnt, &tree_list);
	if (err)
		goto out_cleanup_ids;

	br_write_lock(&vfsmount_lock);

	if (IS_MNT_SHARED(dest_mnt)) {
		for (p = source_mnt; p; p = next_mnt(p, source_mnt))
			set_mnt_shared(p);
	}
	if (parent_path) {
		detach_mnt(source_mnt, parent_path);
		attach_mnt(source_mnt, dest_mnt, dest_mp);
		touch_mnt_namespace(source_mnt->mnt_ns);
	} else {
		mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt);
		commit_tree(source_mnt);
	}

	list_for_each_entry_safe(child, p, &tree_list, mnt_hash) {
		list_del_init(&child->mnt_hash);
		commit_tree(child);
	}
	br_write_unlock(&vfsmount_lock);

	return 0;

 out_cleanup_ids:
	if (IS_MNT_SHARED(dest_mnt))
		cleanup_group_ids(source_mnt, NULL);
 out:
	return err;
}

static struct mountpoint *lock_mount(struct path *path)
{
	struct vfsmount *mnt;
	struct dentry *dentry = path->dentry;
retry:
	mutex_lock(&dentry->d_inode->i_mutex);
	if (unlikely(cant_mount(dentry))) {
		mutex_unlock(&dentry->d_inode->i_mutex);
		return ERR_PTR(-ENOENT);
	}
	namespace_lock();
	mnt = lookup_mnt(path);
	if (likely(!mnt)) {
		struct mountpoint *mp = new_mountpoint(dentry);
		if (IS_ERR(mp)) {
			namespace_unlock();
			mutex_unlock(&dentry->d_inode->i_mutex);
			return mp;
		}
		return mp;
	}
	namespace_unlock();
	mutex_unlock(&path->dentry->d_inode->i_mutex);
	path_put(path);
	path->mnt = mnt;
	dentry = path->dentry = dget(mnt->mnt_root);
	goto retry;
}

static void unlock_mount(struct mountpoint *where)
{
	struct dentry *dentry = where->m_dentry;
	put_mountpoint(where);
	namespace_unlock();
	mutex_unlock(&dentry->d_inode->i_mutex);
}

static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp)
{
	if (mnt->mnt.mnt_sb->s_flags & MS_NOUSER)
		return -EINVAL;

	if (S_ISDIR(mp->m_dentry->d_inode->i_mode) !=
	      S_ISDIR(mnt->mnt.mnt_root->d_inode->i_mode))
		return -ENOTDIR;

	return attach_recursive_mnt(mnt, p, mp, NULL);
}
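/*
 * Sanity check the flags to change_mnt_propagation.
 */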
static int flags_to_propagation_type(int flags)
{
	int type = flags & ~(MS_REC | MS_SILENT);

	/* Fix up propagation type */
	if (type & ~(MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
		return 0;
	/* Only one propagation flag should be set */
	if (!is_power_of_2(type))
		return 0;
	return type;
}
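/*
 * recursively change the type of the mountpoint.
 */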
static int do_change_type(struct path *path, int flag)
{
	struct mount *m;
	struct mount *mnt = real_mount(path->mnt);
	int recurse = flag & MS_REC;
	int type;
	int err = 0;

	if (path->dentry != path->mnt->mnt_root)
		return -EINVAL;

	type = flags_to_propagation_type(flag);
	if (!type)
		return -EINVAL;

	namespace_lock();
	if (type == MS_SHARED) {
		err = invent_group_ids(mnt, recurse);
		if (err)
			goto out_unlock;
	}

	br_write_lock(&vfsmount_lock);
	for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL))
		change_mnt_propagation(m, type);
	br_write_unlock(&vfsmount_lock);

 out_unlock:
	namespace_unlock();
	return err;
}
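/*
 * do loopback mount.
 */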
static int do_loopback(struct path *path, const char *old_name,
				int recurse)
{
	struct path old_path;
	struct mount *mnt = NULL, *old, *parent;
	struct mountpoint *mp;
	int err;
	if (!old_name || !*old_name)
		return -EINVAL;
	err = kern_path(old_name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &old_path);
	if (err)
		return err;

	err = -EINVAL;
	if (mnt_ns_loop(&old_path))
		goto out;

	mp = lock_mount(path);
	err = PTR_ERR(mp);
	if (IS_ERR(mp))
		goto out;

	old = real_mount(old_path.mnt);
	parent = real_mount(path->mnt);

	err = -EINVAL;
	if (IS_MNT_UNBINDABLE(old))
		goto out2;

	if (!check_mnt(parent) || !check_mnt(old))
		goto out2;

	if (recurse)
		mnt = copy_tree(old, old_path.dentry, 0);
	else
		mnt = clone_mnt(old, old_path.dentry, 0);

	if (IS_ERR(mnt)) {
		err = PTR_ERR(mnt);
		goto out2;
	}

	err = graft_tree(mnt, parent, mp);
	if (err) {
		br_write_lock(&vfsmount_lock);
		umount_tree(mnt, 0);
		br_write_unlock(&vfsmount_lock);
	}
out2:
	unlock_mount(mp);
out:
	path_put(&old_path);
	return err;
}

static int change_mount_flags(struct vfsmount *mnt, int ms_flags)
{
	int error = 0;
	int readonly_request = 0;

	if (ms_flags & MS_RDONLY)
		readonly_request = 1;
	if (readonly_request == __mnt_is_readonly(mnt))
		return 0;

	if (mnt->mnt_flags & MNT_LOCK_READONLY)
		return -EPERM;

	if (readonly_request)
		error = mnt_make_readonly(real_mount(mnt));
	else
		__mnt_unmake_readonly(real_mount(mnt));
	return error;
}
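/*
 * change filesystem flags. dir should be a physical root of filesystem.
 * If you've mounted a non-root directory somewhere and want to do remount
 * on it - tough luck.
 */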
static int do_remount(struct path *path, int flags, int mnt_flags,
		      void *data)
{
	int err;
	struct super_block *sb = path->mnt->mnt_sb;
	struct mount *mnt = real_mount(path->mnt);

	if (!check_mnt(mnt))
		return -EINVAL;

	if (path->dentry != path->mnt->mnt_root)
		return -EINVAL;

	err = security_sb_remount(sb, data);
	if (err)
		return err;

	down_write(&sb->s_umount);
	if (flags & MS_BIND)
		err = change_mount_flags(path->mnt, flags);
	else if (!capable(CAP_SYS_ADMIN))
		err = -EPERM;
	else
		err = do_remount_sb(sb, flags, data, 0);
	if (!err) {
		br_write_lock(&vfsmount_lock);
		mnt_flags |= mnt->mnt.mnt_flags & MNT_PROPAGATION_MASK;
		mnt->mnt.mnt_flags = mnt_flags;
		br_write_unlock(&vfsmount_lock);
	}
	up_write(&sb->s_umount);
	if (!err) {
		br_write_lock(&vfsmount_lock);
		touch_mnt_namespace(mnt->mnt_ns);
		br_write_unlock(&vfsmount_lock);
	}
	return err;
}

static inline int tree_contains_unbindable(struct mount *mnt)
{
	struct mount *p;
	for (p = mnt; p; p = next_mnt(p, mnt)) {
		if (IS_MNT_UNBINDABLE(p))
			return 1;
	}
	return 0;
}

static int do_move_mount(struct path *path, const char *old_name)
{
	struct path old_path, parent_path;
	struct mount *p;
	struct mount *old;
	struct mountpoint *mp;
	int err;
	if (!old_name || !*old_name)
		return -EINVAL;
	err = kern_path(old_name, LOOKUP_FOLLOW, &old_path);
	if (err)
		return err;

	mp = lock_mount(path);
	err = PTR_ERR(mp);
	if (IS_ERR(mp))
		goto out;

	old = real_mount(old_path.mnt);
	p = real_mount(path->mnt);

	err = -EINVAL;
	if (!check_mnt(p) || !check_mnt(old))
		goto out1;

	err = -EINVAL;
	if (old_path.dentry != old_path.mnt->mnt_root)
		goto out1;

	if (!mnt_has_parent(old))
		goto out1;

	if (S_ISDIR(path->dentry->d_inode->i_mode) !=
	      S_ISDIR(old_path.dentry->d_inode->i_mode))
		goto out1;
	/*
	 * Don't move a mount residing in a shared parent.
	 */
	if (IS_MNT_SHARED(old->mnt_parent))
		goto out1;
	/*
	 * Don't move a mount tree containing unbindable mounts to a destination
	 * mount which is shared.
	 */
	if (IS_MNT_SHARED(p) && tree_contains_unbindable(old))
		goto out1;
	err = -ELOOP;
	for (; mnt_has_parent(p); p = p->mnt_parent)
		if (p == old)
			goto out1;

	err = attach_recursive_mnt(old, real_mount(path->mnt), mp, &parent_path);
	if (err)
		goto out1;

	/* if the mount is moved, it should no longer be expired
	 * automatically */
	list_del_init(&old->mnt_expire);
out1:
	unlock_mount(mp);
out:
	if (!err)
		path_put(&parent_path);
	path_put(&old_path);
	return err;
}

static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype)
{
	int err;
	const char *subtype = strchr(fstype, '.');
	if (subtype) {
		subtype++;
		err = -EINVAL;
		if (!subtype[0])
			goto err;
	} else
		subtype = "";

	mnt->mnt_sb->s_subtype = kstrdup(subtype, GFP_KERNEL);
	err = -ENOMEM;
	if (!mnt->mnt_sb->s_subtype)
		goto err;
	return mnt;

 err:
	mntput(mnt);
	return ERR_PTR(err);
}
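/*
 * add a mount into a namespace's mount tree
 */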
static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags)
{
	struct mountpoint *mp;
	struct mount *parent;
	int err;

	mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL);

	mp = lock_mount(path);
	if (IS_ERR(mp))
		return PTR_ERR(mp);

	parent = real_mount(path->mnt);
	err = -EINVAL;
	if (unlikely(!check_mnt(parent))) {
		/* that's acceptable only for automounts done in private ns */
		if (!(mnt_flags & MNT_SHRINKABLE))
			goto unlock;
		/* ... and for those we'd better have mountpoint still alive */
		if (!parent->mnt_ns)
			goto unlock;
	}

	/* Refuse the same filesystem on the same mount point */
	err = -EBUSY;
	if (path->mnt->mnt_sb == newmnt->mnt.mnt_sb &&
	    path->mnt->mnt_root == path->dentry)
		goto unlock;

	err = -EINVAL;
	if (S_ISLNK(newmnt->mnt.mnt_root->d_inode->i_mode))
		goto unlock;

	newmnt->mnt.mnt_flags = mnt_flags;
	err = graft_tree(newmnt, parent, mp);

unlock:
	unlock_mount(mp);
	return err;
}
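/*
 * create a new mount for userspace and request it to be added into the
 * namespace's tree
 */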
static int do_new_mount(struct path *path, const char *fstype, int flags,
			int mnt_flags, const char *name, void *data)
{
	struct file_system_type *type;
	struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns;
	struct vfsmount *mnt;
	int err;

	if (!fstype)
		return -EINVAL;

	type = get_fs_type(fstype);
	if (!type)
		return -ENODEV;

	if (user_ns != &init_user_ns) {
		if (!(type->fs_flags & FS_USERNS_MOUNT)) {
			put_filesystem(type);
			return -EPERM;
		}
		/* Only in special cases allow devices from mounts
		 * created outside the initial user namespace.
		 */
		if (!(type->fs_flags & FS_USERNS_DEV_MOUNT)) {
			flags |= MS_NODEV;
			mnt_flags |= MNT_NODEV;
		}
	}

	mnt = vfs_kern_mount(type, flags, name, data);
	if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) &&
	    !mnt->mnt_sb->s_subtype)
		mnt = fs_set_subtype(mnt, fstype);

	put_filesystem(type);
	if (IS_ERR(mnt))
		return PTR_ERR(mnt);

	err = do_add_mount(real_mount(mnt), path, mnt_flags);
	if (err)
		mntput(mnt);
	return err;
}

int finish_automount(struct vfsmount *m, struct path *path)
{
	struct mount *mnt = real_mount(m);
	int err;

	/* The new mount record should have at least 2 refs to prevent it being
	 * expired before we get a chance to add it
	 */
	BUG_ON(mnt_get_count(mnt) < 2);

	if (m->mnt_sb == path->mnt->mnt_sb &&
	    m->mnt_root == path->dentry) {
		err = -ELOOP;
		goto fail;
	}

	err = do_add_mount(mnt, path, path->mnt->mnt_flags | MNT_SHRINKABLE);
	if (!err)
		return 0;
fail:
	/* remove m from any expiration list it may be on */
	if (!list_empty(&mnt->mnt_expire)) {
		namespace_lock();
		br_write_lock(&vfsmount_lock);
		list_del_init(&mnt->mnt_expire);
		br_write_unlock(&vfsmount_lock);
		namespace_unlock();
	}
	mntput(m);
	mntput(m);
	return err;
}
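/**
 * mnt_set_expiry - Put a mount on an expiration list
 * @mnt: The mount to put on the list.
 * @expiry_list: The list to add the mount to.
 */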
void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list)
{
	namespace_lock();
	br_write_lock(&vfsmount_lock);

	list_add_tail(&real_mount(mnt)->mnt_expire, expiry_list);

	br_write_unlock(&vfsmount_lock);
	namespace_unlock();
}
EXPORT_SYMBOL(mnt_set_expiry);
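/*
 * process a list of expirable mountpoints with the intent of discarding any
 * mountpoints that aren't in use and are expirable.
 */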
void mark_mounts_for_expiry(struct list_head *mounts)
{
	struct mount *mnt, *next;
	LIST_HEAD(graveyard);

	if (list_empty(mounts))
		return;

	namespace_lock();
	br_write_lock(&vfsmount_lock);

	/* extract from the expiration list every vfsmount that matches the
	 * following criteria:
	 * - only referenced by its parent vfsmount
	 * - still marked for expiry (marked on the last call here; marks are
	 *   cleared by mntput())
	 */
	list_for_each_entry_safe(mnt, next, mounts, mnt_expire) {
		if (!xchg(&mnt->mnt_expiry_mark, 1) ||
			propagate_mount_busy(mnt, 1))
			continue;
		list_move(&mnt->mnt_expire, &graveyard);
	}
	while (!list_empty(&graveyard)) {
		mnt = list_first_entry(&graveyard, struct mount, mnt_expire);
		touch_mnt_namespace(mnt->mnt_ns);
		umount_tree(mnt, 1);
	}
	br_write_unlock(&vfsmount_lock);
	namespace_unlock();
}

EXPORT_SYMBOL_GPL(mark_mounts_for_expiry);
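/*
 * Search the mount tree under @parent for shrinkable submounts that are
 * not busy (and have no busy propagated copies) and move them to
 * @graveyard.  Returns the number found.
 */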
static int select_submounts(struct mount *parent, struct list_head *graveyard)
{
	struct mount *this_parent = parent;
	struct list_head *next;
	int found = 0;

repeat:
	next = this_parent->mnt_mounts.next;
resume:
	while (next != &this_parent->mnt_mounts) {
		struct list_head *tmp = next;
		struct mount *mnt = list_entry(tmp, struct mount, mnt_child);

		next = tmp->next;
		if (!(mnt->mnt.mnt_flags & MNT_SHRINKABLE))
			continue;
		/*
		 * Descend a level if the d_mounts list is non-empty.
		 */
		if (!list_empty(&mnt->mnt_mounts)) {
			this_parent = mnt;
			goto repeat;
		}

		if (!propagate_mount_busy(mnt, 1)) {
			list_move_tail(&mnt->mnt_expire, graveyard);
			found++;
		}
	}
	/*
	 * All done at this level ... ascend and resume the search
	 */
	if (this_parent != parent) {
		next = this_parent->mnt_child.next;
		this_parent = this_parent->mnt_parent;
		goto resume;
	}
	return found;
}
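/*
 * process a list of expirable mountpoints with the intent of discarding any
 * submounts of a specific parent mountpoint
 *
 * vfsmount_lock must be held for write
 */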
static void shrink_submounts(struct mount *mnt)
{
	LIST_HEAD(graveyard);
	struct mount *m;

	/* extract submounts of 'mountpoint' from the expiration list */
	while (select_submounts(mnt, &graveyard)) {
		while (!list_empty(&graveyard)) {
			m = list_first_entry(&graveyard, struct mount,
						mnt_expire);
			touch_mnt_namespace(m->mnt_ns);
			umount_tree(m, 1);
		}
	}
}
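/*
 * Some copy_from_user() implementations do not return the exact number of
 * bytes remaining to copy on a fault.  But copy_mount_options() requires that.
 * Note that this function differs from copy_from_user() in that it will oops
 * on bad values of `to', rather than returning a short copy.
 */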
static long exact_copy_from_user(void *to, const void __user * from,
				 unsigned long n)
{
	char *t = to;
	const char __user *f = from;
	char c;

	if (!access_ok(VERIFY_READ, from, n))
		return n;

	while (n) {
		if (__get_user(c, f)) {
			memset(t, 0, n);
			break;
		}
		*t++ = c;
		f++;
		n--;
	}
	return n;
}

int copy_mount_options(const void __user * data, unsigned long *where)
{
	int i;
	unsigned long page;
	unsigned long size;

	*where = 0;
	if (!data)
		return 0;

	if (!(page = __get_free_page(GFP_KERNEL)))
		return -ENOMEM;

	/* We only care that *some* data at the address the user
	 * gave us is valid.  Just in case, we'll zero
	 * the remainder of the page.
	 */
	/* copy_from_user cannot cross TASK_SIZE ! */
	size = TASK_SIZE - (unsigned long)data;
	if (size > PAGE_SIZE)
		size = PAGE_SIZE;

	i = size - exact_copy_from_user((void *)page, data, size);
	if (!i) {
		free_page(page);
		return -EFAULT;
	}
	if (i != PAGE_SIZE)
		memset((char *)page + i, 0, PAGE_SIZE - i);
	*where = page;
	return 0;
}

int copy_mount_string(const void __user *data, char **where)
{
	char *tmp;

	if (!data) {
		*where = NULL;
		return 0;
	}

	tmp = strndup_user(data, PAGE_SIZE);
	if (IS_ERR(tmp))
		return PTR_ERR(tmp);

	*where = tmp;
	return 0;
}
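/*
 * Flags is a 32-bit value that allows up to 31 non-fs dependent flags to
 * be given to the mount() call (ie: read-only, no-dev, no-suid etc).
 *
 * data is a (void *) that can point to any structure up to
 * PAGE_SIZE-1 bytes, which can contain arbitrary fs-dependent
 * data (or be NULL).
 *
 * Pre-0.97 versions of mount() didn't have a flags word.
 * When the flags word was introduced its top half was required
 * to have the magic value 0xC0ED, and this remained so until 2.4.0-test9.
 * Therefore, if this magic number is present, it carries no information
 * and must be discarded.
 */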
long do_mount(const char *dev_name, const char *dir_name,
		const char *type_page, unsigned long flags, void *data_page)
{
	struct path path;
	int retval = 0;
	int mnt_flags = 0;

	/* Discard magic */
	if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
		flags &= ~MS_MGC_MSK;

	/* Basic sanity checks */
	if (!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE))
		return -EINVAL;

	if (data_page)
		((char *)data_page)[PAGE_SIZE - 1] = 0;

	/* ... and get the mountpoint */
	retval = kern_path(dir_name, LOOKUP_FOLLOW, &path);
	if (retval)
		return retval;

	retval = security_sb_mount(dev_name, &path,
				   type_page, flags, data_page);
	if (!retval && !may_mount())
		retval = -EPERM;
	if (retval)
		goto dput_out;

	/* Default to relatime unless overriden */
	if (!(flags & MS_NOATIME))
		mnt_flags |= MNT_RELATIME;

	/* Separate the per-mountpoint flags */
	if (flags & MS_NOSUID)
		mnt_flags |= MNT_NOSUID;
	if (flags & MS_NODEV)
		mnt_flags |= MNT_NODEV;
	if (flags & MS_NOEXEC)
		mnt_flags |= MNT_NOEXEC;
	if (flags & MS_NOATIME)
		mnt_flags |= MNT_NOATIME;
	if (flags & MS_NODIRATIME)
		mnt_flags |= MNT_NODIRATIME;
	if (flags & MS_STRICTATIME)
		mnt_flags &= ~(MNT_RELATIME | MNT_NOATIME);
	if (flags & MS_RDONLY)
		mnt_flags |= MNT_READONLY;

	flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN |
		   MS_NOATIME | MS_NODIRATIME | MS_RELATIME | MS_KERNMOUNT |
		   MS_STRICTATIME);

	if (flags & MS_REMOUNT)
		retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags,
				    data_page);
	else if (flags & MS_BIND)
		retval = do_loopback(&path, dev_name, flags & MS_REC);
	else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
		retval = do_change_type(&path, flags);
	else if (flags & MS_MOVE)
		retval = do_move_mount(&path, dev_name);
	else
		retval = do_new_mount(&path, type_page, flags, mnt_flags,
				      dev_name, data_page);
dput_out:
	path_put(&path);
	return retval;
}

static void free_mnt_ns(struct mnt_namespace *ns)
{
	proc_free_inum(ns->proc_inum);
	put_user_ns(ns->user_ns);
	kfree(ns);
}
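/*
 * Assign a sequence number so we can detect when we attempt to bind
 * mount a reference to an older mount namespace into the current
 * mount namespace, preventing reference counting loops.  A 64bit
 * number incrementing at 10Ghz will take 12,427 years to wrap which
 * is effectively never, so we can ignore the possibility.
 */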
static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1);

static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
{
	struct mnt_namespace *new_ns;
	int ret;

	new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
	if (!new_ns)
		return ERR_PTR(-ENOMEM);
	ret = proc_alloc_inum(&new_ns->proc_inum);
	if (ret) {
		kfree(new_ns);
		return ERR_PTR(ret);
	}
	new_ns->seq = atomic64_add_return(1, &mnt_ns_seq);
	atomic_set(&new_ns->count, 1);
	new_ns->root = NULL;
	INIT_LIST_HEAD(&new_ns->list);
	init_waitqueue_head(&new_ns->poll);
	new_ns->event = 0;
	new_ns->user_ns = get_user_ns(user_ns);
	return new_ns;
}
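/*
 * Allocate a new namespace structure and populate it with contents
 * copied from the namespace of the passed in task structure.
 */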
static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
		struct user_namespace *user_ns, struct fs_struct *fs)
{
	struct mnt_namespace *new_ns;
	struct vfsmount *rootmnt = NULL, *pwdmnt = NULL;
	struct mount *p, *q;
	struct mount *old = mnt_ns->root;
	struct mount *new;
	int copy_flags;

	new_ns = alloc_mnt_ns(user_ns);
	if (IS_ERR(new_ns))
		return new_ns;

	namespace_lock();
	/* First pass: copy the tree topology */
	copy_flags = CL_COPY_ALL | CL_EXPIRE;
	if (user_ns != mnt_ns->user_ns)
		copy_flags |= CL_SHARED_TO_SLAVE | CL_UNPRIVILEGED;
	new = copy_tree(old, old->mnt.mnt_root, copy_flags);
	if (IS_ERR(new)) {
		namespace_unlock();
		free_mnt_ns(new_ns);
		return ERR_CAST(new);
	}
	new_ns->root = new;
	br_write_lock(&vfsmount_lock);
	list_add_tail(&new_ns->list, &new->mnt_list);
	br_write_unlock(&vfsmount_lock);

	/*
	 * Second pass: switch the tsk->fs->* elements and mark new vfsmounts
	 * as belonging to new namespace.  We have already acquired a private
	 * fs_struct, so tsk->fs->lock is not needed.
	 */
	p = old;
	q = new;
	while (p) {
		q->mnt_ns = new_ns;
		if (fs) {
			if (&p->mnt == fs->root.mnt) {
				fs->root.mnt = mntget(&q->mnt);
				rootmnt = &p->mnt;
			}
			if (&p->mnt == fs->pwd.mnt) {
				fs->pwd.mnt = mntget(&q->mnt);
				pwdmnt = &p->mnt;
			}
		}
		p = next_mnt(p, old);
		q = next_mnt(q, new);
	}
	namespace_unlock();

	if (rootmnt)
		mntput(rootmnt);
	if (pwdmnt)
		mntput(pwdmnt);

	return new_ns;
}

struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
		struct user_namespace *user_ns, struct fs_struct *new_fs)
{
	struct mnt_namespace *new_ns;

	BUG_ON(!ns);
	get_mnt_ns(ns);

	if (!(flags & CLONE_NEWNS))
		return ns;

	new_ns = dup_mnt_ns(ns, user_ns, new_fs);

	put_mnt_ns(ns);
	return new_ns;
}
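/*
 * create_mnt_ns - creates a private mount namespace containing just the
 * given mount; used for the initial mount tree and by mount_subtree().
 * On failure the vfsmount reference is dropped.
 */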
static struct mnt_namespace *create_mnt_ns(struct vfsmount *m)
{
	struct mnt_namespace *new_ns = alloc_mnt_ns(&init_user_ns);
	if (!IS_ERR(new_ns)) {
		struct mount *mnt = real_mount(m);
		mnt->mnt_ns = new_ns;
		new_ns->root = mnt;
		list_add(&mnt->mnt_list, &new_ns->list);
	} else {
		mntput(m);
	}
	return new_ns;
}

struct dentry *mount_subtree(struct vfsmount *mnt, const char *name)
{
	struct mnt_namespace *ns;
	struct super_block *s;
	struct path path;
	int err;

	ns = create_mnt_ns(mnt);
	if (IS_ERR(ns))
		return ERR_CAST(ns);

	err = vfs_path_lookup(mnt->mnt_root, mnt,
			name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &path);

	put_mnt_ns(ns);

	if (err)
		return ERR_PTR(err);

	/* trade a vfsmount reference for active sb one */
	s = path.mnt->mnt_sb;
	atomic_inc(&s->s_active);
	mntput(path.mnt);
	/* lock the sucker */
	down_write(&s->s_umount);
	/* ... and return the root of (sub)tree on it */
	return path.dentry;
}
EXPORT_SYMBOL(mount_subtree);

SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
		char __user *, type, unsigned long, flags, void __user *, data)
{
	int ret;
	char *kernel_type;
	struct filename *kernel_dir;
	char *kernel_dev;
	unsigned long data_page;

	ret = copy_mount_string(type, &kernel_type);
	if (ret < 0)
		goto out_type;

	kernel_dir = getname(dir_name);
	if (IS_ERR(kernel_dir)) {
		ret = PTR_ERR(kernel_dir);
		goto out_dir;
	}

	ret = copy_mount_string(dev_name, &kernel_dev);
	if (ret < 0)
		goto out_dev;

	ret = copy_mount_options(data, &data_page);
	if (ret < 0)
		goto out_data;

	ret = do_mount(kernel_dev, kernel_dir->name, kernel_type, flags,
		       (void *) data_page);

	free_page(data_page);
out_data:
	kfree(kernel_dev);
out_dev:
	putname(kernel_dir);
out_dir:
	kfree(kernel_type);
out_type:
	return ret;
}
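/*
 * Return true if path is reachable from root
 *
 * namespace_sem or vfsmount_lock is held
 */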
bool is_path_reachable(struct mount *mnt, struct dentry *dentry,
			 const struct path *root)
{
	while (&mnt->mnt != root->mnt && mnt_has_parent(mnt)) {
		dentry = mnt->mnt_mountpoint;
		mnt = mnt->mnt_parent;
	}
	return &mnt->mnt == root->mnt && is_subdir(dentry, root->dentry);
}

int path_is_under(struct path *path1, struct path *path2)
{
	int res;
	br_read_lock(&vfsmount_lock);
	res = is_path_reachable(real_mount(path1->mnt), path1->dentry, path2);
	br_read_unlock(&vfsmount_lock);
	return res;
}
EXPORT_SYMBOL(path_is_under);
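/*
 * pivot_root Semantics:
 * Moves the root file system of the current process to the directory put_old,
 * makes new_root as the new root file system of the current process, and sets
 * root/cwd of all processes which had them on the current root to new_root.
 *
 * Restrictions:
 * The new_root and put_old must be directories, and  must not be on the
 * same file  system as the current process root. The put_old  must  be
 * underneath new_root,  i.e. adding a non-zero number of /.. to the string
 * pointed to by put_old must yield the same directory as new_root. No other
 * file system may be mounted on put_old. After all, new_root is a mountpoint.
 *
 * Also, the current root cannot be on the 'rootfs' (initial ramfs) filesystem.
 * See Documentation/filesystems/ramfs-rootfs-initramfs.txt for alternatives
 * in this situation.
 *
 * Notes:
 *  - we don't move root/cwd if they are not at the root (reason: if something
 *    cared enough to change them, it's probably wrong to force them elsewhere)
 *  - it's okay to pick a root that isn't the root of a file system, e.g.
 *    /nfs/my_root where /nfs is the mount point. It must be a mountpoint,
 *    though, so you may need to say mount --bind /nfs/my_root /nfs/my_root
 *    first.
 */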
SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
		const char __user *, put_old)
{
	struct path new, old, parent_path, root_parent, root;
	struct mount *new_mnt, *root_mnt, *old_mnt;
	struct mountpoint *old_mp, *root_mp;
	int error;

	if (!may_mount())
		return -EPERM;

	error = user_path_dir(new_root, &new);
	if (error)
		goto out0;

	error = user_path_dir(put_old, &old);
	if (error)
		goto out1;

	error = security_sb_pivotroot(&old, &new);
	if (error)
		goto out2;

	get_fs_root(current->fs, &root);
	old_mp = lock_mount(&old);
	error = PTR_ERR(old_mp);
	if (IS_ERR(old_mp))
		goto out3;

	error = -EINVAL;
	new_mnt = real_mount(new.mnt);
	root_mnt = real_mount(root.mnt);
	old_mnt = real_mount(old.mnt);
	if (IS_MNT_SHARED(old_mnt) ||
		IS_MNT_SHARED(new_mnt->mnt_parent) ||
		IS_MNT_SHARED(root_mnt->mnt_parent))
		goto out4;
	if (!check_mnt(root_mnt) || !check_mnt(new_mnt))
		goto out4;
	error = -ENOENT;
	if (d_unlinked(new.dentry))
		goto out4;
	error = -EBUSY;
	if (new_mnt == root_mnt || old_mnt == root_mnt)
		goto out4;		/* loop, on the same file system  */
	error = -EINVAL;
	if (root.mnt->mnt_root != root.dentry)
		goto out4;		/* not a mountpoint */
	if (!mnt_has_parent(root_mnt))
		goto out4;		/* not attached */
	root_mp = root_mnt->mnt_mp;
	if (new.mnt->mnt_root != new.dentry)
		goto out4;		/* not a mountpoint */
	if (!mnt_has_parent(new_mnt))
		goto out4;		/* not attached */
	/* make sure we can reach put_old from new_root */
	if (!is_path_reachable(old_mnt, old.dentry, &new))
		goto out4;
	root_mp->m_count++; /* pin it so it won't go away */
	br_write_lock(&vfsmount_lock);
	detach_mnt(new_mnt, &parent_path);
	detach_mnt(root_mnt, &root_parent);
	/* mount old root on put_old */
	attach_mnt(root_mnt, old_mnt, old_mp);
	/* mount new_root on / */
	attach_mnt(new_mnt, real_mount(root_parent.mnt), root_mp);
	touch_mnt_namespace(current->nsproxy->mnt_ns);
	br_write_unlock(&vfsmount_lock);
	chroot_fs_refs(&root, &new);
	put_mountpoint(root_mp);
	error = 0;
out4:
	unlock_mount(old_mp);
	if (!error) {
		path_put(&root_parent);
		path_put(&parent_path);
	}
out3:
	path_put(&root);
out2:
	path_put(&old);
out1:
	path_put(&new);
out0:
	return error;
}

static void __init init_mount_tree(void)
{
	struct vfsmount *mnt;
	struct mnt_namespace *ns;
	struct path root;
	struct file_system_type *type;

	type = get_fs_type("rootfs");
	if (!type)
		panic("Can't find rootfs type");
	mnt = vfs_kern_mount(type, 0, "rootfs", NULL);
	put_filesystem(type);
	if (IS_ERR(mnt))
		panic("Can't create rootfs");

	ns = create_mnt_ns(mnt);
	if (IS_ERR(ns))
		panic("Can't allocate initial namespace");

	init_task.nsproxy->mnt_ns = ns;
	get_mnt_ns(ns);

	root.mnt = mnt;
	root.dentry = mnt->mnt_root;

	set_fs_pwd(current->fs, &root);
	set_fs_root(current->fs, &root);
}

void __init mnt_init(void)
{
	unsigned u;
	int err;

	init_rwsem(&namespace_sem);

	mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct mount),
			0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);

	mount_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC);
	mountpoint_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC);

	if (!mount_hashtable || !mountpoint_hashtable)
		panic("Failed to allocate mount hash table\n");

	printk(KERN_INFO "Mount-cache hash table entries: %lu\n", HASH_SIZE);

	for (u = 0; u < HASH_SIZE; u++)
		INIT_LIST_HEAD(&mount_hashtable[u]);
	for (u = 0; u < HASH_SIZE; u++)
		INIT_LIST_HEAD(&mountpoint_hashtable[u]);

	br_lock_init(&vfsmount_lock);

	err = sysfs_init();
	if (err)
		printk(KERN_WARNING "%s: sysfs_init error: %d\n",
			__func__, err);
	fs_kobj = kobject_create_and_add("fs", NULL);
	if (!fs_kobj)
		printk(KERN_WARNING "%s: kobj create error\n", __func__);
	init_rootfs();
	init_mount_tree();
}

void put_mnt_ns(struct mnt_namespace *ns)
{
	if (!atomic_dec_and_test(&ns->count))
		return;
	namespace_lock();
	br_write_lock(&vfsmount_lock);
	umount_tree(ns->root, 0);
	br_write_unlock(&vfsmount_lock);
	namespace_unlock();
	free_mnt_ns(ns);
}

struct vfsmount *kern_mount_data(struct file_system_type *type, void *data)
{
	struct vfsmount *mnt;
	mnt = vfs_kern_mount(type, MS_KERNMOUNT, type->name, data);
	if (!IS_ERR(mnt)) {
		/*
		 * it is a longterm mount, don't release mnt until
		 * we unmount before file sys is unregistered
		 */
		real_mount(mnt)->mnt_ns = MNT_NS_INTERNAL;
	}
	return mnt;
}
EXPORT_SYMBOL_GPL(kern_mount_data);

void kern_unmount(struct vfsmount *mnt)
{
	/* release long term mount so mount point can be released */
	if (!IS_ERR_OR_NULL(mnt)) {
		br_write_lock(&vfsmount_lock);
		real_mount(mnt)->mnt_ns = NULL;
		br_write_unlock(&vfsmount_lock);
		mntput(mnt);
	}
}
EXPORT_SYMBOL(kern_unmount);

bool our_mnt(struct vfsmount *mnt)
{
	return check_mnt(real_mount(mnt));
}

bool current_chrooted(void)
{
	/* Does the current process have a non-standard root */
	struct path ns_root;
	struct path fs_root;
	bool chrooted;

	/* Find the namespace root */
	ns_root.mnt = &current->nsproxy->mnt_ns->root->mnt;
	ns_root.dentry = ns_root.mnt->mnt_root;
	path_get(&ns_root);
	while (d_mountpoint(ns_root.dentry) && follow_down_one(&ns_root))
		;

	get_fs_root(current->fs, &fs_root);

	chrooted = !path_equal(&fs_root, &ns_root);

	path_put(&fs_root);
	path_put(&ns_root);

	return chrooted;
}

void update_mnt_policy(struct user_namespace *userns)
{
	struct mnt_namespace *ns = current->nsproxy->mnt_ns;
	struct mount *mnt;

	down_read(&namespace_sem);
	list_for_each_entry(mnt, &ns->list, mnt_list) {
		switch (mnt->mnt.mnt_sb->s_magic) {
		case SYSFS_MAGIC:
			userns->may_mount_sysfs = true;
			break;
		case PROC_SUPER_MAGIC:
			userns->may_mount_proc = true;
			break;
		}
		if (userns->may_mount_sysfs && userns->may_mount_proc)
			break;
	}
	up_read(&namespace_sem);
}

static void *mntns_get(struct task_struct *task)
{
	struct mnt_namespace *ns = NULL;
	struct nsproxy *nsproxy;

	rcu_read_lock();
	nsproxy = task_nsproxy(task);
	if (nsproxy) {
		ns = nsproxy->mnt_ns;
		get_mnt_ns(ns);
	}
	rcu_read_unlock();

	return ns;
}

static void mntns_put(void *ns)
{
	put_mnt_ns(ns);
}

static int mntns_install(struct nsproxy *nsproxy, void *ns)
{
	struct fs_struct *fs = current->fs;
	struct mnt_namespace *mnt_ns = ns;
	struct path root;

	if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) ||
	    !nsown_capable(CAP_SYS_CHROOT) ||
	    !nsown_capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (fs->users != 1)
		return -EINVAL;

	get_mnt_ns(mnt_ns);
	put_mnt_ns(nsproxy->mnt_ns);
	nsproxy->mnt_ns = mnt_ns;

	/* Find the root */
	root.mnt    = &mnt_ns->root->mnt;
	root.dentry = mnt_ns->root->mnt.mnt_root;
	path_get(&root);
	while (d_mountpoint(root.dentry) && follow_down_one(&root))
		;

	/* Update the pwd and root */
	set_fs_pwd(fs, &root);
	set_fs_root(fs, &root);

	path_put(&root);
	return 0;
}

static unsigned int mntns_inum(void *ns)
{
	struct mnt_namespace *mnt_ns = ns;
	return mnt_ns->proc_inum;
}

const struct proc_ns_operations mntns_operations = {
	.name		= "mnt",
	.type		= CLONE_NEWNS,
	.get		= mntns_get,
	.put		= mntns_put,
	.install	= mntns_install,
	.inum		= mntns_inum,
};