1
2
3
4
5
6
7
8
9
10
11#include <linux/syscalls.h>
12#include <linux/slab.h>
13#include <linux/sched.h>
14#include <linux/smp_lock.h>
15#include <linux/init.h>
16#include <linux/kernel.h>
17#include <linux/quotaops.h>
18#include <linux/acct.h>
19#include <linux/capability.h>
20#include <linux/module.h>
21#include <linux/sysfs.h>
22#include <linux/seq_file.h>
23#include <linux/mnt_namespace.h>
24#include <linux/namei.h>
25#include <linux/security.h>
26#include <linux/mount.h>
27#include <linux/ramfs.h>
28#include <asm/uaccess.h>
29#include <asm/unistd.h>
30#include "pnode.h"
31#include "internal.h"
32
33
/* Spinlock taken in this file around mount hash, pin-count and expiry-list updates. */
__cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock);

/* Global event counter; copied into mnt_namespace->event when a namespace is touched. */
static int event;

/* Hash table of mounts; buckets are selected by hash(parent mount, dentry). */
static struct list_head *mount_hashtable __read_mostly;
/* Parameters used by hash() to fold and mask the bucket index. */
static int hash_mask __read_mostly, hash_bits __read_mostly;
/* Slab cache from which struct vfsmount objects are allocated. */
static struct kmem_cache *mnt_cache __read_mostly;
/* Read/write semaphore taken around walks and modifications of the mount tree. */
static struct rw_semaphore namespace_sem;


/* The "fs" subsystem object; fs_subsys is exported to GPL modules. */
decl_subsys(fs, NULL, NULL);
EXPORT_SYMBOL_GPL(fs_subsys);
46
47static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
48{
49 unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES);
50 tmp += ((unsigned long)dentry / L1_CACHE_BYTES);
51 tmp = tmp + (tmp >> hash_bits);
52 return tmp & hash_mask;
53}
54
55struct vfsmount *alloc_vfsmnt(const char *name)
56{
57 struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
58 if (mnt) {
59 atomic_set(&mnt->mnt_count, 1);
60 INIT_LIST_HEAD(&mnt->mnt_hash);
61 INIT_LIST_HEAD(&mnt->mnt_child);
62 INIT_LIST_HEAD(&mnt->mnt_mounts);
63 INIT_LIST_HEAD(&mnt->mnt_list);
64 INIT_LIST_HEAD(&mnt->mnt_expire);
65 INIT_LIST_HEAD(&mnt->mnt_share);
66 INIT_LIST_HEAD(&mnt->mnt_slave_list);
67 INIT_LIST_HEAD(&mnt->mnt_slave);
68 if (name) {
69 int size = strlen(name) + 1;
70 char *newname = kmalloc(size, GFP_KERNEL);
71 if (newname) {
72 memcpy(newname, name, size);
73 mnt->mnt_devname = newname;
74 }
75 }
76 }
77 return mnt;
78}
79
80int simple_set_mnt(struct vfsmount *mnt, struct super_block *sb)
81{
82 mnt->mnt_sb = sb;
83 mnt->mnt_root = dget(sb->s_root);
84 return 0;
85}
86
87EXPORT_SYMBOL(simple_set_mnt);
88
89void free_vfsmnt(struct vfsmount *mnt)
90{
91 kfree(mnt->mnt_devname);
92 kmem_cache_free(mnt_cache, mnt);
93}
94
95
96
97
98
99struct vfsmount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry,
100 int dir)
101{
102 struct list_head *head = mount_hashtable + hash(mnt, dentry);
103 struct list_head *tmp = head;
104 struct vfsmount *p, *found = NULL;
105
106 for (;;) {
107 tmp = dir ? tmp->next : tmp->prev;
108 p = NULL;
109 if (tmp == head)
110 break;
111 p = list_entry(tmp, struct vfsmount, mnt_hash);
112 if (p->mnt_parent == mnt && p->mnt_mountpoint == dentry) {
113 found = p;
114 break;
115 }
116 }
117 return found;
118}
119
120
121
122
123
124struct vfsmount *lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
125{
126 struct vfsmount *child_mnt;
127 spin_lock(&vfsmount_lock);
128 if ((child_mnt = __lookup_mnt(mnt, dentry, 1)))
129 mntget(child_mnt);
130 spin_unlock(&vfsmount_lock);
131 return child_mnt;
132}
133
134static inline int check_mnt(struct vfsmount *mnt)
135{
136 return mnt->mnt_ns == current->nsproxy->mnt_ns;
137}
138
139static void touch_mnt_namespace(struct mnt_namespace *ns)
140{
141 if (ns) {
142 ns->event = ++event;
143 wake_up_interruptible(&ns->poll);
144 }
145}
146
147static void __touch_mnt_namespace(struct mnt_namespace *ns)
148{
149 if (ns && ns->event != event) {
150 ns->event = event;
151 wake_up_interruptible(&ns->poll);
152 }
153}
154
155static void detach_mnt(struct vfsmount *mnt, struct nameidata *old_nd)
156{
157 old_nd->dentry = mnt->mnt_mountpoint;
158 old_nd->mnt = mnt->mnt_parent;
159 mnt->mnt_parent = mnt;
160 mnt->mnt_mountpoint = mnt->mnt_root;
161 list_del_init(&mnt->mnt_child);
162 list_del_init(&mnt->mnt_hash);
163 old_nd->dentry->d_mounted--;
164}
165
166void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry,
167 struct vfsmount *child_mnt)
168{
169 child_mnt->mnt_parent = mntget(mnt);
170 child_mnt->mnt_mountpoint = dget(dentry);
171 dentry->d_mounted++;
172}
173
174static void attach_mnt(struct vfsmount *mnt, struct nameidata *nd)
175{
176 mnt_set_mountpoint(nd->mnt, nd->dentry, mnt);
177 list_add_tail(&mnt->mnt_hash, mount_hashtable +
178 hash(nd->mnt, nd->dentry));
179 list_add_tail(&mnt->mnt_child, &nd->mnt->mnt_mounts);
180}
181
182
183
184
185static void commit_tree(struct vfsmount *mnt)
186{
187 struct vfsmount *parent = mnt->mnt_parent;
188 struct vfsmount *m;
189 LIST_HEAD(head);
190 struct mnt_namespace *n = parent->mnt_ns;
191
192 BUG_ON(parent == mnt);
193
194 list_add_tail(&head, &mnt->mnt_list);
195 list_for_each_entry(m, &head, mnt_list)
196 m->mnt_ns = n;
197 list_splice(&head, n->list.prev);
198
199 list_add_tail(&mnt->mnt_hash, mount_hashtable +
200 hash(parent, mnt->mnt_mountpoint));
201 list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
202 touch_mnt_namespace(n);
203}
204
205static struct vfsmount *next_mnt(struct vfsmount *p, struct vfsmount *root)
206{
207 struct list_head *next = p->mnt_mounts.next;
208 if (next == &p->mnt_mounts) {
209 while (1) {
210 if (p == root)
211 return NULL;
212 next = p->mnt_child.next;
213 if (next != &p->mnt_parent->mnt_mounts)
214 break;
215 p = p->mnt_parent;
216 }
217 }
218 return list_entry(next, struct vfsmount, mnt_child);
219}
220
221static struct vfsmount *skip_mnt_tree(struct vfsmount *p)
222{
223 struct list_head *prev = p->mnt_mounts.prev;
224 while (prev != &p->mnt_mounts) {
225 p = list_entry(prev, struct vfsmount, mnt_child);
226 prev = p->mnt_mounts.prev;
227 }
228 return p;
229}
230
231static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root,
232 int flag)
233{
234 struct super_block *sb = old->mnt_sb;
235 struct vfsmount *mnt = alloc_vfsmnt(old->mnt_devname);
236
237 if (mnt) {
238 mnt->mnt_flags = old->mnt_flags;
239 atomic_inc(&sb->s_active);
240 mnt->mnt_sb = sb;
241 mnt->mnt_root = dget(root);
242 mnt->mnt_mountpoint = mnt->mnt_root;
243 mnt->mnt_parent = mnt;
244
245 if (flag & CL_SLAVE) {
246 list_add(&mnt->mnt_slave, &old->mnt_slave_list);
247 mnt->mnt_master = old;
248 CLEAR_MNT_SHARED(mnt);
249 } else if (!(flag & CL_PRIVATE)) {
250 if ((flag & CL_PROPAGATION) || IS_MNT_SHARED(old))
251 list_add(&mnt->mnt_share, &old->mnt_share);
252 if (IS_MNT_SLAVE(old))
253 list_add(&mnt->mnt_slave, &old->mnt_slave);
254 mnt->mnt_master = old->mnt_master;
255 }
256 if (flag & CL_MAKE_SHARED)
257 set_mnt_shared(mnt);
258
259
260
261 if (flag & CL_EXPIRE) {
262 spin_lock(&vfsmount_lock);
263 if (!list_empty(&old->mnt_expire))
264 list_add(&mnt->mnt_expire, &old->mnt_expire);
265 spin_unlock(&vfsmount_lock);
266 }
267 }
268 return mnt;
269}
270
271static inline void __mntput(struct vfsmount *mnt)
272{
273 struct super_block *sb = mnt->mnt_sb;
274 dput(mnt->mnt_root);
275 free_vfsmnt(mnt);
276 deactivate_super(sb);
277}
278
279void mntput_no_expire(struct vfsmount *mnt)
280{
281repeat:
282 if (atomic_dec_and_lock(&mnt->mnt_count, &vfsmount_lock)) {
283 if (likely(!mnt->mnt_pinned)) {
284 spin_unlock(&vfsmount_lock);
285 __mntput(mnt);
286 return;
287 }
288 atomic_add(mnt->mnt_pinned + 1, &mnt->mnt_count);
289 mnt->mnt_pinned = 0;
290 spin_unlock(&vfsmount_lock);
291 acct_auto_close_mnt(mnt);
292 security_sb_umount_close(mnt);
293 goto repeat;
294 }
295}
296
297EXPORT_SYMBOL(mntput_no_expire);
298
299void mnt_pin(struct vfsmount *mnt)
300{
301 spin_lock(&vfsmount_lock);
302 mnt->mnt_pinned++;
303 spin_unlock(&vfsmount_lock);
304}
305
306EXPORT_SYMBOL(mnt_pin);
307
308void mnt_unpin(struct vfsmount *mnt)
309{
310 spin_lock(&vfsmount_lock);
311 if (mnt->mnt_pinned) {
312 atomic_inc(&mnt->mnt_count);
313 mnt->mnt_pinned--;
314 }
315 spin_unlock(&vfsmount_lock);
316}
317
318EXPORT_SYMBOL(mnt_unpin);
319
320
321static void *m_start(struct seq_file *m, loff_t *pos)
322{
323 struct mnt_namespace *n = m->private;
324
325 down_read(&namespace_sem);
326 return seq_list_start(&n->list, *pos);
327}
328
329static void *m_next(struct seq_file *m, void *v, loff_t *pos)
330{
331 struct mnt_namespace *n = m->private;
332
333 return seq_list_next(v, &n->list, pos);
334}
335
336static void m_stop(struct seq_file *m, void *v)
337{
338 up_read(&namespace_sem);
339}
340
/*
 * Emit @s into the seq_file, escaping space, tab, newline and
 * backslash so that each output field stays a single token.
 */
static inline void mangle(struct seq_file *m, const char *s)
{
	const char *to_escape = " \t\n\\";

	seq_escape(m, s, to_escape);
}
345
346static int show_vfsmnt(struct seq_file *m, void *v)
347{
348 struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list);
349 int err = 0;
350 static struct proc_fs_info {
351 int flag;
352 char *str;
353 } fs_info[] = {
354 { MS_SYNCHRONOUS, ",sync" },
355 { MS_DIRSYNC, ",dirsync" },
356 { MS_MANDLOCK, ",mand" },
357 { 0, NULL }
358 };
359 static struct proc_fs_info mnt_info[] = {
360 { MNT_NOSUID, ",nosuid" },
361 { MNT_NODEV, ",nodev" },
362 { MNT_NOEXEC, ",noexec" },
363 { MNT_NOATIME, ",noatime" },
364 { MNT_NODIRATIME, ",nodiratime" },
365 { MNT_RELATIME, ",relatime" },
366 { 0, NULL }
367 };
368 struct proc_fs_info *fs_infop;
369
370 mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
371 seq_putc(m, ' ');
372 seq_path(m, mnt, mnt->mnt_root, " \t\n\\");
373 seq_putc(m, ' ');
374 mangle(m, mnt->mnt_sb->s_type->name);
375 if (mnt->mnt_sb->s_subtype && mnt->mnt_sb->s_subtype[0]) {
376 seq_putc(m, '.');
377 mangle(m, mnt->mnt_sb->s_subtype);
378 }
379 seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? " ro" : " rw");
380 for (fs_infop = fs_info; fs_infop->flag; fs_infop++) {
381 if (mnt->mnt_sb->s_flags & fs_infop->flag)
382 seq_puts(m, fs_infop->str);
383 }
384 for (fs_infop = mnt_info; fs_infop->flag; fs_infop++) {
385 if (mnt->mnt_flags & fs_infop->flag)
386 seq_puts(m, fs_infop->str);
387 }
388 if (mnt->mnt_sb->s_op->show_options)
389 err = mnt->mnt_sb->s_op->show_options(m, mnt);
390 seq_puts(m, " 0 0\n");
391 return err;
392}
393
/* seq_file operations that list mounts, one show_vfsmnt() line per mount. */
struct seq_operations mounts_op = {
	.start = m_start,
	.next = m_next,
	.stop = m_stop,
	.show = show_vfsmnt
};
400
401static int show_vfsstat(struct seq_file *m, void *v)
402{
403 struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list);
404 int err = 0;
405
406
407 if (mnt->mnt_devname) {
408 seq_puts(m, "device ");
409 mangle(m, mnt->mnt_devname);
410 } else
411 seq_puts(m, "no device");
412
413
414 seq_puts(m, " mounted on ");
415 seq_path(m, mnt, mnt->mnt_root, " \t\n\\");
416 seq_putc(m, ' ');
417
418
419 seq_puts(m, "with fstype ");
420 mangle(m, mnt->mnt_sb->s_type->name);
421
422
423 if (mnt->mnt_sb->s_op->show_stats) {
424 seq_putc(m, ' ');
425 err = mnt->mnt_sb->s_op->show_stats(m, mnt);
426 }
427
428 seq_putc(m, '\n');
429 return err;
430}
431
/* seq_file operations that list mounts, one show_vfsstat() line per mount. */
struct seq_operations mountstats_op = {
	.start = m_start,
	.next = m_next,
	.stop = m_stop,
	.show = show_vfsstat,
};
438
439
440
441
442
443
444
445
446
447int may_umount_tree(struct vfsmount *mnt)
448{
449 int actual_refs = 0;
450 int minimum_refs = 0;
451 struct vfsmount *p;
452
453 spin_lock(&vfsmount_lock);
454 for (p = mnt; p; p = next_mnt(p, mnt)) {
455 actual_refs += atomic_read(&p->mnt_count);
456 minimum_refs += 2;
457 }
458 spin_unlock(&vfsmount_lock);
459
460 if (actual_refs > minimum_refs)
461 return 0;
462
463 return 1;
464}
465
466EXPORT_SYMBOL(may_umount_tree);
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481int may_umount(struct vfsmount *mnt)
482{
483 int ret = 1;
484 spin_lock(&vfsmount_lock);
485 if (propagate_mount_busy(mnt, 2))
486 ret = 0;
487 spin_unlock(&vfsmount_lock);
488 return ret;
489}
490
491EXPORT_SYMBOL(may_umount);
492
493void release_mounts(struct list_head *head)
494{
495 struct vfsmount *mnt;
496 while (!list_empty(head)) {
497 mnt = list_first_entry(head, struct vfsmount, mnt_hash);
498 list_del_init(&mnt->mnt_hash);
499 if (mnt->mnt_parent != mnt) {
500 struct dentry *dentry;
501 struct vfsmount *m;
502 spin_lock(&vfsmount_lock);
503 dentry = mnt->mnt_mountpoint;
504 m = mnt->mnt_parent;
505 mnt->mnt_mountpoint = mnt->mnt_root;
506 mnt->mnt_parent = mnt;
507 spin_unlock(&vfsmount_lock);
508 dput(dentry);
509 mntput(m);
510 }
511 mntput(mnt);
512 }
513}
514
515void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill)
516{
517 struct vfsmount *p;
518
519 for (p = mnt; p; p = next_mnt(p, mnt))
520 list_move(&p->mnt_hash, kill);
521
522 if (propagate)
523 propagate_umount(kill);
524
525 list_for_each_entry(p, kill, mnt_hash) {
526 list_del_init(&p->mnt_expire);
527 list_del_init(&p->mnt_list);
528 __touch_mnt_namespace(p->mnt_ns);
529 p->mnt_ns = NULL;
530 list_del_init(&p->mnt_child);
531 if (p->mnt_parent != p)
532 p->mnt_mountpoint->d_mounted--;
533 change_mnt_propagation(p, MS_PRIVATE);
534 }
535}
536
537static int do_umount(struct vfsmount *mnt, int flags)
538{
539 struct super_block *sb = mnt->mnt_sb;
540 int retval;
541 LIST_HEAD(umount_list);
542
543 retval = security_sb_umount(mnt, flags);
544 if (retval)
545 return retval;
546
547
548
549
550
551
552
553 if (flags & MNT_EXPIRE) {
554 if (mnt == current->fs->rootmnt ||
555 flags & (MNT_FORCE | MNT_DETACH))
556 return -EINVAL;
557
558 if (atomic_read(&mnt->mnt_count) != 2)
559 return -EBUSY;
560
561 if (!xchg(&mnt->mnt_expiry_mark, 1))
562 return -EAGAIN;
563 }
564
565
566
567
568
569
570
571
572
573
574
575 lock_kernel();
576 if (sb->s_op->umount_begin)
577 sb->s_op->umount_begin(mnt, flags);
578 unlock_kernel();
579
580
581
582
583
584
585
586
587
588
589 if (mnt == current->fs->rootmnt && !(flags & MNT_DETACH)) {
590
591
592
593
594 down_write(&sb->s_umount);
595 if (!(sb->s_flags & MS_RDONLY)) {
596 lock_kernel();
597 DQUOT_OFF(sb);
598 retval = do_remount_sb(sb, MS_RDONLY, NULL, 0);
599 unlock_kernel();
600 }
601 up_write(&sb->s_umount);
602 return retval;
603 }
604
605 down_write(&namespace_sem);
606 spin_lock(&vfsmount_lock);
607 event++;
608
609 retval = -EBUSY;
610 if (flags & MNT_DETACH || !propagate_mount_busy(mnt, 2)) {
611 if (!list_empty(&mnt->mnt_list))
612 umount_tree(mnt, 1, &umount_list);
613 retval = 0;
614 }
615 spin_unlock(&vfsmount_lock);
616 if (retval)
617 security_sb_umount_busy(mnt);
618 up_write(&namespace_sem);
619 release_mounts(&umount_list);
620 return retval;
621}
622
623
624
625
626
627
628
629
630
631asmlinkage long sys_umount(char __user * name, int flags)
632{
633 struct nameidata nd;
634 int retval;
635
636 retval = __user_walk(name, LOOKUP_FOLLOW, &nd);
637 if (retval)
638 goto out;
639 retval = -EINVAL;
640 if (nd.dentry != nd.mnt->mnt_root)
641 goto dput_and_out;
642 if (!check_mnt(nd.mnt))
643 goto dput_and_out;
644
645 retval = -EPERM;
646 if (!capable(CAP_SYS_ADMIN))
647 goto dput_and_out;
648
649 retval = do_umount(nd.mnt, flags);
650dput_and_out:
651 path_release_on_umount(&nd);
652out:
653 return retval;
654}
655
#ifdef __ARCH_WANT_SYS_OLDUMOUNT

/*
 * One-argument umount entry point, built only when the architecture
 * asks for it: identical to sys_umount() with no flags.
 */
asmlinkage long sys_oldumount(char __user * name)
{
	const int no_flags = 0;

	return sys_umount(name, no_flags);
}

#endif
667
668static int mount_is_safe(struct nameidata *nd)
669{
670 if (capable(CAP_SYS_ADMIN))
671 return 0;
672 return -EPERM;
673#ifdef notyet
674 if (S_ISLNK(nd->dentry->d_inode->i_mode))
675 return -EPERM;
676 if (nd->dentry->d_inode->i_mode & S_ISVTX) {
677 if (current->uid != nd->dentry->d_inode->i_uid)
678 return -EPERM;
679 }
680 if (vfs_permission(nd, MAY_WRITE))
681 return -EPERM;
682 return 0;
683#endif
684}
685
686static int lives_below_in_same_fs(struct dentry *d, struct dentry *dentry)
687{
688 while (1) {
689 if (d == dentry)
690 return 1;
691 if (d == NULL || d == d->d_parent)
692 return 0;
693 d = d->d_parent;
694 }
695}
696
697struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry,
698 int flag)
699{
700 struct vfsmount *res, *p, *q, *r, *s;
701 struct nameidata nd;
702
703 if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(mnt))
704 return NULL;
705
706 res = q = clone_mnt(mnt, dentry, flag);
707 if (!q)
708 goto Enomem;
709 q->mnt_mountpoint = mnt->mnt_mountpoint;
710
711 p = mnt;
712 list_for_each_entry(r, &mnt->mnt_mounts, mnt_child) {
713 if (!lives_below_in_same_fs(r->mnt_mountpoint, dentry))
714 continue;
715
716 for (s = r; s; s = next_mnt(s, r)) {
717 if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(s)) {
718 s = skip_mnt_tree(s);
719 continue;
720 }
721 while (p != s->mnt_parent) {
722 p = p->mnt_parent;
723 q = q->mnt_parent;
724 }
725 p = s;
726 nd.mnt = q;
727 nd.dentry = p->mnt_mountpoint;
728 q = clone_mnt(p, p->mnt_root, flag);
729 if (!q)
730 goto Enomem;
731 spin_lock(&vfsmount_lock);
732 list_add_tail(&q->mnt_list, &res->mnt_list);
733 attach_mnt(q, &nd);
734 spin_unlock(&vfsmount_lock);
735 }
736 }
737 return res;
738Enomem:
739 if (res) {
740 LIST_HEAD(umount_list);
741 spin_lock(&vfsmount_lock);
742 umount_tree(res, 0, &umount_list);
743 spin_unlock(&vfsmount_lock);
744 release_mounts(&umount_list);
745 }
746 return NULL;
747}
748
749struct vfsmount *collect_mounts(struct vfsmount *mnt, struct dentry *dentry)
750{
751 struct vfsmount *tree;
752 down_read(&namespace_sem);
753 tree = copy_tree(mnt, dentry, CL_COPY_ALL | CL_PRIVATE);
754 up_read(&namespace_sem);
755 return tree;
756}
757
758void drop_collected_mounts(struct vfsmount *mnt)
759{
760 LIST_HEAD(umount_list);
761 down_read(&namespace_sem);
762 spin_lock(&vfsmount_lock);
763 umount_tree(mnt, 0, &umount_list);
764 spin_unlock(&vfsmount_lock);
765 up_read(&namespace_sem);
766 release_mounts(&umount_list);
767}
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832static int attach_recursive_mnt(struct vfsmount *source_mnt,
833 struct nameidata *nd, struct nameidata *parent_nd)
834{
835 LIST_HEAD(tree_list);
836 struct vfsmount *dest_mnt = nd->mnt;
837 struct dentry *dest_dentry = nd->dentry;
838 struct vfsmount *child, *p;
839
840 if (propagate_mnt(dest_mnt, dest_dentry, source_mnt, &tree_list))
841 return -EINVAL;
842
843 if (IS_MNT_SHARED(dest_mnt)) {
844 for (p = source_mnt; p; p = next_mnt(p, source_mnt))
845 set_mnt_shared(p);
846 }
847
848 spin_lock(&vfsmount_lock);
849 if (parent_nd) {
850 detach_mnt(source_mnt, parent_nd);
851 attach_mnt(source_mnt, nd);
852 touch_mnt_namespace(current->nsproxy->mnt_ns);
853 } else {
854 mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt);
855 commit_tree(source_mnt);
856 }
857
858 list_for_each_entry_safe(child, p, &tree_list, mnt_hash) {
859 list_del_init(&child->mnt_hash);
860 commit_tree(child);
861 }
862 spin_unlock(&vfsmount_lock);
863 return 0;
864}
865
866static int graft_tree(struct vfsmount *mnt, struct nameidata *nd)
867{
868 int err;
869 if (mnt->mnt_sb->s_flags & MS_NOUSER)
870 return -EINVAL;
871
872 if (S_ISDIR(nd->dentry->d_inode->i_mode) !=
873 S_ISDIR(mnt->mnt_root->d_inode->i_mode))
874 return -ENOTDIR;
875
876 err = -ENOENT;
877 mutex_lock(&nd->dentry->d_inode->i_mutex);
878 if (IS_DEADDIR(nd->dentry->d_inode))
879 goto out_unlock;
880
881 err = security_sb_check_sb(mnt, nd);
882 if (err)
883 goto out_unlock;
884
885 err = -ENOENT;
886 if (IS_ROOT(nd->dentry) || !d_unhashed(nd->dentry))
887 err = attach_recursive_mnt(mnt, nd, NULL);
888out_unlock:
889 mutex_unlock(&nd->dentry->d_inode->i_mutex);
890 if (!err)
891 security_sb_post_addmount(mnt, nd);
892 return err;
893}
894
895
896
897
898static int do_change_type(struct nameidata *nd, int flag)
899{
900 struct vfsmount *m, *mnt = nd->mnt;
901 int recurse = flag & MS_REC;
902 int type = flag & ~MS_REC;
903
904 if (!capable(CAP_SYS_ADMIN))
905 return -EPERM;
906
907 if (nd->dentry != nd->mnt->mnt_root)
908 return -EINVAL;
909
910 down_write(&namespace_sem);
911 spin_lock(&vfsmount_lock);
912 for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL))
913 change_mnt_propagation(m, type);
914 spin_unlock(&vfsmount_lock);
915 up_write(&namespace_sem);
916 return 0;
917}
918
919
920
921
922static int do_loopback(struct nameidata *nd, char *old_name, int recurse)
923{
924 struct nameidata old_nd;
925 struct vfsmount *mnt = NULL;
926 int err = mount_is_safe(nd);
927 if (err)
928 return err;
929 if (!old_name || !*old_name)
930 return -EINVAL;
931 err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd);
932 if (err)
933 return err;
934
935 down_write(&namespace_sem);
936 err = -EINVAL;
937 if (IS_MNT_UNBINDABLE(old_nd.mnt))
938 goto out;
939
940 if (!check_mnt(nd->mnt) || !check_mnt(old_nd.mnt))
941 goto out;
942
943 err = -ENOMEM;
944 if (recurse)
945 mnt = copy_tree(old_nd.mnt, old_nd.dentry, 0);
946 else
947 mnt = clone_mnt(old_nd.mnt, old_nd.dentry, 0);
948
949 if (!mnt)
950 goto out;
951
952 err = graft_tree(mnt, nd);
953 if (err) {
954 LIST_HEAD(umount_list);
955 spin_lock(&vfsmount_lock);
956 umount_tree(mnt, 0, &umount_list);
957 spin_unlock(&vfsmount_lock);
958 release_mounts(&umount_list);
959 }
960
961out:
962 up_write(&namespace_sem);
963 path_release(&old_nd);
964 return err;
965}
966
967
968
969
970
971
972static int do_remount(struct nameidata *nd, int flags, int mnt_flags,
973 void *data)
974{
975 int err;
976 struct super_block *sb = nd->mnt->mnt_sb;
977
978 if (!capable(CAP_SYS_ADMIN))
979 return -EPERM;
980
981 if (!check_mnt(nd->mnt))
982 return -EINVAL;
983
984 if (nd->dentry != nd->mnt->mnt_root)
985 return -EINVAL;
986
987 down_write(&sb->s_umount);
988 err = do_remount_sb(sb, flags, data, 0);
989 if (!err)
990 nd->mnt->mnt_flags = mnt_flags;
991 up_write(&sb->s_umount);
992 if (!err)
993 security_sb_post_remount(nd->mnt, flags, data);
994 return err;
995}
996
/* Return 1 if any mount in the tree rooted at @mnt is unbindable, else 0. */
static inline int tree_contains_unbindable(struct vfsmount *mnt)
{
	struct vfsmount *p = mnt;

	while (p) {
		if (IS_MNT_UNBINDABLE(p))
			return 1;
		p = next_mnt(p, mnt);
	}
	return 0;
}
1006
1007static int do_move_mount(struct nameidata *nd, char *old_name)
1008{
1009 struct nameidata old_nd, parent_nd;
1010 struct vfsmount *p;
1011 int err = 0;
1012 if (!capable(CAP_SYS_ADMIN))
1013 return -EPERM;
1014 if (!old_name || !*old_name)
1015 return -EINVAL;
1016 err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd);
1017 if (err)
1018 return err;
1019
1020 down_write(&namespace_sem);
1021 while (d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry))
1022 ;
1023 err = -EINVAL;
1024 if (!check_mnt(nd->mnt) || !check_mnt(old_nd.mnt))
1025 goto out;
1026
1027 err = -ENOENT;
1028 mutex_lock(&nd->dentry->d_inode->i_mutex);
1029 if (IS_DEADDIR(nd->dentry->d_inode))
1030 goto out1;
1031
1032 if (!IS_ROOT(nd->dentry) && d_unhashed(nd->dentry))
1033 goto out1;
1034
1035 err = -EINVAL;
1036 if (old_nd.dentry != old_nd.mnt->mnt_root)
1037 goto out1;
1038
1039 if (old_nd.mnt == old_nd.mnt->mnt_parent)
1040 goto out1;
1041
1042 if (S_ISDIR(nd->dentry->d_inode->i_mode) !=
1043 S_ISDIR(old_nd.dentry->d_inode->i_mode))
1044 goto out1;
1045
1046
1047
1048 if (old_nd.mnt->mnt_parent && IS_MNT_SHARED(old_nd.mnt->mnt_parent))
1049 goto out1;
1050
1051
1052
1053
1054 if (IS_MNT_SHARED(nd->mnt) && tree_contains_unbindable(old_nd.mnt))
1055 goto out1;
1056 err = -ELOOP;
1057 for (p = nd->mnt; p->mnt_parent != p; p = p->mnt_parent)
1058 if (p == old_nd.mnt)
1059 goto out1;
1060
1061 if ((err = attach_recursive_mnt(old_nd.mnt, nd, &parent_nd)))
1062 goto out1;
1063
1064 spin_lock(&vfsmount_lock);
1065
1066
1067 list_del_init(&old_nd.mnt->mnt_expire);
1068 spin_unlock(&vfsmount_lock);
1069out1:
1070 mutex_unlock(&nd->dentry->d_inode->i_mutex);
1071out:
1072 up_write(&namespace_sem);
1073 if (!err)
1074 path_release(&parent_nd);
1075 path_release(&old_nd);
1076 return err;
1077}
1078
1079
1080
1081
1082
1083static int do_new_mount(struct nameidata *nd, char *type, int flags,
1084 int mnt_flags, char *name, void *data)
1085{
1086 struct vfsmount *mnt;
1087
1088 if (!type || !memchr(type, 0, PAGE_SIZE))
1089 return -EINVAL;
1090
1091
1092 if (!capable(CAP_SYS_ADMIN))
1093 return -EPERM;
1094
1095 mnt = do_kern_mount(type, flags, name, data);
1096 if (IS_ERR(mnt))
1097 return PTR_ERR(mnt);
1098
1099 return do_add_mount(mnt, nd, mnt_flags, NULL);
1100}
1101
1102
1103
1104
1105
1106int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd,
1107 int mnt_flags, struct list_head *fslist)
1108{
1109 int err;
1110
1111 down_write(&namespace_sem);
1112
1113 while (d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry))
1114 ;
1115 err = -EINVAL;
1116 if (!check_mnt(nd->mnt))
1117 goto unlock;
1118
1119
1120 err = -EBUSY;
1121 if (nd->mnt->mnt_sb == newmnt->mnt_sb &&
1122 nd->mnt->mnt_root == nd->dentry)
1123 goto unlock;
1124
1125 err = -EINVAL;
1126 if (S_ISLNK(newmnt->mnt_root->d_inode->i_mode))
1127 goto unlock;
1128
1129 newmnt->mnt_flags = mnt_flags;
1130 if ((err = graft_tree(newmnt, nd)))
1131 goto unlock;
1132
1133 if (fslist) {
1134
1135 spin_lock(&vfsmount_lock);
1136 list_add_tail(&newmnt->mnt_expire, fslist);
1137 spin_unlock(&vfsmount_lock);
1138 }
1139 up_write(&namespace_sem);
1140 return 0;
1141
1142unlock:
1143 up_write(&namespace_sem);
1144 mntput(newmnt);
1145 return err;
1146}
1147
1148EXPORT_SYMBOL_GPL(do_add_mount);
1149
1150static void expire_mount(struct vfsmount *mnt, struct list_head *mounts,
1151 struct list_head *umounts)
1152{
1153 spin_lock(&vfsmount_lock);
1154
1155
1156
1157
1158
1159 if (mnt->mnt_parent == mnt) {
1160 spin_unlock(&vfsmount_lock);
1161 return;
1162 }
1163
1164
1165
1166
1167
1168 if (!propagate_mount_busy(mnt, 2)) {
1169
1170 touch_mnt_namespace(mnt->mnt_ns);
1171 list_del_init(&mnt->mnt_list);
1172 mnt->mnt_ns = NULL;
1173 umount_tree(mnt, 1, umounts);
1174 spin_unlock(&vfsmount_lock);
1175 } else {
1176
1177
1178
1179
1180 list_add_tail(&mnt->mnt_expire, mounts);
1181 spin_unlock(&vfsmount_lock);
1182 }
1183}
1184
1185
1186
1187
1188
1189
1190
1191static void expire_mount_list(struct list_head *graveyard, struct list_head *mounts)
1192{
1193 struct mnt_namespace *ns;
1194 struct vfsmount *mnt;
1195
1196 while (!list_empty(graveyard)) {
1197 LIST_HEAD(umounts);
1198 mnt = list_first_entry(graveyard, struct vfsmount, mnt_expire);
1199 list_del_init(&mnt->mnt_expire);
1200
1201
1202
1203 ns = mnt->mnt_ns;
1204 if (!ns || !ns->root)
1205 continue;
1206 get_mnt_ns(ns);
1207
1208 spin_unlock(&vfsmount_lock);
1209 down_write(&namespace_sem);
1210 expire_mount(mnt, mounts, &umounts);
1211 up_write(&namespace_sem);
1212 release_mounts(&umounts);
1213 mntput(mnt);
1214 put_mnt_ns(ns);
1215 spin_lock(&vfsmount_lock);
1216 }
1217}
1218
1219
1220
1221
1222
1223
1224void mark_mounts_for_expiry(struct list_head *mounts)
1225{
1226 struct vfsmount *mnt, *next;
1227 LIST_HEAD(graveyard);
1228
1229 if (list_empty(mounts))
1230 return;
1231
1232 spin_lock(&vfsmount_lock);
1233
1234
1235
1236
1237
1238
1239
1240 list_for_each_entry_safe(mnt, next, mounts, mnt_expire) {
1241 if (!xchg(&mnt->mnt_expiry_mark, 1) ||
1242 atomic_read(&mnt->mnt_count) != 1)
1243 continue;
1244
1245 mntget(mnt);
1246 list_move(&mnt->mnt_expire, &graveyard);
1247 }
1248
1249 expire_mount_list(&graveyard, mounts);
1250
1251 spin_unlock(&vfsmount_lock);
1252}
1253
1254EXPORT_SYMBOL_GPL(mark_mounts_for_expiry);
1255
1256
1257
1258
1259
1260
1261
/*
 * Walk the mounts below @parent and move onto @graveyard (via
 * mnt_expire) every MNT_SHRINKABLE mount that has no child mounts and
 * that propagate_mount_busy(mnt, 1) reports as not busy.  A reference
 * is taken on each mount that is moved.
 *
 * Mounts without MNT_SHRINKABLE are skipped together with everything
 * below them.  Returns the number of mounts moved.
 */
static int select_submounts(struct vfsmount *parent, struct list_head *graveyard)
{
	struct vfsmount *this_parent = parent;
	struct list_head *next;
	int found = 0;

repeat:
	/* Start scanning the children of this_parent. */
	next = this_parent->mnt_mounts.next;
resume:
	while (next != &this_parent->mnt_mounts) {
		struct list_head *tmp = next;
		struct vfsmount *mnt = list_entry(tmp, struct vfsmount, mnt_child);

		/* Advance first: mnt may be moved to the graveyard below. */
		next = tmp->next;
		if (!(mnt->mnt_flags & MNT_SHRINKABLE))
			continue;

		/* Descend a level if this mount has children of its own. */
		if (!list_empty(&mnt->mnt_mounts)) {
			this_parent = mnt;
			goto repeat;
		}

		if (!propagate_mount_busy(mnt, 1)) {
			mntget(mnt);
			list_move_tail(&mnt->mnt_expire, graveyard);
			found++;
		}
	}

	/* All done at this level: go back up and continue with the next sibling. */
	if (this_parent != parent) {
		next = this_parent->mnt_child.next;
		this_parent = this_parent->mnt_parent;
		goto resume;
	}
	return found;
}
1302
1303
1304
1305
1306
1307void shrink_submounts(struct vfsmount *mountpoint, struct list_head *mounts)
1308{
1309 LIST_HEAD(graveyard);
1310 int found;
1311
1312 spin_lock(&vfsmount_lock);
1313
1314
1315 while ((found = select_submounts(mountpoint, &graveyard)) != 0)
1316 expire_mount_list(&graveyard, mounts);
1317
1318 spin_unlock(&vfsmount_lock);
1319}
1320
1321EXPORT_SYMBOL_GPL(shrink_submounts);
1322
1323
1324
1325
1326
1327
1328
1329static long exact_copy_from_user(void *to, const void __user * from,
1330 unsigned long n)
1331{
1332 char *t = to;
1333 const char __user *f = from;
1334 char c;
1335
1336 if (!access_ok(VERIFY_READ, from, n))
1337 return n;
1338
1339 while (n) {
1340 if (__get_user(c, f)) {
1341 memset(t, 0, n);
1342 break;
1343 }
1344 *t++ = c;
1345 f++;
1346 n--;
1347 }
1348 return n;
1349}
1350
1351int copy_mount_options(const void __user * data, unsigned long *where)
1352{
1353 int i;
1354 unsigned long page;
1355 unsigned long size;
1356
1357 *where = 0;
1358 if (!data)
1359 return 0;
1360
1361 if (!(page = __get_free_page(GFP_KERNEL)))
1362 return -ENOMEM;
1363
1364
1365
1366
1367
1368
1369 size = TASK_SIZE - (unsigned long)data;
1370 if (size > PAGE_SIZE)
1371 size = PAGE_SIZE;
1372
1373 i = size - exact_copy_from_user((void *)page, data, size);
1374 if (!i) {
1375 free_page(page);
1376 return -EFAULT;
1377 }
1378 if (i != PAGE_SIZE)
1379 memset((char *)page + i, 0, PAGE_SIZE - i);
1380 *where = page;
1381 return 0;
1382}
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398long do_mount(char *dev_name, char *dir_name, char *type_page,
1399 unsigned long flags, void *data_page)
1400{
1401 struct nameidata nd;
1402 int retval = 0;
1403 int mnt_flags = 0;
1404
1405
1406 if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
1407 flags &= ~MS_MGC_MSK;
1408
1409
1410
1411 if (!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE))
1412 return -EINVAL;
1413 if (dev_name && !memchr(dev_name, 0, PAGE_SIZE))
1414 return -EINVAL;
1415
1416 if (data_page)
1417 ((char *)data_page)[PAGE_SIZE - 1] = 0;
1418
1419
1420 if (flags & MS_NOSUID)
1421 mnt_flags |= MNT_NOSUID;
1422 if (flags & MS_NODEV)
1423 mnt_flags |= MNT_NODEV;
1424 if (flags & MS_NOEXEC)
1425 mnt_flags |= MNT_NOEXEC;
1426 if (flags & MS_NOATIME)
1427 mnt_flags |= MNT_NOATIME;
1428 if (flags & MS_NODIRATIME)
1429 mnt_flags |= MNT_NODIRATIME;
1430 if (flags & MS_RELATIME)
1431 mnt_flags |= MNT_RELATIME;
1432
1433 flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE |
1434 MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT);
1435
1436
1437 retval = path_lookup(dir_name, LOOKUP_FOLLOW, &nd);
1438 if (retval)
1439 return retval;
1440
1441 retval = security_sb_mount(dev_name, &nd, type_page, flags, data_page);
1442 if (retval)
1443 goto dput_out;
1444
1445 if (flags & MS_REMOUNT)
1446 retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
1447 data_page);
1448 else if (flags & MS_BIND)
1449 retval = do_loopback(&nd, dev_name, flags & MS_REC);
1450 else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
1451 retval = do_change_type(&nd, flags);
1452 else if (flags & MS_MOVE)
1453 retval = do_move_mount(&nd, dev_name);
1454 else
1455 retval = do_new_mount(&nd, type_page, flags, mnt_flags,
1456 dev_name, data_page);
1457dput_out:
1458 path_release(&nd);
1459 return retval;
1460}
1461
1462
1463
1464
1465
1466static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
1467 struct fs_struct *fs)
1468{
1469 struct mnt_namespace *new_ns;
1470 struct vfsmount *rootmnt = NULL, *pwdmnt = NULL, *altrootmnt = NULL;
1471 struct vfsmount *p, *q;
1472
1473 new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
1474 if (!new_ns)
1475 return ERR_PTR(-ENOMEM);
1476
1477 atomic_set(&new_ns->count, 1);
1478 INIT_LIST_HEAD(&new_ns->list);
1479 init_waitqueue_head(&new_ns->poll);
1480 new_ns->event = 0;
1481
1482 down_write(&namespace_sem);
1483
1484 new_ns->root = copy_tree(mnt_ns->root, mnt_ns->root->mnt_root,
1485 CL_COPY_ALL | CL_EXPIRE);
1486 if (!new_ns->root) {
1487 up_write(&namespace_sem);
1488 kfree(new_ns);
1489 return ERR_PTR(-ENOMEM);;
1490 }
1491 spin_lock(&vfsmount_lock);
1492 list_add_tail(&new_ns->list, &new_ns->root->mnt_list);
1493 spin_unlock(&vfsmount_lock);
1494
1495
1496
1497
1498
1499
1500 p = mnt_ns->root;
1501 q = new_ns->root;
1502 while (p) {
1503 q->mnt_ns = new_ns;
1504 if (fs) {
1505 if (p == fs->rootmnt) {
1506 rootmnt = p;
1507 fs->rootmnt = mntget(q);
1508 }
1509 if (p == fs->pwdmnt) {
1510 pwdmnt = p;
1511 fs->pwdmnt = mntget(q);
1512 }
1513 if (p == fs->altrootmnt) {
1514 altrootmnt = p;
1515 fs->altrootmnt = mntget(q);
1516 }
1517 }
1518 p = next_mnt(p, mnt_ns->root);
1519 q = next_mnt(q, new_ns->root);
1520 }
1521 up_write(&namespace_sem);
1522
1523 if (rootmnt)
1524 mntput(rootmnt);
1525 if (pwdmnt)
1526 mntput(pwdmnt);
1527 if (altrootmnt)
1528 mntput(altrootmnt);
1529
1530 return new_ns;
1531}
1532
1533struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
1534 struct fs_struct *new_fs)
1535{
1536 struct mnt_namespace *new_ns;
1537
1538 BUG_ON(!ns);
1539 get_mnt_ns(ns);
1540
1541 if (!(flags & CLONE_NEWNS))
1542 return ns;
1543
1544 new_ns = dup_mnt_ns(ns, new_fs);
1545
1546 put_mnt_ns(ns);
1547 return new_ns;
1548}
1549
1550asmlinkage long sys_mount(char __user * dev_name, char __user * dir_name,
1551 char __user * type, unsigned long flags,
1552 void __user * data)
1553{
1554 int retval;
1555 unsigned long data_page;
1556 unsigned long type_page;
1557 unsigned long dev_page;
1558 char *dir_page;
1559
1560 retval = copy_mount_options(type, &type_page);
1561 if (retval < 0)
1562 return retval;
1563
1564 dir_page = getname(dir_name);
1565 retval = PTR_ERR(dir_page);
1566 if (IS_ERR(dir_page))
1567 goto out1;
1568
1569 retval = copy_mount_options(dev_name, &dev_page);
1570 if (retval < 0)
1571 goto out2;
1572
1573 retval = copy_mount_options(data, &data_page);
1574 if (retval < 0)
1575 goto out3;
1576
1577 lock_kernel();
1578 retval = do_mount((char *)dev_page, dir_page, (char *)type_page,
1579 flags, (void *)data_page);
1580 unlock_kernel();
1581 free_page(data_page);
1582
1583out3:
1584 free_page(dev_page);
1585out2:
1586 putname(dir_page);
1587out1:
1588 free_page(type_page);
1589 return retval;
1590}
1591
1592
1593
1594
1595
1596void set_fs_root(struct fs_struct *fs, struct vfsmount *mnt,
1597 struct dentry *dentry)
1598{
1599 struct dentry *old_root;
1600 struct vfsmount *old_rootmnt;
1601 write_lock(&fs->lock);
1602 old_root = fs->root;
1603 old_rootmnt = fs->rootmnt;
1604 fs->rootmnt = mntget(mnt);
1605 fs->root = dget(dentry);
1606 write_unlock(&fs->lock);
1607 if (old_root) {
1608 dput(old_root);
1609 mntput(old_rootmnt);
1610 }
1611}
1612
1613
1614
1615
1616
1617void set_fs_pwd(struct fs_struct *fs, struct vfsmount *mnt,
1618 struct dentry *dentry)
1619{
1620 struct dentry *old_pwd;
1621 struct vfsmount *old_pwdmnt;
1622
1623 write_lock(&fs->lock);
1624 old_pwd = fs->pwd;
1625 old_pwdmnt = fs->pwdmnt;
1626 fs->pwdmnt = mntget(mnt);
1627 fs->pwd = dget(dentry);
1628 write_unlock(&fs->lock);
1629
1630 if (old_pwd) {
1631 dput(old_pwd);
1632 mntput(old_pwdmnt);
1633 }
1634}
1635
1636static void chroot_fs_refs(struct nameidata *old_nd, struct nameidata *new_nd)
1637{
1638 struct task_struct *g, *p;
1639 struct fs_struct *fs;
1640
1641 read_lock(&tasklist_lock);
1642 do_each_thread(g, p) {
1643 task_lock(p);
1644 fs = p->fs;
1645 if (fs) {
1646 atomic_inc(&fs->count);
1647 task_unlock(p);
1648 if (fs->root == old_nd->dentry
1649 && fs->rootmnt == old_nd->mnt)
1650 set_fs_root(fs, new_nd->mnt, new_nd->dentry);
1651 if (fs->pwd == old_nd->dentry
1652 && fs->pwdmnt == old_nd->mnt)
1653 set_fs_pwd(fs, new_nd->mnt, new_nd->dentry);
1654 put_fs_struct(fs);
1655 } else
1656 task_unlock(p);
1657 } while_each_thread(g, p);
1658 read_unlock(&tasklist_lock);
1659}
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686asmlinkage long sys_pivot_root(const char __user * new_root,
1687 const char __user * put_old)
1688{
1689 struct vfsmount *tmp;
1690 struct nameidata new_nd, old_nd, parent_nd, root_parent, user_nd;
1691 int error;
1692
1693 if (!capable(CAP_SYS_ADMIN))
1694 return -EPERM;
1695
1696 lock_kernel();
1697
1698 error = __user_walk(new_root, LOOKUP_FOLLOW | LOOKUP_DIRECTORY,
1699 &new_nd);
1700 if (error)
1701 goto out0;
1702 error = -EINVAL;
1703 if (!check_mnt(new_nd.mnt))
1704 goto out1;
1705
1706 error = __user_walk(put_old, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &old_nd);
1707 if (error)
1708 goto out1;
1709
1710 error = security_sb_pivotroot(&old_nd, &new_nd);
1711 if (error) {
1712 path_release(&old_nd);
1713 goto out1;
1714 }
1715
1716 read_lock(¤t->fs->lock);
1717 user_nd.mnt = mntget(current->fs->rootmnt);
1718 user_nd.dentry = dget(current->fs->root);
1719 read_unlock(¤t->fs->lock);
1720 down_write(&namespace_sem);
1721 mutex_lock(&old_nd.dentry->d_inode->i_mutex);
1722 error = -EINVAL;
1723 if (IS_MNT_SHARED(old_nd.mnt) ||
1724 IS_MNT_SHARED(new_nd.mnt->mnt_parent) ||
1725 IS_MNT_SHARED(user_nd.mnt->mnt_parent))
1726 goto out2;
1727 if (!check_mnt(user_nd.mnt))
1728 goto out2;
1729 error = -ENOENT;
1730 if (IS_DEADDIR(new_nd.dentry->d_inode))
1731 goto out2;
1732 if (d_unhashed(new_nd.dentry) && !IS_ROOT(new_nd.dentry))
1733 goto out2;
1734 if (d_unhashed(old_nd.dentry) && !IS_ROOT(old_nd.dentry))
1735 goto out2;
1736 error = -EBUSY;
1737 if (new_nd.mnt == user_nd.mnt || old_nd.mnt == user_nd.mnt)
1738 goto out2;
1739 error = -EINVAL;
1740 if (user_nd.mnt->mnt_root != user_nd.dentry)
1741 goto out2;
1742 if (user_nd.mnt->mnt_parent == user_nd.mnt)
1743 goto out2;
1744 if (new_nd.mnt->mnt_root != new_nd.dentry)
1745 goto out2;
1746 if (new_nd.mnt->mnt_parent == new_nd.mnt)
1747 goto out2;
1748 tmp = old_nd.mnt;
1749 spin_lock(&vfsmount_lock);
1750 if (tmp != new_nd.mnt) {
1751 for (;;) {
1752 if (tmp->mnt_parent == tmp)
1753 goto out3;
1754 if (tmp->mnt_parent == new_nd.mnt)
1755 break;
1756 tmp = tmp->mnt_parent;
1757 }
1758 if (!is_subdir(tmp->mnt_mountpoint, new_nd.dentry))
1759 goto out3;
1760 } else if (!is_subdir(old_nd.dentry, new_nd.dentry))
1761 goto out3;
1762 detach_mnt(new_nd.mnt, &parent_nd);
1763 detach_mnt(user_nd.mnt, &root_parent);
1764 attach_mnt(user_nd.mnt, &old_nd);
1765 attach_mnt(new_nd.mnt, &root_parent);
1766 touch_mnt_namespace(current->nsproxy->mnt_ns);
1767 spin_unlock(&vfsmount_lock);
1768 chroot_fs_refs(&user_nd, &new_nd);
1769 security_sb_post_pivotroot(&user_nd, &new_nd);
1770 error = 0;
1771 path_release(&root_parent);
1772 path_release(&parent_nd);
1773out2:
1774 mutex_unlock(&old_nd.dentry->d_inode->i_mutex);
1775 up_write(&namespace_sem);
1776 path_release(&user_nd);
1777 path_release(&old_nd);
1778out1:
1779 path_release(&new_nd);
1780out0:
1781 unlock_kernel();
1782 return error;
1783out3:
1784 spin_unlock(&vfsmount_lock);
1785 goto out2;
1786}
1787
1788static void __init init_mount_tree(void)
1789{
1790 struct vfsmount *mnt;
1791 struct mnt_namespace *ns;
1792
1793 mnt = do_kern_mount("rootfs", 0, "rootfs", NULL);
1794 if (IS_ERR(mnt))
1795 panic("Can't create rootfs");
1796 ns = kmalloc(sizeof(*ns), GFP_KERNEL);
1797 if (!ns)
1798 panic("Can't allocate initial namespace");
1799 atomic_set(&ns->count, 1);
1800 INIT_LIST_HEAD(&ns->list);
1801 init_waitqueue_head(&ns->poll);
1802 ns->event = 0;
1803 list_add(&mnt->mnt_list, &ns->list);
1804 ns->root = mnt;
1805 mnt->mnt_ns = ns;
1806
1807 init_task.nsproxy->mnt_ns = ns;
1808 get_mnt_ns(ns);
1809
1810 set_fs_pwd(current->fs, ns->root, ns->root->mnt_root);
1811 set_fs_root(current->fs, ns->root, ns->root->mnt_root);
1812}
1813
1814void __init mnt_init(void)
1815{
1816 struct list_head *d;
1817 unsigned int nr_hash;
1818 int i;
1819 int err;
1820
1821 init_rwsem(&namespace_sem);
1822
1823 mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct vfsmount),
1824 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
1825
1826 mount_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC);
1827
1828 if (!mount_hashtable)
1829 panic("Failed to allocate mount hash table\n");
1830
1831
1832
1833
1834
1835
1836 nr_hash = PAGE_SIZE / sizeof(struct list_head);
1837 hash_bits = 0;
1838 do {
1839 hash_bits++;
1840 } while ((nr_hash >> hash_bits) != 0);
1841 hash_bits--;
1842
1843
1844
1845
1846
1847 nr_hash = 1UL << hash_bits;
1848 hash_mask = nr_hash - 1;
1849
1850 printk("Mount-cache hash table entries: %d\n", nr_hash);
1851
1852
1853 d = mount_hashtable;
1854 i = nr_hash;
1855 do {
1856 INIT_LIST_HEAD(d);
1857 d++;
1858 i--;
1859 } while (i);
1860 err = sysfs_init();
1861 if (err)
1862 printk(KERN_WARNING "%s: sysfs_init error: %d\n",
1863 __FUNCTION__, err);
1864 err = subsystem_register(&fs_subsys);
1865 if (err)
1866 printk(KERN_WARNING "%s: subsystem_register error: %d\n",
1867 __FUNCTION__, err);
1868 init_rootfs();
1869 init_mount_tree();
1870}
1871
1872void __put_mnt_ns(struct mnt_namespace *ns)
1873{
1874 struct vfsmount *root = ns->root;
1875 LIST_HEAD(umount_list);
1876 ns->root = NULL;
1877 spin_unlock(&vfsmount_lock);
1878 down_write(&namespace_sem);
1879 spin_lock(&vfsmount_lock);
1880 umount_tree(root, 0, &umount_list);
1881 spin_unlock(&vfsmount_lock);
1882 up_write(&namespace_sem);
1883 release_mounts(&umount_list);
1884 kfree(ns);
1885}
1886