From: "Serge E. Hallyn" This moves the mount namespace into the nsproxy. The mount namespace count now refers to the number of nsproxies point to it, rather than the number of tasks. As a result, the unshare_namespace() function in kernel/fork.c no longer checks whether it is being shared. Signed-off-by: Serge Hallyn Cc: Kirill Korotaev Cc: "Eric W. Biederman" Cc: Herbert Poetzl Cc: Andrey Savochkin Signed-off-by: Andrew Morton --- fs/namespace.c | 22 ++++++++-------------- fs/proc/base.c | 5 +++-- include/linux/init_task.h | 1 + include/linux/namespace.h | 6 ++---- include/linux/nsproxy.h | 3 +++ include/linux/sched.h | 4 +--- kernel/exit.c | 4 ---- kernel/fork.c | 17 ++++++----------- kernel/nsproxy.c | 32 +++++++++++++++++++++++++++++++- 9 files changed, 55 insertions(+), 39 deletions(-) diff -puN fs/namespace.c~namespaces-incorporate-fs-namespace-into-nsproxy fs/namespace.c --- a/fs/namespace.c~namespaces-incorporate-fs-namespace-into-nsproxy +++ a/fs/namespace.c @@ -142,7 +142,7 @@ struct vfsmount *lookup_mnt(struct vfsmo static inline int check_mnt(struct vfsmount *mnt) { - return mnt->mnt_namespace == current->namespace; + return mnt->mnt_namespace == current->nsproxy->namespace; } static void touch_namespace(struct namespace *ns) @@ -839,7 +839,7 @@ static int attach_recursive_mnt(struct v if (parent_nd) { detach_mnt(source_mnt, parent_nd); attach_mnt(source_mnt, nd); - touch_namespace(current->namespace); + touch_namespace(current->nsproxy->namespace); } else { mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt); commit_tree(source_mnt); @@ -1450,7 +1450,7 @@ dput_out: */ struct namespace *dup_namespace(struct task_struct *tsk, struct fs_struct *fs) { - struct namespace *namespace = tsk->namespace; + struct namespace *namespace = tsk->nsproxy->namespace; struct namespace *new_ns; struct vfsmount *rootmnt = NULL, *pwdmnt = NULL, *altrootmnt = NULL; struct vfsmount *p, *q; @@ -1517,7 +1517,7 @@ struct namespace *dup_namespace(struct t int copy_namespace(int flags, struct task_struct *tsk) { - struct namespace *namespace = tsk->namespace; + struct namespace *namespace = tsk->nsproxy->namespace; struct namespace *new_ns; int err = 0; @@ -1540,7 +1540,7 @@ int copy_namespace(int flags, struct tas goto out; } - tsk->namespace = new_ns; + tsk->nsproxy->namespace = new_ns; out: put_namespace(namespace); @@ -1763,7 +1763,7 @@ asmlinkage long sys_pivot_root(const cha detach_mnt(user_nd.mnt, &root_parent); attach_mnt(user_nd.mnt, &old_nd); /* mount old root on put_old */ attach_mnt(new_nd.mnt, &root_parent); /* mount new_root on / */ - touch_namespace(current->namespace); + touch_namespace(current->nsproxy->namespace); spin_unlock(&vfsmount_lock); chroot_fs_refs(&user_nd, &new_nd); security_sb_post_pivotroot(&user_nd, &new_nd); @@ -1789,7 +1789,6 @@ static void __init init_mount_tree(void) { struct vfsmount *mnt; struct namespace *namespace; - struct task_struct *g, *p; mnt = do_kern_mount("rootfs", 0, "rootfs", NULL); if (IS_ERR(mnt)) @@ -1805,13 +1804,8 @@ static void __init init_mount_tree(void) namespace->root = mnt; mnt->mnt_namespace = namespace; - init_task.namespace = namespace; - read_lock(&tasklist_lock); - do_each_thread(g, p) { - get_namespace(namespace); - p->namespace = namespace; - } while_each_thread(g, p); - read_unlock(&tasklist_lock); + init_task.nsproxy->namespace = namespace; + get_namespace(namespace); set_fs_pwd(current->fs, namespace->root, namespace->root->mnt_root); set_fs_root(current->fs, namespace->root, namespace->root->mnt_root); diff -puN fs/proc/base.c~namespaces-incorporate-fs-namespace-into-nsproxy fs/proc/base.c --- a/fs/proc/base.c~namespaces-incorporate-fs-namespace-into-nsproxy +++ a/fs/proc/base.c @@ -72,6 +72,7 @@ #include #include #include +#include #include "internal.h" /* NOTE: @@ -567,7 +568,7 @@ static int mounts_open(struct inode *ino if (task) { task_lock(task); - namespace = task->namespace; + namespace = task->nsproxy->namespace; if (namespace) get_namespace(namespace); task_unlock(task); @@ -638,7 +639,7 @@ static int mountstats_open(struct inode if (task) { task_lock(task); - namespace = task->namespace; + namespace = task->nsproxy->namespace; if (namespace) get_namespace(namespace); task_unlock(task); diff -puN include/linux/init_task.h~namespaces-incorporate-fs-namespace-into-nsproxy include/linux/init_task.h --- a/include/linux/init_task.h~namespaces-incorporate-fs-namespace-into-nsproxy +++ a/include/linux/init_task.h @@ -70,6 +70,7 @@ extern struct nsproxy init_nsproxy; #define INIT_NSPROXY(nsproxy) { \ .count = ATOMIC_INIT(1), \ .nslock = SPIN_LOCK_UNLOCKED, \ + .namespace = NULL, \ } #define INIT_SIGHAND(sighand) { \ diff -puN include/linux/namespace.h~namespaces-incorporate-fs-namespace-into-nsproxy include/linux/namespace.h --- a/include/linux/namespace.h~namespaces-incorporate-fs-namespace-into-nsproxy +++ a/include/linux/namespace.h @@ -4,6 +4,7 @@ #include #include +#include struct namespace { atomic_t count; @@ -26,11 +27,8 @@ static inline void put_namespace(struct static inline void exit_namespace(struct task_struct *p) { - struct namespace *namespace = p->namespace; + struct namespace *namespace = p->nsproxy->namespace; if (namespace) { - task_lock(p); - p->namespace = NULL; - task_unlock(p); put_namespace(namespace); } } diff -puN include/linux/nsproxy.h~namespaces-incorporate-fs-namespace-into-nsproxy include/linux/nsproxy.h --- a/include/linux/nsproxy.h~namespaces-incorporate-fs-namespace-into-nsproxy +++ a/include/linux/nsproxy.h @@ -4,6 +4,8 @@ #include #include +struct namespace; + /* * A structure to contain pointers to all per-process * namespaces - fs (mount), uts, network, sysvipc, etc. @@ -19,6 +21,7 @@ struct nsproxy { atomic_t count; spinlock_t nslock; + struct namespace *namespace; }; extern struct nsproxy init_nsproxy; diff -puN include/linux/sched.h~namespaces-incorporate-fs-namespace-into-nsproxy include/linux/sched.h --- a/include/linux/sched.h~namespaces-incorporate-fs-namespace-into-nsproxy +++ a/include/linux/sched.h @@ -238,7 +238,6 @@ extern signed long schedule_timeout_inte extern signed long schedule_timeout_uninterruptible(signed long timeout); asmlinkage void schedule(void); -struct namespace; struct nsproxy; /* Maximum number of active map areas.. This is a random (large) number */ @@ -896,8 +895,7 @@ struct task_struct { struct fs_struct *fs; /* open file information */ struct files_struct *files; -/* namespace */ - struct namespace *namespace; +/* namespaces */ struct nsproxy *nsproxy; /* signal handlers */ struct signal_struct *signal; diff -puN kernel/exit.c~namespaces-incorporate-fs-namespace-into-nsproxy kernel/exit.c --- a/kernel/exit.c~namespaces-incorporate-fs-namespace-into-nsproxy +++ a/kernel/exit.c @@ -411,11 +411,8 @@ void daemonize(const char *name, ...) current->fs = fs; atomic_inc(&fs->count); - exit_namespace(current); exit_task_namespaces(current); - current->namespace = init_task.namespace; current->nsproxy = init_task.nsproxy; - get_namespace(current->namespace); get_task_namespaces(current); exit_files(current); @@ -921,7 +918,6 @@ fastcall NORET_TYPE void do_exit(long co exit_sem(tsk); __exit_files(tsk); __exit_fs(tsk); - exit_namespace(tsk); exit_task_namespaces(tsk); exit_thread(); cpuset_exit(tsk); diff -puN kernel/fork.c~namespaces-incorporate-fs-namespace-into-nsproxy kernel/fork.c --- a/kernel/fork.c~namespaces-incorporate-fs-namespace-into-nsproxy +++ a/kernel/fork.c @@ -1086,11 +1086,9 @@ static task_t *copy_process(unsigned lon goto bad_fork_cleanup_mm; if ((retval = copy_namespaces(clone_flags, p))) goto bad_fork_cleanup_keys; - if ((retval = copy_namespace(clone_flags, p))) - goto bad_fork_cleanup_namespaces; retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs); if (retval) - goto bad_fork_cleanup_namespace; + goto bad_fork_cleanup_namespaces; p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; /* @@ -1180,7 +1178,7 @@ static task_t *copy_process(unsigned lon spin_unlock(¤t->sighand->siglock); write_unlock_irq(&tasklist_lock); retval = -ERESTARTNOINTR; - goto bad_fork_cleanup_namespace; + goto bad_fork_cleanup_namespaces; } if (clone_flags & CLONE_THREAD) { @@ -1233,8 +1231,6 @@ static task_t *copy_process(unsigned lon proc_fork_connector(p); return p; -bad_fork_cleanup_namespace: - exit_namespace(p); bad_fork_cleanup_namespaces: exit_task_namespaces(p); bad_fork_cleanup_keys: @@ -1486,10 +1482,9 @@ static int unshare_fs(unsigned long unsh */ static int unshare_namespace(unsigned long unshare_flags, struct namespace **new_nsp, struct fs_struct *new_fs) { - struct namespace *ns = current->namespace; + struct namespace *ns = current->nsproxy->namespace; - if ((unshare_flags & CLONE_NEWNS) && - (ns && atomic_read(&ns->count) > 1)) { + if ((unshare_flags & CLONE_NEWNS) && ns) { if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -1622,8 +1617,8 @@ asmlinkage long sys_unshare(unsigned lon } if (new_ns) { - ns = current->namespace; - current->namespace = new_ns; + ns = current->nsproxy->namespace; + current->nsproxy->namespace = new_ns; new_ns = ns; } diff -puN kernel/nsproxy.c~namespaces-incorporate-fs-namespace-into-nsproxy kernel/nsproxy.c --- a/kernel/nsproxy.c~namespaces-incorporate-fs-namespace-into-nsproxy +++ a/kernel/nsproxy.c @@ -12,6 +12,7 @@ #include #include #include +#include static inline void get_nsproxy(struct nsproxy *ns) { @@ -52,6 +53,11 @@ struct nsproxy *dup_namespaces(struct ns { struct nsproxy *ns = clone_namespaces(orig); + if (ns) { + if (ns->namespace) + get_namespace(ns->namespace); + } + return ns; } @@ -62,16 +68,40 @@ struct nsproxy *dup_namespaces(struct ns int copy_namespaces(int flags, struct task_struct *tsk) { struct nsproxy *old_ns = tsk->nsproxy; + struct nsproxy *new_ns; + int err = 0; if (!old_ns) return 0; get_nsproxy(old_ns); - return 0; + if (!(flags & CLONE_NEWNS)) + return 0; + + new_ns = clone_namespaces(old_ns); + if (!new_ns) { + err = -ENOMEM; + goto out; + } + + tsk->nsproxy = new_ns; + + err = copy_namespace(flags, tsk); + if (err) { + tsk->nsproxy = old_ns; + put_nsproxy(new_ns); + goto out; + } + +out: + put_nsproxy(old_ns); + return err; } void free_nsproxy(struct nsproxy *ns) { + if (ns->namespace) + put_namespace(ns->namespace); kfree(ns); } _