From: Balbir Singh This patch adds an additional argument to the mm_owner_changed callback: the task that becomes the new owner. This argument is required to get to the mm whose owner changed. Hold mmap_sem in write mode before calling the mm_owner_changed callback. [hugh@veritas.com: fix mmap_sem deadlock] Signed-off-by: Balbir Singh Cc: Sudhir Kumar Cc: YAMAMOTO Takashi Cc: Paul Menage Cc: Li Zefan Cc: Pavel Emelianov Cc: Balbir Singh Cc: KAMEZAWA Hiroyuki Cc: David Rientjes Cc: Vivek Goyal Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton --- fs/exec.c | 2 +- include/linux/cgroup.h | 3 ++- kernel/cgroup.c | 4 +++- kernel/exit.c | 11 ++++------- 4 files changed, 10 insertions(+), 10 deletions(-) diff -puN fs/exec.c~memrlimit-cgroup-mm-owner-callback-changes-to-add-task-info fs/exec.c --- a/fs/exec.c~memrlimit-cgroup-mm-owner-callback-changes-to-add-task-info +++ a/fs/exec.c @@ -753,11 +753,11 @@ static int exec_mmap(struct mm_struct *m tsk->active_mm = mm; activate_mm(active_mm, mm); task_unlock(tsk); - mm_update_next_owner(old_mm); arch_pick_mmap_layout(mm); if (old_mm) { up_read(&old_mm->mmap_sem); BUG_ON(active_mm != old_mm); + mm_update_next_owner(old_mm); mmput(old_mm); return 0; } diff -puN include/linux/cgroup.h~memrlimit-cgroup-mm-owner-callback-changes-to-add-task-info include/linux/cgroup.h --- a/include/linux/cgroup.h~memrlimit-cgroup-mm-owner-callback-changes-to-add-task-info +++ a/include/linux/cgroup.h @@ -326,7 +326,8 @@ struct cgroup_subsys { */ void (*mm_owner_changed)(struct cgroup_subsys *ss, struct cgroup *old, - struct cgroup *new); + struct cgroup *new, + struct task_struct *p); int subsys_id; int active; int disabled; diff -puN kernel/cgroup.c~memrlimit-cgroup-mm-owner-callback-changes-to-add-task-info kernel/cgroup.c --- a/kernel/cgroup.c~memrlimit-cgroup-mm-owner-callback-changes-to-add-task-info +++ a/kernel/cgroup.c @@ -2738,6 +2738,8 @@ void cgroup_fork_callbacks(struct task_s * Called on every change to mm->owner. 
mm_init_owner() does not * invoke this routine, since it assigns the mm->owner the first time * and does not change it. + * + * The callbacks are invoked with mmap_sem held in write mode. */ void cgroup_mm_owner_callbacks(struct task_struct *old, struct task_struct *new) { @@ -2752,7 +2754,7 @@ void cgroup_mm_owner_callbacks(struct ta if (oldcgrp == newcgrp) continue; if (ss->mm_owner_changed) - ss->mm_owner_changed(ss, oldcgrp, newcgrp); + ss->mm_owner_changed(ss, oldcgrp, newcgrp, new); } } } diff -puN kernel/exit.c~memrlimit-cgroup-mm-owner-callback-changes-to-add-task-info kernel/exit.c --- a/kernel/exit.c~memrlimit-cgroup-mm-owner-callback-changes-to-add-task-info +++ a/kernel/exit.c @@ -592,8 +592,6 @@ mm_need_new_owner(struct mm_struct *mm, * If there are other users of the mm and the owner (us) is exiting * we need to find a new owner to take on the responsibility. */ - if (!mm) - return 0; if (atomic_read(&mm->mm_users) <= 1) return 0; if (mm->owner != p) @@ -641,24 +639,23 @@ retry: assign_new_owner: BUG_ON(c == p); get_task_struct(c); + read_unlock(&tasklist_lock); + down_write(&mm->mmap_sem); /* * The task_lock protects c->mm from changing. * We always want mm->owner->mm == mm */ task_lock(c); - /* - * Delay read_unlock() till we have the task_lock() - * to ensure that c does not slip away underneath us - */ - read_unlock(&tasklist_lock); if (c->mm != mm) { task_unlock(c); + up_write(&mm->mmap_sem); put_task_struct(c); goto retry; } cgroup_mm_owner_callbacks(mm->owner, c); mm->owner = c; task_unlock(c); + up_write(&mm->mmap_sem); put_task_struct(c); } #endif /* CONFIG_MM_OWNER */ _