From: Oleg Nesterov I am not sure about this patch; I am asking Ingo to make the decision. task_struct->state == EXIT_DEAD is a very special case; to avoid confusion it makes sense to introduce a new state, TASK_DEAD, while EXIT_DEAD should live only in ->exit_state as documented in sched.h. Note that this state is not visible to user-space: get_task_state() masks off unsuitable states. Signed-off-by: Oleg Nesterov Cc: Ingo Molnar Signed-off-by: Andrew Morton --- include/linux/sched.h | 1 + kernel/exit.c | 2 +- kernel/sched.c | 8 ++++---- mm/oom_kill.c | 2 +- 4 files changed, 7 insertions(+), 6 deletions(-) diff -puN include/linux/sched.h~introduce-task_dead-state include/linux/sched.h --- a/include/linux/sched.h~introduce-task_dead-state +++ a/include/linux/sched.h @@ -148,6 +148,7 @@ extern unsigned long weighted_cpuload(co #define EXIT_DEAD 32 /* in tsk->state again */ #define TASK_NONINTERACTIVE 64 +#define TASK_DEAD 128 #define __set_task_state(tsk, state_value) \ do { (tsk)->state = (state_value); } while (0) diff -puN kernel/exit.c~introduce-task_dead-state kernel/exit.c --- a/kernel/exit.c~introduce-task_dead-state +++ a/kernel/exit.c @@ -956,7 +956,7 @@ fastcall NORET_TYPE void do_exit(long co preempt_disable(); /* causes final put_task_struct in finish_task_switch(). */ - tsk->state = EXIT_DEAD; + tsk->state = TASK_DEAD; schedule(); BUG(); diff -puN kernel/sched.c~introduce-task_dead-state kernel/sched.c --- a/kernel/sched.c~introduce-task_dead-state +++ a/kernel/sched.c @@ -1761,10 +1761,10 @@ static inline void finish_task_switch(st /* * A task struct has one reference for the use as "current". - * If a task dies, then it sets EXIT_DEAD in tsk->state and calls + * If a task dies, then it sets TASK_DEAD in tsk->state and calls * schedule one last time. The schedule call will never return, and * the scheduled task must drop that reference. 
- * The test for EXIT_DEAD must occur while the runqueue locks are + * The test for TASK_DEAD must occur while the runqueue locks are * still held, otherwise prev could be scheduled on another cpu, die * there before we look at prev->state, and then the reference would * be dropped twice. @@ -1775,7 +1775,7 @@ static inline void finish_task_switch(st finish_lock_switch(rq, prev); if (mm) mmdrop(mm); - if (unlikely(prev_state == EXIT_DEAD)) { + if (unlikely(prev_state == TASK_DEAD)) { /* * Remove function-return probe instances associated with this * task and put them back on the free list. @@ -5153,7 +5153,7 @@ static void migrate_dead(unsigned int de BUG_ON(p->exit_state != EXIT_ZOMBIE && p->exit_state != EXIT_DEAD); /* Cannot have done final schedule yet: would have vanished. */ - BUG_ON(p->state == EXIT_DEAD); + BUG_ON(p->state == TASK_DEAD); get_task_struct(p); diff -puN mm/oom_kill.c~introduce-task_dead-state mm/oom_kill.c --- a/mm/oom_kill.c~introduce-task_dead-state +++ a/mm/oom_kill.c @@ -227,7 +227,7 @@ static struct task_struct *select_bad_pr p->flags & PF_EXITING; if (releasing) { /* TASK_DEAD tasks have already released their mm */ - if (p->state == EXIT_DEAD) + if (p->state == TASK_DEAD) continue; if (p->flags & PF_EXITING && p == current) { chosen = p; _