From: Andrew Morton Revert commit 34e43535bb289dd5a4222a4e33c3b4e75f580744 Author: Suresh Siddha Date: Mon Mar 10 15:28:05 2008 -0700 x86, fpu: lazy allocation of FPU area - v5 Only allocate the FPU area when the application actually uses FPU, i.e., in the first lazy FPU trap. This could save memory for non-fpu using apps. for example: on my system after boot, there are around 300 processes, with only 17 using FPU. Signed-off-by: Suresh Siddha Cc: Arjan van de Ven Signed-off-by: Ingo Molnar To make way for the reversion of "x86, fpu: split FPU state from task struct - v5". Cc: Suresh Siddha Cc: Arjan van de Ven Cc: Ingo Molnar Cc: Joe Perches Cc: Thomas Gleixner Signed-off-by: Andrew Morton --- arch/x86/kernel/i387.c | 38 ++++++--------------------------- arch/x86/kernel/process.c | 28 +++++++----------------- arch/x86/kernel/process_32.c | 4 --- arch/x86/kernel/process_64.c | 4 --- arch/x86/kernel/traps_32.c | 17 +------------- arch/x86/kernel/traps_64.c | 19 ++-------------- include/asm-x86/i387.h | 2 - include/asm-x86/processor.h | 2 - 8 files changed, 23 insertions(+), 91 deletions(-) diff -puN arch/x86/kernel/i387.c~git-x86-revert-x86-fpu-lazy-allocation-of-fpu-area-v5 arch/x86/kernel/i387.c --- a/arch/x86/kernel/i387.c~git-x86-revert-x86-fpu-lazy-allocation-of-fpu-area-v5 +++ a/arch/x86/kernel/i387.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -62,6 +63,7 @@ void __init init_thread_xstate(void) else xstate_size = sizeof(struct i387_fsave_struct); #endif + init_task.thread.xstate = alloc_bootmem(xstate_size); } #ifdef CONFIG_X86_64 @@ -91,22 +93,12 @@ void __cpuinit fpu_init(void) * value at reset if we support XMM instructions and then * remeber the current task has used the FPU. */ -int init_fpu(struct task_struct *tsk) +void init_fpu(struct task_struct *tsk) { if (tsk_used_math(tsk)) { if (tsk == current) unlazy_fpu(tsk); - return 0; - } - - /* - * Memory allocation at the first usage of the FPU and other state. - */ - if (!tsk->thread.xstate) { - tsk->thread.xstate = kmem_cache_alloc(task_xstate_cachep, - GFP_KERNEL); - if (!tsk->thread.xstate) - return -ENOMEM; + return; } if (cpu_has_fxsr) { @@ -128,7 +120,6 @@ int init_fpu(struct task_struct *tsk) * Only the device not available exception or ptrace can call init_fpu. */ set_stopped_child_used_math(tsk); - return 0; } int fpregs_active(struct task_struct *target, const struct user_regset *regset) @@ -145,14 +136,10 @@ int xfpregs_get(struct task_struct *targ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) { - int ret; - if (!cpu_has_fxsr) return -ENODEV; - ret = init_fpu(target); - if (ret) - return ret; + init_fpu(target); return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &target->thread.xstate->fxsave, 0, -1); @@ -167,10 +154,7 @@ int xfpregs_set(struct task_struct *targ if (!cpu_has_fxsr) return -ENODEV; - ret = init_fpu(target); - if (ret) - return ret; - + init_fpu(target); set_stopped_child_used_math(target); ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, @@ -328,14 +312,11 @@ int fpregs_get(struct task_struct *targe void *kbuf, void __user *ubuf) { struct user_i387_ia32_struct env; - int ret; if (!HAVE_HWFP) return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf); - ret = init_fpu(target); - if (ret) - return ret; + init_fpu(target); if (!cpu_has_fxsr) { return user_regset_copyout(&pos, &count, &kbuf, &ubuf, @@ -363,10 +344,7 @@ int fpregs_set(struct task_struct *targe if (!HAVE_HWFP) return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf); - ret = init_fpu(target); - if (ret) - return ret; - + init_fpu(target); set_stopped_child_used_math(target); if (!cpu_has_fxsr) { diff -puN arch/x86/kernel/process.c~git-x86-revert-x86-fpu-lazy-allocation-of-fpu-area-v5 arch/x86/kernel/process.c --- a/arch/x86/kernel/process.c~git-x86-revert-x86-fpu-lazy-allocation-of-fpu-area-v5 +++ a/arch/x86/kernel/process.c @@ -5,34 +5,24 @@ #include #include -struct kmem_cache *task_xstate_cachep; +static struct kmem_cache *task_xstate_cachep; int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) { *dst = *src; - if (src->thread.xstate) { - dst->thread.xstate = kmem_cache_alloc(task_xstate_cachep, - GFP_KERNEL); - if (!dst->thread.xstate) - return -ENOMEM; - WARN_ON((unsigned long)dst->thread.xstate & 15); - memcpy(dst->thread.xstate, src->thread.xstate, xstate_size); - } + dst->thread.xstate = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL); + if (!dst->thread.xstate) + return -ENOMEM; + WARN_ON((unsigned long)dst->thread.xstate & 15); + memcpy(dst->thread.xstate, src->thread.xstate, xstate_size); return 0; } -void free_thread_xstate(struct task_struct *tsk) -{ - if (tsk->thread.xstate) { - kmem_cache_free(task_xstate_cachep, tsk->thread.xstate); - tsk->thread.xstate = NULL; - } -} - - void free_thread_info(struct thread_info *ti) { - free_thread_xstate(ti->task); + kmem_cache_free(task_xstate_cachep, ti->task->thread.xstate); + ti->task->thread.xstate = NULL; + free_pages((unsigned long)(ti), get_order(THREAD_SIZE)); } diff -puN arch/x86/kernel/process_32.c~git-x86-revert-x86-fpu-lazy-allocation-of-fpu-area-v5 arch/x86/kernel/process_32.c --- a/arch/x86/kernel/process_32.c~git-x86-revert-x86-fpu-lazy-allocation-of-fpu-area-v5 +++ a/arch/x86/kernel/process_32.c @@ -521,10 +521,6 @@ start_thread(struct pt_regs *regs, unsig regs->cs = __USER_CS; regs->ip = new_ip; regs->sp = new_sp; - /* - * Free the old FP and other extended state - */ - free_thread_xstate(current); } EXPORT_SYMBOL_GPL(start_thread); diff -puN arch/x86/kernel/process_64.c~git-x86-revert-x86-fpu-lazy-allocation-of-fpu-area-v5 arch/x86/kernel/process_64.c --- a/arch/x86/kernel/process_64.c~git-x86-revert-x86-fpu-lazy-allocation-of-fpu-area-v5 +++ a/arch/x86/kernel/process_64.c @@ -544,10 +544,6 @@ start_thread(struct pt_regs *regs, unsig regs->ss = __USER_DS; regs->flags = 0x200; set_fs(USER_DS); - /* - * Free the old FP and other extended state - */ - free_thread_xstate(current); } EXPORT_SYMBOL_GPL(start_thread); diff -puN arch/x86/kernel/traps_32.c~git-x86-revert-x86-fpu-lazy-allocation-of-fpu-area-v5 arch/x86/kernel/traps_32.c --- a/arch/x86/kernel/traps_32.c~git-x86-revert-x86-fpu-lazy-allocation-of-fpu-area-v5 +++ a/arch/x86/kernel/traps_32.c @@ -1146,22 +1146,9 @@ asmlinkage void math_state_restore(void) struct thread_info *thread = current_thread_info(); struct task_struct *tsk = thread->task; - if (!tsk_used_math(tsk)) { - local_irq_enable(); - /* - * does a slab alloc which can sleep - */ - if (init_fpu(tsk)) { - /* - * ran out of memory! - */ - do_group_exit(SIGKILL); - return; - } - local_irq_disable(); - } - clts(); /* Allow maths ops (or we recurse) */ + if (!tsk_used_math(tsk)) + init_fpu(tsk); restore_fpu(tsk); thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */ tsk->fpu_counter++; diff -puN arch/x86/kernel/traps_64.c~git-x86-revert-x86-fpu-lazy-allocation-of-fpu-area-v5 arch/x86/kernel/traps_64.c --- a/arch/x86/kernel/traps_64.c~git-x86-revert-x86-fpu-lazy-allocation-of-fpu-area-v5 +++ a/arch/x86/kernel/traps_64.c @@ -1116,23 +1116,10 @@ asmlinkage void __attribute__((weak)) mc asmlinkage void math_state_restore(void) { struct task_struct *me = current; - - if (!used_math()) { - local_irq_enable(); - /* - * does a slab alloc which can sleep - */ - if (init_fpu(me)) { - /* - * ran out of memory! - */ - do_group_exit(SIGKILL); - return; - } - local_irq_disable(); - } - clts(); /* Allow maths ops (or we recurse) */ + + if (!used_math()) + init_fpu(me); restore_fpu_checking(&me->thread.xstate->fxsave); task_thread_info(me)->status |= TS_USEDFPU; me->fpu_counter++; diff -puN include/asm-x86/i387.h~git-x86-revert-x86-fpu-lazy-allocation-of-fpu-area-v5 include/asm-x86/i387.h --- a/include/asm-x86/i387.h~git-x86-revert-x86-fpu-lazy-allocation-of-fpu-area-v5 +++ a/include/asm-x86/i387.h @@ -21,7 +21,7 @@ extern void fpu_init(void); extern void mxcsr_feature_mask_init(void); -extern int init_fpu(struct task_struct *child); +extern void init_fpu(struct task_struct *child); extern asmlinkage void math_state_restore(void); extern void init_thread_xstate(void); diff -puN include/asm-x86/processor.h~git-x86-revert-x86-fpu-lazy-allocation-of-fpu-area-v5 include/asm-x86/processor.h --- a/include/asm-x86/processor.h~git-x86-revert-x86-fpu-lazy-allocation-of-fpu-area-v5 +++ a/include/asm-x86/processor.h @@ -366,8 +366,6 @@ DECLARE_PER_CPU(struct orig_ist, orig_is extern void print_cpu_info(struct cpuinfo_x86 *); extern unsigned int xstate_size; -extern void free_thread_xstate(struct task_struct *); -extern struct kmem_cache *task_xstate_cachep; extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c); extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); extern unsigned short num_cache_leaves; _