---
 arch/x86/kernel/smpcommon_32.c |    4 
 include/asm-generic/percpu.h   |   16 -
 include/asm-x86/percpu.h       |  360 ++++++++++++++++++++++++++++++++++++++++-
 include/asm/i387_32.h          |    2 
 4 files changed, 366 insertions(+), 16 deletions(-)

Index: linux-2.6/arch/x86/kernel/smpcommon_32.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/smpcommon_32.c	2007-11-20 22:43:54.525287944 -0800
+++ linux-2.6/arch/x86/kernel/smpcommon_32.c	2007-11-20 22:46:46.062092422 -0800
@@ -16,10 +16,10 @@ __cpuinit void init_gdt(int cpu)
 	pack_descriptor((u32 *)&gdt[GDT_ENTRY_PERCPU].a,
 			(u32 *)&gdt[GDT_ENTRY_PERCPU].b,
-			__per_cpu_offset[cpu], 0xFFFFF,
+			cpu_offset(cpu), 0xFFFFF,
 			0x80 | DESCTYPE_S | 0x2, 0x8);
 
-	per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu];
+	per_cpu(this_cpu_off, cpu) = cpu_offset(cpu);
 	per_cpu(cpu_number, cpu) = cpu;
 }

Index: linux-2.6/include/asm-generic/percpu.h
===================================================================
--- linux-2.6.orig/include/asm-generic/percpu.h	2007-11-20 22:39:16.650038147 -0800
+++ linux-2.6/include/asm-generic/percpu.h	2007-11-20 22:47:56.696204252 -0800
@@ -16,26 +16,24 @@
 #define DEFINE_PER_CPU_SHARED_ALIGNED(type, name)		\
 	__attribute__((__section__(".data.percpu.shared_aligned"))) \
-	__typeof__(type) per_cpuo_var(name)			\
+	__typeof__(type) per_cpu_var(name)			\
 	____cacheline_aligned_in_smp
 
 /* var is in discarded region: offset to particular copy we want */
-#define per_cpu(var, cpu) (*({				\
-	extern int simple_identifier_##var(void);	\
-	CPU_PTR(&per_cpu_var(var),cpu); }))
-#define __get_cpu_var(var) THIS_CPU(&per_cpu_var(var)))
-#define __raw_get_cpu_var(var) __THIS_CPU(&per_cpu_var(var))
+#define per_cpu(var, cpu) (*CPU_PTR(&per_cpu_var(var), (cpu)))
+#define __get_cpu_var(var) (*THIS_CPU(&per_cpu_var(var)))
+#define __raw_get_cpu_var(var) (*__THIS_CPU(&per_cpu_var(var)))
 
 /* A macro to avoid #include hell... */
 #define percpu_modcopy(pcpudst, src, size)			\
 do {								\
 	unsigned int __i;					\
 	for_each_possible_cpu(__i)				\
-		memcpy(CPU_PTR(pcpudst), (src), (size));	\
+		memcpy(CPU_PTR(pcpudst, __i), (src), (size));	\
 } while (0)
 
 #define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu_var(name)
 
-#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__var(name))
-#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu_var(name))
+#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu_var(var))
+#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu_var(var))
 
 #endif /* _ASM_GENERIC_PERCPU_H_ */

Index: linux-2.6/include/asm-x86/percpu.h
===================================================================
--- linux-2.6.orig/include/asm-x86/percpu.h	2007-11-20 22:19:44.054038009 -0800
+++ linux-2.6/include/asm-x86/percpu.h	2007-11-20 22:43:07.037037373 -0800
@@ -1,5 +1,357 @@
+#ifndef __ARCH_X86_PERCPU__
+#define __ARCH_X86_PERCPU__
+
 #ifdef CONFIG_X86_32
-# include "percpu_32.h"
-#else
-# include "percpu_64.h"
-#endif
+/*
+ * Special definitions for X86_32
+ */
+
+#ifdef __ASSEMBLY__
+
+/*
+ * PER_CPU finds an address of a per-cpu variable.
+ *
+ * Args:
+ *    var - variable name
+ *    reg - 32bit register
+ *
+ * The resulting address is stored in the "reg" argument.
+ *
+ * Example:
+ *    PER_CPU(cpu_gdt_descr, %ebx)
+ */
+#define PER_CPU(var, reg)				\
+	movl %fs:per_cpu__##this_cpu_off, reg;		\
+	lea per_cpu__##var(reg), reg
+#define PER_CPU_VAR(var)	%fs:per_cpu__##var
+#else /* ...!ASSEMBLY */
+
+extern char cpu_area[];
+
+#include
+/* We can use this directly for local CPU (faster). */
+DECLARE_PER_CPU(unsigned long, this_cpu_off);
+
+#define __THIS_CPU_OFFSET CPU_READ(this_cpu_off)
+
+#define x86_read_percpu(var) CPU_READ(per_cpu_var(var))
+#define x86_write_percpu(var,val) CPU_WRITE(per_cpu_var(var), (val))
+#define x86_add_percpu(var,val) CPU_ADD(per_cpu_var(var), (val))
+#define x86_sub_percpu(var,val) CPU_SUB(per_cpu_var(var), (val))
+#define x86_or_percpu(var,val) CPU_OR(per_cpu_var(var), (val))
+#endif	/* !__ASSEMBLY__ */
+
+#define SEGREG "%%fs:"
+
+#else /* CONFIG_X86_32 */
+/*
+ * Special definitions for X86_64
+ */
+#include
+#include
+#define cpu_area ((void *)CPU_AREA_BASE)
+#include
+DECLARE_PER_CPU(struct x8664_pda, pda);
+
+#define SEGREG "%%gs:"
+
+#endif /* !CONFIG_X86_32 */
+
+#ifndef __ASSEMBLY__
+
+#define __xp(x) ((volatile unsigned long *)(x))
+
+static inline unsigned long __cpu_read_gs(volatile void *ptr, int size)
+{
+	unsigned long result;
+	switch (size) {
+	case 1:
+		__asm__ ("mov " SEGREG "%1, %b0"
+			: "=r"(result)
+			: "m"(*__xp(ptr)));
+		return result;
+	case 2:
+		__asm__ ("movw " SEGREG "%1, %w0"
+			: "=r"(result)
+			: "m"(*__xp(ptr)));
+		return result;
+	case 4:
+		__asm__ ("movl " SEGREG "%1, %k0"
+			: "=r"(result)
+			: "m"(*__xp(ptr)));
+		return result;
+	case 8:
+		__asm__ ("movq " SEGREG "%1, %0"
+			: "=r"(result)
+			: "m"(*__xp(ptr)));
+		return result;
+	}
+	BUG();
+}
+
+#define cpu_read_gs(obj)\
+	((__typeof__(obj))__cpu_read_gs(&(obj), sizeof(obj)))
+
+static inline void __cpu_write_gs(volatile void *ptr,
+		unsigned long data, int size)
+{
+	switch (size) {
+	case 1:
+		__asm__ ("movb %b0," SEGREG "%1"
+			: : "ri"(data), "m"(*__xp(ptr)));
+		return;
+	case 2:
+		__asm__ ("movw %w0," SEGREG "%1"
+			: : "ri"(data), "m"(*__xp(ptr)));
+		return;
+	case 4:
+		__asm__ ("movl %k0," SEGREG "%1"
+			: : "ri"(data), "m"(*__xp(ptr)));
+		return;
+	case 8:
+		__asm__ ("movq %0," SEGREG "%1"
+			: : "ri"(data), "m"(*__xp(ptr)));
+		return;
+	}
+	BUG();
+}
+
+#define cpu_write_gs(obj, value)\
+	__cpu_write_gs(&(obj), (unsigned long)value, sizeof(obj))
+
+static inline void __cpu_add_gs(volatile void *ptr,
+		long data, int size)
+{
+	switch (size) {
+	case 1:
+		__asm__ ("add %b0," SEGREG "%1"
+			: : "ri"(data), "m"(*__xp(ptr)));
+		return;
+	case 2:
+		__asm__ ("addw %w0," SEGREG "%1"
+			: : "ri"(data), "m"(*__xp(ptr)));
+		return;
+	case 4:
+		__asm__ ("addl %k0," SEGREG "%1"
+			: : "ri"(data), "m"(*__xp(ptr)));
+		return;
+	case 8:
+		__asm__ ("addq %0," SEGREG "%1"
+			: : "ri"(data), "m"(*__xp(ptr)));
+		return;
+	}
+	BUG();
+}
+
+#define cpu_add_gs(obj, value)\
+	__cpu_add_gs(&(obj), (unsigned long)value, sizeof(obj))
+
+static inline void __cpu_sub_gs(volatile void *ptr,
+		long data, int size)
+{
+	switch (size) {
+	case 1:
+		__asm__ ("subb %b0," SEGREG "%1"
+			: : "ri"(data), "m"(*__xp(ptr)));
+		return;
+	case 2:
+		__asm__ ("subw %w0," SEGREG "%1"
+			: : "ri"(data), "m"(*__xp(ptr)));
+		return;
+	case 4:
+		__asm__ ("subl %k0," SEGREG "%1"
+			: : "ri"(data), "m"(*__xp(ptr)));
+		return;
+	case 8:
+		__asm__ ("subq %0," SEGREG "%1"
+			: : "ri"(data), "m"(*__xp(ptr)));
+		return;
+	}
+	BUG();
+}
+
+#define cpu_sub_gs(obj, value)\
+	__cpu_sub_gs(&(obj), (unsigned long)value, sizeof(obj))
+
+static inline void __cpu_xchg_gs(volatile void *ptr,
+		long data, int size)
+{
+	switch (size) {
+	case 1:
+		__asm__ ("xchg %b0," SEGREG "%1"
+			: : "ri"(data), "m"(*__xp(ptr)));
+		return;
+	case 2:
+		__asm__ ("xchgw %w0," SEGREG "%1"
+			: : "ri"(data), "m"(*__xp(ptr)));
+		return;
+	case 4:
+		__asm__ ("xchgl %k0," SEGREG "%1"
+			: : "ri"(data), "m"(*__xp(ptr)));
+		return;
+	case 8:
+		__asm__ ("xchgq %0," SEGREG "%1"
+			: : "ri"(data), "m"(*__xp(ptr)));
+		return;
+	}
+	BUG();
+}
+
+#define cpu_xchg_gs(obj, value)\
+	__cpu_xchg_gs(&(obj), (unsigned long)value, sizeof(obj))
+
+static inline void __cpu_inc_gs(volatile void *ptr, int size)
+{
+	switch (size) {
+	case 1:
+		__asm__ ("incb " SEGREG "%0"
+			: : "m"(*__xp(ptr)));
+		return;
+	case 2:
+		__asm__ ("incw " SEGREG "%0"
+			: : "m"(*__xp(ptr)));
+		return;
+	case 4:
+		__asm__ ("incl " SEGREG "%0"
+			: : "m"(*__xp(ptr)));
+		return;
+	case 8:
+		__asm__ ("incq " SEGREG "%0"
+			: : "m"(*__xp(ptr)));
+		return;
+	}
+	BUG();
+}
+
+#define cpu_inc_gs(obj)\
+	__cpu_inc_gs(&(obj), sizeof(obj))
+
+static inline void __cpu_dec_gs(volatile void *ptr, int size)
+{
+	switch (size) {
+	case 1:
+		__asm__ ("decb " SEGREG "%0"
+			: : "m"(*__xp(ptr)));
+		return;
+	case 2:
+		__asm__ ("decw " SEGREG "%0"
+			: : "m"(*__xp(ptr)));
+		return;
+	case 4:
+		__asm__ ("decl " SEGREG "%0"
+			: : "m"(*__xp(ptr)));
+		return;
+	case 8:
+		__asm__ ("decq " SEGREG "%0"
+			: : "m"(*__xp(ptr)));
+		return;
+	}
+	BUG();
+}
+
+#define cpu_dec_gs(obj)\
+	__cpu_dec_gs(&(obj), sizeof(obj))
+
+static inline unsigned long __cmpxchg_local_gs(volatile void *ptr,
+		unsigned long old, unsigned long new, int size)
+{
+	unsigned long prev;
+	switch (size) {
+	case 1:
+		__asm__ ("cmpxchgb %b1," SEGREG "%2"
+			: "=a"(prev)
+			: "q"(new), "m"(*__xp(ptr)), "0"(old)
+			: "memory");
+		return prev;
+	case 2:
+		__asm__ ("cmpxchgw %w1," SEGREG "%2"
+			: "=a"(prev)
+			: "r"(new), "m"(*__xp(ptr)), "0"(old)
+			: "memory");
+		return prev;
+	case 4:
+		__asm__ ("cmpxchgl %k1," SEGREG "%2"
+			: "=a"(prev)
+			: "r"(new), "m"(*__xp(ptr)), "0"(old)
+			: "memory");
+		return prev;
+	case 8:
+		__asm__ ("cmpxchgq %1," SEGREG "%2"
+			: "=a"(prev)
+			: "r"(new), "m"(*__xp(ptr)), "0"(old)
+			: "memory");
+		return prev;
+	}
+	return old;
+}
+
+#define cmpxchg_local_gs(obj, o, n)\
+	((__typeof__(obj))__cmpxchg_local_gs(&(obj),(unsigned long)(o),\
+		(unsigned long)(n),sizeof(obj)))
+
+static inline void __cpu_or_gs(volatile void *ptr,
+		long data, int size)
+{
+	switch (size) {
+	case 1:
+		__asm__ ("orb %b0," SEGREG "%1"
+			: : "ri"(data), "m"(*__xp(ptr)));
+		return;
+	case 2:
+		__asm__ ("orw %w0," SEGREG "%1"
+			: : "ri"(data), "m"(*__xp(ptr)));
+		return;
+	case 4:
+		__asm__ ("orl %k0," SEGREG "%1"
+			: : "ri"(data), "m"(*__xp(ptr)));
+		return;
+	case 8:
+		__asm__ ("orq %0," SEGREG "%1"
+			: : "ri"(data), "m"(*__xp(ptr)));
+		return;
+	}
+	BUG();
+}
+
+#define cpu_or_gs(obj, value)\
+	__cpu_or_gs(&(obj), (unsigned long)value, sizeof(obj))
+
+#define CPU_READ(obj)		cpu_read_gs(obj)
+#define CPU_WRITE(obj,val)	cpu_write_gs(obj, val)
+#define CPU_ADD(obj,val)	cpu_add_gs(obj, val)
+#define CPU_SUB(obj,val)	cpu_sub_gs(obj, val)
+#define CPU_INC(obj)		cpu_inc_gs(obj)
+#define CPU_DEC(obj)		cpu_dec_gs(obj)
+#define CPU_OR(obj, val)	cpu_or_gs(obj, val)
+
+#define CPU_XCHG(obj,val)	cpu_xchg_gs(obj, val)
+#define CPU_CMPXCHG(obj, old, new) cmpxchg_local_gs(obj, old, new)
+
+/*
+ * All cpu operations are interrupt safe and do not need to disable
+ * preempt. So the other variants all reduce to the same instruction.
+ */
+ */ +#define _CPU_READ CPU_READ +#define _CPU_WRITE CPU_WRITE +#define _CPU_ADD CPU_ADD +#define _CPU_SUB CPU_SUB +#define _CPU_INC CPU_INC +#define _CPU_DEC CPU_DEC +#define _CPU_XCHG CPU_XCHG +#define _CPU_CMPXCHG CPU_CMPXCHG +#define _CPU_OR CPU_OR + +#define __CPU_READ CPU_READ +#define __CPU_WRITE CPU_WRITE +#define __CPU_ADD CPU_ADD +#define __CPU_SUB CPU_SUB +#define __CPU_INC CPU_INC +#define __CPU_DEC CPU_DEC +#define __CPU_XCHG CPU_XCHG +#define __CPU_CMPXCHG CPU_CMPXCHG +#define __CPU_OR CPU_OR + +#endif /* __ASSEMBLY */ + +#endif /* __ARCH_X86_PERCPU__ */ + Index: linux-2.6/include/asm/i387_32.h =================================================================== --- linux-2.6.orig/include/asm/i387_32.h 2007-11-20 22:30:24.161287850 -0800 +++ linux-2.6/include/asm/i387_32.h 2007-11-20 22:33:16.649037533 -0800 @@ -43,7 +43,7 @@ extern void kernel_fpu_begin(void); in L1 during context switch. The best choices are unfortunately different for UP and SMP */ #ifdef CONFIG_SMP -#define safe_address (__per_cpu_offset[0]) +#define safe_address (*CPU_PTR((u8 *)NULL, 0)) #else #define safe_address (kstat_cpu(0).cpustat.user) #endif