Subject: x86_64: Fold pda into per cpu area

* Declare the pda as a per cpu variable. This moves the pda area to an
  address accessible by the x86_64 per cpu macros. Subtracting
  __per_cpu_start makes the offset relative to the beginning of the per
  cpu area. Since %gs points at the pda, it then also points at the per
  cpu variables, which can be accessed as:

	%gs:[&per_cpu_xxxx - __per_cpu_start]

* Remove the code that allocates special pda data structures. Since the
  per cpu area is currently maintained for all possible cpus, the pda
  regions stay intact when cpus are hotplugged off and then brought back
  online.

Signed-off-by: Christoph Lameter
Signed-off-by: Mike Travis
---
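[Not for the changelog -- a minimal usage sketch for reviewers. The
variable name is made up, and the asm in the comments is an assumption
of roughly what percpu_from_op()/percpu_to_op() emit once __percpu_seg
is "%%gs:"; it is not actual compiler output.]

	/* Hypothetical per cpu variable, with this patch applied: */
	DEFINE_PER_CPU(unsigned long, example_count);

	static unsigned long example(void)
	{
		/* roughly: add $1,%gs:(per_cpu__example_count - __per_cpu_start) */
		x86_add_percpu(example_count, 1);

		/* roughly: mov %gs:(per_cpu__example_count - __per_cpu_start),%rax */
		return x86_read_percpu(example_count);
	}

Since the pda is placed first in the per cpu area (DEFINE_PER_CPU_FIRST),
%gs keeps working for pda accesses while also serving as the base for
every other per cpu variable, so a single %gs-relative instruction
reaches any of them.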
 arch/x86/Kconfig                 |    3 +++
 arch/x86/kernel/head64.c         |   15 +++++++++++++--
 arch/x86/kernel/setup.c          |   19 ++++++++++++++++++-
 arch/x86/kernel/smpboot.c        |   16 ----------------
 arch/x86/kernel/vmlinux_64.lds.S |    1 +
 include/asm-x86/percpu.h         |   30 +++++++++++++++---------------
 kernel/module.c                  |    7 ++++---
 7 files changed, 54 insertions(+), 37 deletions(-)

Index: linux-2.6/arch/x86/Kconfig
===================================================================
--- linux-2.6.orig/arch/x86/Kconfig	2008-05-28 00:04:13.000000000 -0700
+++ linux-2.6/arch/x86/Kconfig	2008-05-28 00:08:21.000000000 -0700
@@ -126,6 +126,9 @@
 config HAVE_CPUMASK_OF_CPU_MAP
 	def_bool X86_64_SMP
 
+config HAVE_ZERO_BASED_PER_CPU
+	def_bool X86_64 && SMP
+
 config ARCH_HIBERNATION_POSSIBLE
 	def_bool y
 	depends on !SMP || !X86_VOYAGER
Index: linux-2.6/arch/x86/kernel/setup.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/setup.c	2008-05-28 00:04:13.000000000 -0700
+++ linux-2.6/arch/x86/kernel/setup.c	2008-05-28 00:09:39.000000000 -0700
@@ -26,6 +26,11 @@
 physid_mask_t phys_cpu_present_map;
 #endif
 
+#ifdef CONFIG_X86_64
+DEFINE_PER_CPU_FIRST(struct x8664_pda, pda);
+EXPORT_PER_CPU_SYMBOL(pda);
+#endif
+
 #if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_X86_SMP)
 /*
  * Copy data used in early init routines from the initial arrays to the
@@ -116,17 +121,29 @@
 #endif
 		if (!ptr)
 			panic("Cannot allocate cpu data for CPU %d\n", i);
+
+		memcpy(ptr, __per_cpu_load, __per_cpu_size);
+
 #ifdef CONFIG_X86_64
+		/*
+		 * So far we have used an embryonic per cpu area that only contained
+		 * the pda. Move the pda contents into the full per cpu area.
+		 */
 		cpu_pda(i)->data_offset = ptr - __per_cpu_start;
+		memcpy(ptr, cpu_pda(i), sizeof(struct x8664_pda));
+		cpu_pda(i) = (struct x8664_pda *)ptr;
 #else
 		__per_cpu_offset[i] = ptr - __per_cpu_start;
 #endif
-		memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
 
 		highest_cpu = i;
 	}
 
 	nr_cpu_ids = highest_cpu + 1;
+#ifdef CONFIG_X86_64
+	/* Fix up pda for boot processor */
+	pda_init(0);
+#endif
 	printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d\n", NR_CPUS, nr_cpu_ids);
 
 	/* Setup percpu data maps */
Index: linux-2.6/arch/x86/kernel/vmlinux_64.lds.S
===================================================================
--- linux-2.6.orig/arch/x86/kernel/vmlinux_64.lds.S	2008-05-28 00:04:13.000000000 -0700
+++ linux-2.6/arch/x86/kernel/vmlinux_64.lds.S	2008-05-28 00:05:56.000000000 -0700
@@ -16,6 +16,7 @@
 _proxy_pda = 1;
 PHDRS {
 	text PT_LOAD FLAGS(5);	/* R_E */
+	percpu PT_LOAD FLAGS(4);	/* R__ */
 	data PT_LOAD FLAGS(7);	/* RWE */
 	user PT_LOAD FLAGS(7);	/* RWE */
 	data.init PT_LOAD FLAGS(7);	/* RWE */
Index: linux-2.6/include/asm-x86/percpu.h
===================================================================
--- linux-2.6.orig/include/asm-x86/percpu.h	2008-05-28 00:04:13.000000000 -0700
+++ linux-2.6/include/asm-x86/percpu.h	2008-05-28 00:09:40.000000000 -0700
@@ -3,21 +3,16 @@
 
 #ifdef CONFIG_X86_64
 #include <linux/compiler.h>
-
-/* Same as asm-generic/percpu.h, except that we store the per cpu offset
-   in the PDA. Longer term the PDA and every per cpu variable
-   should be just put into a single section and referenced directly
-   from %gs */
-
-#ifdef CONFIG_SMP
 #include <asm/pda.h>
 
+#ifdef CONFIG_SMP
 #define __per_cpu_offset(cpu) (cpu_pda(cpu)->data_offset)
 #define __my_cpu_offset read_pda(data_offset)
-
 #define per_cpu_offset(x) (__per_cpu_offset(x))
-
 #endif
+
+#define __percpu_seg "%%gs:"
+
 #include <asm-generic/percpu.h>
 
 DECLARE_PER_CPU(struct x8664_pda, pda);
@@ -81,6 +76,11 @@
 /* We can use this directly for local CPU (faster). */
 DECLARE_PER_CPU(unsigned long, this_cpu_off);
 
+#endif /* __ASSEMBLY__ */
+#endif /* !CONFIG_X86_64 */
+
+#ifndef __ASSEMBLY__
+
 /* For arch-specific code, we can use direct single-insn ops (they
  * don't give an lvalue though). */
 extern void __bad_percpu_size(void);
@@ -136,11 +136,11 @@
 	ret__;						\
 })
-#define x86_read_percpu(var) percpu_from_op("mov", per_cpu__##var)
-#define x86_write_percpu(var, val) percpu_to_op("mov", per_cpu__##var, val)
-#define x86_add_percpu(var, val) percpu_to_op("add", per_cpu__##var, val)
-#define x86_sub_percpu(var, val) percpu_to_op("sub", per_cpu__##var, val)
-#define x86_or_percpu(var, val) percpu_to_op("or", per_cpu__##var, val)
+#define x86_read_percpu(var) percpu_from_op("mov", per_cpu_var(var))
+#define x86_write_percpu(var, val) percpu_to_op("mov", per_cpu_var(var), val)
+#define x86_add_percpu(var, val) percpu_to_op("add", per_cpu_var(var), val)
+#define x86_sub_percpu(var, val) percpu_to_op("sub", per_cpu_var(var), val)
+#define x86_or_percpu(var, val) percpu_to_op("or", per_cpu_var(var), val)
+
 #endif /* !__ASSEMBLY__ */
-#endif /* !CONFIG_X86_64 */
 
 #endif /* _ASM_X86_PERCPU_H_ */
Index: linux-2.6/kernel/module.c
===================================================================
--- linux-2.6.orig/kernel/module.c	2008-05-28 00:04:13.000000000 -0700
+++ linux-2.6/kernel/module.c	2008-05-28 00:08:25.000000000 -0700
@@ -44,6 +44,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 
@@ -365,7 +366,7 @@
 		align = PAGE_SIZE;
 	}
 
-	ptr = __per_cpu_start;
+	ptr = __per_cpu_load;
 	for (i = 0; i < pcpu_num_used; ptr += block_size(pcpu_size[i]), i++) {
 		/* Extra for alignment requirement. */
 		extra = ALIGN((unsigned long)ptr, align) - (unsigned long)ptr;
@@ -400,7 +401,7 @@
 static void percpu_modfree(void *freeme)
 {
 	unsigned int i;
-	void *ptr = __per_cpu_start + block_size(pcpu_size[0]);
+	void *ptr = __per_cpu_load + block_size(pcpu_size[0]);
 
 	/* First entry is core kernel percpu data. */
 	for (i = 1; i < pcpu_num_used; ptr += block_size(pcpu_size[i]), i++) {
@@ -451,7 +452,7 @@
 	pcpu_size = kmalloc(sizeof(pcpu_size[0]) * pcpu_num_allocated,
 			    GFP_KERNEL);
 	/* Static in-kernel percpu data (used). */
-	pcpu_size[0] = -(__per_cpu_end-__per_cpu_start);
+	pcpu_size[0] = -__per_cpu_size;
 	/* Free room. */
 	pcpu_size[1] = PERCPU_ENOUGH_ROOM + pcpu_size[0];
 	if (pcpu_size[1] < 0) {
Index: linux-2.6/arch/x86/kernel/smpboot.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/smpboot.c	2008-05-28 00:04:13.000000000 -0700
+++ linux-2.6/arch/x86/kernel/smpboot.c	2008-05-28 00:09:39.000000000 -0700
@@ -855,22 +855,6 @@
 		printk(KERN_ERR "Failed to allocate GDT for CPU %d\n", cpu);
 		return -1;
 	}
-
-	/* Allocate node local memory for AP pdas */
-	if (cpu_pda(cpu) == &boot_cpu_pda[cpu]) {
-		struct x8664_pda *newpda, *pda;
-		int node = cpu_to_node(cpu);
-		pda = cpu_pda(cpu);
-		newpda = kmalloc_node(sizeof(struct x8664_pda), GFP_ATOMIC,
-				      node);
-		if (newpda) {
-			memcpy(newpda, pda, sizeof(struct x8664_pda));
-			cpu_pda(cpu) = newpda;
-		} else
-			printk(KERN_ERR
-			"Could not allocate node local PDA for CPU %d on node %d\n",
-			cpu, node);
-	}
 #endif
 
 	alternatives_smp_switch(1);