Subject: x86: Replace cpu_pda() by per_cpu and get rid of _cpu_pda() The pda is pointing to the pda which is at the beginning of the per cpu area. This means that cpu_pda and _cpu_pda[] are both pointing at the percpu area! In fact per_cpu() can be used instead of cpu_pda(). Typically the offsets to the per cpu areas are stored in an array __per_cpu_offset (generic per cpu support can then provide more functionality). Introduce that array for x86_64 and get rid of the pda arrays. Signed-off-by: Christoph Lameter Index: linux-2.6/arch/x86/kernel/head64.c =================================================================== --- linux-2.6.orig/arch/x86/kernel/head64.c 2008-05-23 20:43:39.000000000 -0700 +++ linux-2.6/arch/x86/kernel/head64.c 2008-05-23 20:49:31.000000000 -0700 @@ -119,6 +119,8 @@ } } +static struct x8664_pda boot_cpu_pda[NR_CPUS] __cacheline_aligned; + void __init x86_64_start_kernel(char * real_mode_data) { int i; @@ -156,11 +158,13 @@ early_printk("Kernel alive\n"); - for (i = 0; i < NR_CPUS; i++) - cpu_pda(i) = &boot_cpu_pda[i]; + /* Setup embryonic per cpu areas that consists only of the pdas */ + for (i = 1; i < NR_CPUS; i++) + __per_cpu_offset[i] = (unsigned long)&boot_cpu_pda[i]; + __per_cpu_offset[0] = (unsigned long)__per_cpu_load; pda_init(0); - cpu_pda(0)->data_offset =(unsigned long)__per_cpu_load; + per_cpu(pda.data_offset, 0) = __per_cpu_offset[0]; copy_bootdata(__va(real_mode_data)); reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS"); Index: linux-2.6/arch/x86/kernel/irq_64.c =================================================================== --- linux-2.6.orig/arch/x86/kernel/irq_64.c 2008-05-23 20:42:57.000000000 -0700 +++ linux-2.6/arch/x86/kernel/irq_64.c 2008-05-23 20:43:56.000000000 -0700 @@ -115,37 +115,37 @@ } else if (i == NR_IRQS) { seq_printf(p, "NMI: "); for_each_online_cpu(j) - seq_printf(p, "%10u ", cpu_pda(j)->__nmi_count); + seq_printf(p, "%10u ", per_cpu(pda.__nmi_count, j)); seq_printf(p, " 
Non-maskable interrupts\n"); seq_printf(p, "LOC: "); for_each_online_cpu(j) - seq_printf(p, "%10u ", cpu_pda(j)->apic_timer_irqs); + seq_printf(p, "%10u ", per_cpu(pda.apic_timer_irqs, j)); seq_printf(p, " Local timer interrupts\n"); #ifdef CONFIG_SMP seq_printf(p, "RES: "); for_each_online_cpu(j) - seq_printf(p, "%10u ", cpu_pda(j)->irq_resched_count); + seq_printf(p, "%10u ", per_cpu(pda.irq_resched_count, j)); seq_printf(p, " Rescheduling interrupts\n"); seq_printf(p, "CAL: "); for_each_online_cpu(j) - seq_printf(p, "%10u ", cpu_pda(j)->irq_call_count); + seq_printf(p, "%10u ", per_cpu(pda.irq_call_count, j)); seq_printf(p, " function call interrupts\n"); seq_printf(p, "TLB: "); for_each_online_cpu(j) - seq_printf(p, "%10u ", cpu_pda(j)->irq_tlb_count); + seq_printf(p, "%10u ", per_cpu(pda.irq_tlb_count, j)); seq_printf(p, " TLB shootdowns\n"); #endif seq_printf(p, "TRM: "); for_each_online_cpu(j) - seq_printf(p, "%10u ", cpu_pda(j)->irq_thermal_count); + seq_printf(p, "%10u ", per_cpu(pda.irq_thermal_count, j)); seq_printf(p, " Thermal event interrupts\n"); seq_printf(p, "THR: "); for_each_online_cpu(j) - seq_printf(p, "%10u ", cpu_pda(j)->irq_threshold_count); + seq_printf(p, "%10u ", per_cpu(pda.irq_threshold_count, j)); seq_printf(p, " Threshold APIC interrupts\n"); seq_printf(p, "SPU: "); for_each_online_cpu(j) - seq_printf(p, "%10u ", cpu_pda(j)->irq_spurious_count); + seq_printf(p, "%10u ", per_cpu(pda.irq_spurious_count, j)); seq_printf(p, " Spurious interrupts\n"); seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); } Index: linux-2.6/arch/x86/kernel/nmi_64.c =================================================================== --- linux-2.6.orig/arch/x86/kernel/nmi_64.c 2008-05-23 20:42:57.000000000 -0700 +++ linux-2.6/arch/x86/kernel/nmi_64.c 2008-05-23 20:43:56.000000000 -0700 @@ -100,19 +100,19 @@ #endif for (cpu = 0; cpu < NR_CPUS; cpu++) - prev_nmi_count[cpu] = cpu_pda(cpu)->__nmi_count; + prev_nmi_count[cpu] = per_cpu(pda.__nmi_count, cpu); 
local_irq_enable(); mdelay((20*1000)/nmi_hz); // wait 20 ticks for_each_online_cpu(cpu) { if (!per_cpu(wd_enabled, cpu)) continue; - if (cpu_pda(cpu)->__nmi_count - prev_nmi_count[cpu] <= 5) { + if (per_cpu(pda.__nmi_count, cpu) - prev_nmi_count[cpu] <= 5) { printk(KERN_WARNING "WARNING: CPU#%d: NMI " "appears to be stuck (%d->%d)!\n", cpu, prev_nmi_count[cpu], - cpu_pda(cpu)->__nmi_count); + per_cpu(pda.__nmi_count, cpu)); per_cpu(wd_enabled, cpu) = 0; atomic_dec(&nmi_active); } Index: linux-2.6/arch/x86/kernel/setup.c =================================================================== --- linux-2.6.orig/arch/x86/kernel/setup.c 2008-05-23 20:43:39.000000000 -0700 +++ linux-2.6/arch/x86/kernel/setup.c 2008-05-23 20:43:56.000000000 -0700 @@ -123,13 +123,11 @@ memcpy(ptr, __per_cpu_load, __per_cpu_size); #ifdef CONFIG_X86_64 - cpu_pda(i)->data_offset = ptr - __per_cpu_start; + per_cpu(pda.data_offset, i) = ptr - __per_cpu_start; /* Relocate the pda */ - memcpy(ptr, cpu_pda(i), sizeof(struct x8664_pda)); - cpu_pda(i) = (struct x8664_pda *)ptr; -#else - __per_cpu_offset[i] = ptr - __per_cpu_start; + memcpy(ptr, &per_cpu(pda, i), sizeof(struct x8664_pda)); #endif + __per_cpu_offset[i] = ptr - __per_cpu_start; highest_cpu = i; } Index: linux-2.6/arch/x86/kernel/setup64.c =================================================================== --- linux-2.6.orig/arch/x86/kernel/setup64.c 2008-05-23 20:42:57.000000000 -0700 +++ linux-2.6/arch/x86/kernel/setup64.c 2008-05-23 20:48:34.000000000 -0700 @@ -34,9 +34,8 @@ cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; -struct x8664_pda *_cpu_pda[NR_CPUS] __read_mostly; -EXPORT_SYMBOL(_cpu_pda); -struct x8664_pda boot_cpu_pda[NR_CPUS] __cacheline_aligned; +unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; +EXPORT_SYMBOL(__per_cpu_offset); struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; @@ -89,7 +88,7 @@ void pda_init(int cpu) { - struct x8664_pda *pda = cpu_pda(cpu); + struct x8664_pda *pda = 
&per_cpu(pda, cpu); /* Setup up data that may be needed in __get_free_pages early */ asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0)); Index: linux-2.6/arch/x86/kernel/smpboot.c =================================================================== --- linux-2.6.orig/arch/x86/kernel/smpboot.c 2008-05-23 20:43:39.000000000 -0700 +++ linux-2.6/arch/x86/kernel/smpboot.c 2008-05-23 20:43:56.000000000 -0700 @@ -855,13 +855,6 @@ printk(KERN_ERR "Failed to allocate GDT for CPU %d\n", cpu); return -1; } - - if (cpu_pda(cpu) == &boot_cpu_pda[cpu]) { - struct x8664_pda *ppda = &per_cpu(pda, cpu); - - memcpy(ppda, cpu_pda(cpu), sizeof(struct x8664_pda)); - cpu_pda(cpu) = ppda; - } #endif alternatives_smp_switch(1); @@ -902,7 +895,7 @@ stack_start.sp = (void *) c_idle.idle->thread.sp; irq_ctx_init(cpu); #else - cpu_pda(cpu)->pcurrent = c_idle.idle; + per_cpu(pda.pcurrent, cpu) = c_idle.idle; init_rsp = c_idle.idle->thread.sp; load_sp0(&per_cpu(init_tss, cpu), &c_idle.idle->thread); initial_code = (unsigned long)start_secondary; Index: linux-2.6/arch/x86/kernel/traps_64.c =================================================================== --- linux-2.6.orig/arch/x86/kernel/traps_64.c 2008-05-23 20:42:57.000000000 -0700 +++ linux-2.6/arch/x86/kernel/traps_64.c 2008-05-23 20:43:56.000000000 -0700 @@ -263,7 +263,8 @@ const struct stacktrace_ops *ops, void *data) { const unsigned cpu = get_cpu(); - unsigned long *irqstack_end = (unsigned long*)cpu_pda(cpu)->irqstackptr; + unsigned long *irqstack_end = + (unsigned long*)per_cpu(pda.irqstackptr, cpu); unsigned used = 0; struct thread_info *tinfo; @@ -397,8 +398,8 @@ unsigned long *stack; int i; const int cpu = smp_processor_id(); - unsigned long *irqstack_end = (unsigned long *) (cpu_pda(cpu)->irqstackptr); - unsigned long *irqstack = (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE); + unsigned long *irqstack_end = (unsigned long *)per_cpu(pda.irqstackptr, cpu); + unsigned long *irqstack = (unsigned long 
*)(per_cpu(pda.irqstackptr, cpu) - IRQSTACKSIZE); // debugging aid: "show_stack(NULL, NULL);" prints the // back trace for this cpu. @@ -462,7 +463,7 @@ int i; unsigned long sp; const int cpu = smp_processor_id(); - struct task_struct *cur = cpu_pda(cpu)->pcurrent; + struct task_struct *cur = per_cpu(pda.pcurrent, cpu); u8 *ip; unsigned int code_prologue = code_bytes * 43 / 64; unsigned int code_len = code_bytes; Index: linux-2.6/include/asm-x86/pda.h =================================================================== --- linux-2.6.orig/include/asm-x86/pda.h 2008-05-23 20:43:35.000000000 -0700 +++ linux-2.6/include/asm-x86/pda.h 2008-05-23 20:47:36.000000000 -0700 @@ -37,12 +37,8 @@ unsigned irq_spurious_count; } ____cacheline_aligned_in_smp; -extern struct x8664_pda *_cpu_pda[]; -extern struct x8664_pda boot_cpu_pda[]; extern void pda_init(int); -#define cpu_pda(i) (_cpu_pda[i]) - /* * There is no fast way to get the base address of the PDA, all the accesses * have to mention %fs/%gs. So it needs to be done this Torvaldian way. Index: linux-2.6/include/asm-x86/percpu.h =================================================================== --- linux-2.6.orig/include/asm-x86/percpu.h 2008-05-23 20:43:41.000000000 -0700 +++ linux-2.6/include/asm-x86/percpu.h 2008-05-23 20:43:56.000000000 -0700 @@ -6,9 +6,7 @@ #include #ifdef CONFIG_SMP -#define __per_cpu_offset(cpu) (cpu_pda(cpu)->data_offset) #define __my_cpu_offset read_pda(data_offset) -#define per_cpu_offset(x) (__per_cpu_offset(x)) #endif #define __percpu_seg "%%gs:"