Align and pad x86_64 GDT on page boundary From: Ravikiran G Thirumalai This patch is on the same lines as Zachary Amsden's i386 GDT page alignment patch in -mm, but for x86_64. Patch to align and pad x86_64 GDT on page boundaries. [AK: some minor cleanups and fixed incorrect TLS initialization in CPU init.] Signed-off-by: Nippun Goel Signed-off-by: Ravikiran Thirumalai Signed-off-by: Shai Fultheim Signed-off-by: Andi Kleen Index: linux/arch/x86_64/kernel/head.S =================================================================== --- linux.orig/arch/x86_64/kernel/head.S +++ linux/arch/x86_64/kernel/head.S @@ -379,7 +379,7 @@ gdt: * Also sysret mandates a special GDT layout */ -.align L1_CACHE_BYTES +.align PAGE_SIZE /* The TLS descriptors are currently at a different place compared to i386. Hopefully nobody expects them at a fixed place (Wine?) */ @@ -401,10 +401,11 @@ ENTRY(cpu_gdt_table) gdt_end: /* asm/segment.h:GDT_ENTRIES must match this */ /* This should be a multiple of the cache line size */ - /* GDTs of other CPUs: */ - .fill (GDT_SIZE * NR_CPUS) - (gdt_end - cpu_gdt_table) + /* GDTs of other CPUs are now dynamically allocated */ + + /* zero the remaining page */ + .fill PAGE_SIZE / 8 - GDT_ENTRIES,8,0 - .align L1_CACHE_BYTES ENTRY(idt_table) .rept 256 .quad 0 Index: linux/arch/x86_64/kernel/setup64.c =================================================================== --- linux.orig/arch/x86_64/kernel/setup64.c +++ linux/arch/x86_64/kernel/setup64.c @@ -213,16 +213,14 @@ void __cpuinit cpu_init (void) * Initialize the per-CPU GDT with the boot GDT, * and set up the GDT descriptor: */ - if (cpu) { - memcpy(cpu_gdt_table[cpu], cpu_gdt_table[0], GDT_SIZE); - } + if (cpu) + memcpy(cpu_gdt(cpu), cpu_gdt_table, GDT_SIZE); cpu_gdt_descr[cpu].size = GDT_SIZE; - cpu_gdt_descr[cpu].address = (unsigned long)cpu_gdt_table[cpu]; asm volatile("lgdt %0" :: "m" (cpu_gdt_descr[cpu])); asm volatile("lidt %0" :: "m" (idt_descr)); - memcpy(me->thread.tls_array, 
cpu_gdt_table[cpu], GDT_ENTRY_TLS_ENTRIES * 8); + memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); syscall_init(); wrmsrl(MSR_FS_BASE, 0); Index: linux/arch/x86_64/kernel/smpboot.c =================================================================== --- linux.orig/arch/x86_64/kernel/smpboot.c +++ linux/arch/x86_64/kernel/smpboot.c @@ -744,6 +744,13 @@ static int __cpuinit do_boot_cpu(int cpu }; DECLARE_WORK(work, do_fork_idle, &c_idle); + /* allocate memory for gdts of secondary cpus. Hotplug is considered */ + if (!cpu_gdt_descr[cpu].address && + !(cpu_gdt_descr[cpu].address = get_zeroed_page(GFP_KERNEL))) { + printk(KERN_ERR "Failed to allocate GDT for CPU %d\n", cpu); + return -1; + } + c_idle.idle = get_idle_for_cpu(cpu); if (c_idle.idle) { Index: linux/arch/x86_64/kernel/suspend.c =================================================================== --- linux.orig/arch/x86_64/kernel/suspend.c +++ linux/arch/x86_64/kernel/suspend.c @@ -120,7 +120,7 @@ void fix_processor_context(void) set_tss_desc(cpu,t); /* This just modifies memory; should not be neccessary. But... This is neccessary, because 386 hardware has concept of busy TSS or some similar stupidity. 
*/ - cpu_gdt_table[cpu][GDT_ENTRY_TSS].type = 9; + cpu_gdt(cpu)[GDT_ENTRY_TSS].type = 9; syscall_init(); /* This sets MSR_*STAR and related */ load_TR_desc(); /* This does ltr */ Index: linux/include/asm-x86_64/desc.h =================================================================== --- linux.orig/include/asm-x86_64/desc.h +++ linux/include/asm-x86_64/desc.h @@ -25,7 +25,7 @@ struct n_desc_struct { unsigned int a,b; }; -extern struct desc_struct cpu_gdt_table[NR_CPUS][GDT_ENTRIES]; +extern struct desc_struct cpu_gdt_table[GDT_ENTRIES]; enum { GATE_INTERRUPT = 0xE, @@ -79,6 +79,9 @@ extern struct desc_struct default_ldt[]; extern struct gate_struct idt_table[]; extern struct desc_ptr cpu_gdt_descr[]; +/* the cpu gdt accessor */ +#define cpu_gdt(_cpu) ((struct desc_struct *)cpu_gdt_descr[_cpu].address) + static inline void _set_gate(void *adr, unsigned type, unsigned long func, unsigned dpl, unsigned ist) { struct gate_struct s; @@ -144,20 +147,20 @@ static inline void set_tss_desc(unsigned * -1? 
seg base+limit should be pointing to the address of the * last valid byte */ - set_tssldt_descriptor(&cpu_gdt_table[cpu][GDT_ENTRY_TSS], + set_tssldt_descriptor(&cpu_gdt(cpu)[GDT_ENTRY_TSS], (unsigned long)addr, DESC_TSS, IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1); } static inline void set_ldt_desc(unsigned cpu, void *addr, int size) { - set_tssldt_descriptor(&cpu_gdt_table[cpu][GDT_ENTRY_LDT], (unsigned long)addr, + set_tssldt_descriptor(&cpu_gdt(cpu)[GDT_ENTRY_LDT], (unsigned long)addr, DESC_LDT, size * 8 - 1); } static inline void set_seg_base(unsigned cpu, int entry, void *base) { - struct desc_struct *d = &cpu_gdt_table[cpu][entry]; + struct desc_struct *d = &cpu_gdt(cpu)[entry]; u32 addr = (u32)(u64)base; BUG_ON((u64)base >> 32); d->base0 = addr & 0xffff; @@ -199,7 +202,7 @@ static inline void set_seg_base(unsigned static inline void load_TLS(struct thread_struct *t, unsigned int cpu) { - u64 *gdt = (u64 *)(cpu_gdt_table[cpu] + GDT_ENTRY_TLS_MIN); + u64 *gdt = (u64 *)(cpu_gdt(cpu) + GDT_ENTRY_TLS_MIN); gdt[0] = t->tls_array[0]; gdt[1] = t->tls_array[1]; gdt[2] = t->tls_array[2];