=================================================================== RCS file: /home/hpa/kernel/bkcvs/linux-2.5/arch/i386/Kconfig,v retrieving revision 1.106 diff -u -r1.106 Kconfig --- smp/arch/i386/Kconfig 25 Feb 2004 04:04:05 -0000 1.106 +++ smp/arch/i386/Kconfig 25 Feb 2004 17:16:59 -0000 @@ -1336,11 +1336,6 @@ depends on !(X86_VISWS || X86_VOYAGER) default y -config X86_TRAMPOLINE - bool - depends on SMP || X86_VISWS - default y - config PC bool depends on X86 && !EMBEDDED =================================================================== RCS file: /home/hpa/kernel/bkcvs/linux-2.5/arch/i386/defconfig,v retrieving revision 1.108 diff -u -r1.108 defconfig --- smp/arch/i386/defconfig 5 Feb 2004 20:39:00 -0000 1.108 +++ smp/arch/i386/defconfig 25 Feb 2004 17:13:36 -0000 @@ -1195,5 +1195,4 @@ CONFIG_X86_SMP=y CONFIG_X86_HT=y CONFIG_X86_BIOS_REBOOT=y -CONFIG_X86_TRAMPOLINE=y CONFIG_PC=y =================================================================== RCS file: /home/hpa/kernel/bkcvs/linux-2.5/arch/i386/boot/tools/build.c,v retrieving revision 1.4 diff -u -r1.4 build.c --- smp/arch/i386/boot/tools/build.c 7 Mar 2003 15:39:16 -0000 1.4 +++ smp/arch/i386/boot/tools/build.c 21 Feb 2004 01:44:11 -0000 @@ -150,10 +150,8 @@ sz = sb.st_size; fprintf (stderr, "System is %d kB\n", sz/1024); sys_size = (sz + 15) / 16; - /* 0x40000*16 = 4.0 MB, reasonable estimate for the current maximum */ - if (sys_size > (is_big_kernel ? 0x40000 : DEF_SYSSIZE)) - die("System is too big. Try using %smodules.", - is_big_kernel ? "" : "bzImage or "); + if (!is_big_kernel && sys_size > DEF_SYSSIZE) + die("System is too big. Try using bzImage or modules."); while (sz > 0) { int l, n; =================================================================== RCS file: /home/hpa/kernel/bkcvs/linux-2.5/arch/i386/kernel/Makefile,v retrieving revision 1.54 diff -u -r1.54 Makefile --- smp/arch/i386/kernel/Makefile 19 Feb 2004 04:45:04 -0000 1.54 +++ smp/arch/i386/kernel/Makefile 25 Feb 2004 17:07:12 -0000 @@ -18,8 +18,7 @@ obj-$(CONFIG_X86_CPUID) += cpuid.o obj-$(CONFIG_MICROCODE) += microcode.o obj-$(CONFIG_APM) += apm.o -obj-$(CONFIG_X86_SMP) += smp.o smpboot.o -obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o +obj-$(CONFIG_X86_SMP) += smp.o smpboot.o trampoline.o obj-$(CONFIG_X86_MPPARSE) += mpparse.o obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o obj-$(CONFIG_X86_IO_APIC) += io_apic.o =================================================================== RCS file: /home/hpa/kernel/bkcvs/linux-2.5/arch/i386/kernel/head.S,v retrieving revision 1.30 diff -u -r1.30 head.S --- smp/arch/i386/kernel/head.S 19 Feb 2004 04:55:53 -0000 1.30 +++ smp/arch/i386/kernel/head.S 25 Feb 2004 17:12:19 -0000 @@ -39,50 +39,96 @@ #define X86_CAPABILITY CPU_PARAMS+12 #define X86_VENDOR_ID CPU_PARAMS+36 /* offset dependent on NCAPINTS */ -/* - * Initialize page tables +/* + * This is how much memory *in addition to the memory covered up to + * and including _end* we need mapped initially. We need one bit for + * each possible page, which currently means 2^36/4096/8 = 2 MB + * (64-bit-capable chips can do more, but if you have more than 64 GB + * of memory you *really* should be running a 64-bit kernel. However, + * if this really bothers someone we could query this dynamically.) + * + * The other thing we may want to do dynamically in the future is to + * detect PSE and skip generating the PTEs. + * + * Modulo rounding, each megabyte assigned here requires a kilobyte of + * memory, which is currently unreclaimed. + * + * This should be a multiple of a page. */ -#define INIT_PAGE_TABLES \ - movl $pg0 - __PAGE_OFFSET, %edi; \ - /* "007" doesn't mean with license to kill, but PRESENT+RW+USER */ \ - movl $007, %eax; \ -2: stosl; \ - add $0x1000, %eax; \ - cmp $empty_zero_page - __PAGE_OFFSET, %edi; \ - jne 2b; +#define INIT_MAP_BEYOND_END (2*1024*1024) + /* - * swapper_pg_dir is the main page directory, address 0x00101000 - * - * On entry, %esi points to the real-mode code as a 32-bit pointer. + * 32-bit kernel entrypoint; only used by the boot CPU. On entry, + * %esi points to the real-mode code as a 32-bit pointer. + * CS and DS must be 4 GB flat segments, but we don't depend on + * any particular GDT layout, because we load our own as soon as we + * can. */ ENTRY(startup_32) -#ifdef CONFIG_X86_VISWS /* - * On SGI Visual Workstations boot CPU starts in protected mode. + * Set segments to known values. */ - orw %bx, %bx - jnz 1f - INIT_PAGE_TABLES - movl $swapper_pg_dir - __PAGE_OFFSET, %eax - movl %eax, %cr3 - lgdt boot_gdt -1: -#endif + cld + lgdt boot_gdt_descr - __PAGE_OFFSET + movl $(__BOOT_DS),%eax + movl %eax,%ds + movl %eax,%es + movl %eax,%fs + movl %eax,%gs + +/* + * Initialize page tables. This creates a PDE and a set of page + * tables, which are located immediately beyond _end. The variable + * init_pg_tables_end is set up to point to the first "safe" location. + * + * Warning: don't use %esi or the stack in this code. However, %esp + * can be used as a GPR if you really need it... + */ +page_pde_offset = (__PAGE_OFFSET >> 20); + + movl $((_end-__PAGE_OFFSET) + 0xfff), %edi + andl $(~0xfff), %edi + movl $(swapper_pg_dir - __PAGE_OFFSET), %edx + movl $0x007, %eax /* 0x007 = PRESENT+RW+USER */ +10: + leal 0x007(%edi),%ecx /* Create PDE entry */ + movl %ecx,(%edx) /* Store identity PDE entry */ + movl %ecx,page_pde_offset(%edx) /* Store kernel PDE entry */ + addl $4,%edx + movl $1024, %ecx +11: + stosl + addl $0x1000,%eax + loop 11b + /* End condition: we must map up to and including INIT_MAP_BEYOND_END */ + /* bytes beyond the end of our own page tables; the +0x007 is the attribute bits */ + leal (INIT_MAP_BEYOND_END+0x007)(%edi),%ebp + cmpl %ebp,%eax + jb 10b + movl %edi,(init_pg_tables_end - __PAGE_OFFSET) + +#ifdef CONFIG_SMP + xorl %ebx,%ebx /* This is the boot CPU (BSP) */ + jmp 3f /* - * Set segments to known values + * Non-boot CPU entry point; entered from trampoline.S + * We can't lgdt here, because lgdt itself uses a data segment, but + * we know the trampoline has already loaded the boot_gdt_table GDT + * for us. */ +ENTRY(startup_32_smp) cld movl $(__BOOT_DS),%eax movl %eax,%ds movl %eax,%es movl %eax,%fs movl %eax,%gs -#ifdef CONFIG_SMP - orw %bx,%bx - jz 1f + + xorl %ebx,%ebx + incl %ebx /* This is a secondary processor (AP) */ /* * New page tables may be in 4Mbyte page mode and may @@ -99,37 +145,40 @@ * not yet offset PAGE_OFFSET.. */ #define cr4_bits mmu_cr4_features-__PAGE_OFFSET - cmpl $0,cr4_bits - je 3f + movl cr4_bits,%edx + andl %edx,%edx + jz 3f movl %cr4,%eax # Turn on paging options (PSE,PAE,..) - orl cr4_bits,%eax + orl %edx,%eax movl %eax,%cr4 - jmp 3f -1: -#endif - INIT_PAGE_TABLES + +3: +#endif /* CONFIG_SMP */ + /* * Enable paging */ -3: movl $swapper_pg_dir-__PAGE_OFFSET,%eax movl %eax,%cr3 /* set the page table pointer.. */ movl %cr0,%eax orl $0x80000000,%eax movl %eax,%cr0 /* ..and set paging (PG) bit */ - jmp 1f /* flush the prefetch-queue */ -1: - movl $1f,%eax - jmp *%eax /* make sure eip is relocated */ + ljmp $__BOOT_CS,$1f /* Clear prefetch and normalize %eip */ 1: /* Set up the stack pointer */ lss stack_start,%esp -#ifdef CONFIG_SMP - orw %bx,%bx - jz 1f /* Initial CPU cleans BSS */ +/* + * Initialize eflags. Some BIOS's leave bits like NT set. This would + * confuse the debugger if this code is traced. + * XXX - best to initialize before switching to protected mode. + */ pushl $0 popfl + +#ifdef CONFIG_SMP + andl %ebx,%ebx + jz 1f /* Initial CPU cleans BSS */ jmp checkCPUtype 1: #endif /* CONFIG_SMP */ @@ -142,21 +191,15 @@ movl $__bss_start,%edi movl $__bss_stop,%ecx subl %edi,%ecx - rep - stosb + shrl $2,%ecx + rep ; stosl /* * start system 32-bit setup. We need to re-do some of the things done * in 16-bit mode for the "real" operations. */ call setup_idt -/* - * Initialize eflags. Some BIOS's leave bits like NT set. This would - * confuse the debugger if this code is traced. - * XXX - best to initialize before switching to protected mode. - */ - pushl $0 - popfl + /* * Copy bootup parameters out of the way. First 2kB of * _empty_zero_page is for boot parameters, second 2kB @@ -273,7 +316,7 @@ call initialize_secondary jmp L6 1: -#endif +#endif /* CONFIG_SMP */ call start_kernel L6: jmp L6 # main should never return here, but @@ -309,6 +352,8 @@ * and the kernel moved to PAGE_OFFSET. Interrupts * are enabled elsewhere, when we can be relatively * sure everything is ok. + * + * Warning: %esi is live across this function. */ setup_idt: lea ignore_int,%edx @@ -361,10 +406,17 @@ * segment size, and 32-bit linear address value: */ +.globl boot_gdt_descr .globl idt_descr .globl cpu_gdt_descr ALIGN +# early boot GDT descriptor (must use 1:1 address mapping) + .word 0 # 32 bit align gdt_desc.address +boot_gdt_descr: + .word __BOOT_DS+7 + .long boot_gdt_table - __PAGE_OFFSET + .word 0 # 32-bit align idt_desc.address idt_descr: .word IDT_ENTRIES*8-1 # idt contains 256 entries @@ -379,41 +431,25 @@ .fill NR_CPUS-1,8,0 # space for the other GDT descriptors /* - * This is initialized to create an identity-mapping at 0-8M (for bootup - * purposes) and another mapping of the 0-8M area at virtual address - * PAGE_OFFSET. + * swapper_pg_dir is the main page directory, address 0x00101000 + * + * This is initialized to create an identity-mapping at 0 (for bootup + * purposes) and another mapping at virtual address PAGE_OFFSET. The + * values put here should be all invalid (zero); the valid + * entries are created dynamically at boot time. + * + * The code creates enough page tables to map 0-_end, the page tables + * themselves, plus INIT_MAP_BEYOND_END bytes; see comment at beginning. */ .org 0x1000 ENTRY(swapper_pg_dir) - .long 0x00102007 - .long 0x00103007 - .fill BOOT_USER_PGD_PTRS-2,4,0 - /* default: 766 entries */ - .long 0x00102007 - .long 0x00103007 - /* default: 254 entries */ - .fill BOOT_KERNEL_PGD_PTRS-2,4,0 + .fill 1024,4,0 -/* - * The page tables are initialized to only 8MB here - the final page - * tables are set up later depending on memory size. - */ .org 0x2000 -ENTRY(pg0) - -.org 0x3000 -ENTRY(pg1) - -/* - * empty_zero_page must immediately follow the page tables ! (The - * initialization loop counts until empty_zero_page) - */ - -.org 0x4000 ENTRY(empty_zero_page) + .fill 4096,1,0 -.org 0x5000 - +.org 0x3000 /* * Real beginning of normal "text" segment */ @@ -428,19 +464,18 @@ .data /* - * The Global Descriptor Table contains 28 quadwords, per-CPU. - */ -#if defined(CONFIG_SMP) || defined(CONFIG_X86_VISWS) -/* * The boot_gdt_table must mirror the equivalent in setup.S and is - * used only by the trampoline for booting other CPUs + * used only for booting. */ .align L1_CACHE_BYTES ENTRY(boot_gdt_table) .fill GDT_ENTRY_BOOT_CS,8,0 .quad 0x00cf9a000000ffff /* kernel 4GB code at 0x00000000 */ .quad 0x00cf92000000ffff /* kernel 4GB data at 0x00000000 */ -#endif + +/* + * The Global Descriptor Table contains 28 quadwords, per-CPU. + */ .align L1_CACHE_BYTES ENTRY(cpu_gdt_table) .quad 0x0000000000000000 /* NULL descriptor */ @@ -488,4 +523,3 @@ #ifdef CONFIG_SMP .fill (NR_CPUS-1)*GDT_ENTRIES,8,0 /* other CPU's GDT */ #endif - =================================================================== RCS file: /home/hpa/kernel/bkcvs/linux-2.5/arch/i386/kernel/setup.c,v retrieving revision 1.107 diff -u -r1.107 setup.c --- smp/arch/i386/kernel/setup.c 19 Feb 2004 04:45:13 -0000 1.107 +++ smp/arch/i386/kernel/setup.c 21 Feb 2004 04:44:02 -0000 @@ -50,6 +50,11 @@ #include "setup_arch_pre.h" #include "mach_resources.h" +/* This value is set up by the early boot code to point to the value + immediately after the boot time page tables. It contains a *physical* + address, and must not be in the .bss segment! */ +unsigned long init_pg_tables_end __initdata = ~0UL; + int disable_pse __initdata = 0; static inline char * __init machine_specific_memory_setup(void); @@ -115,7 +120,6 @@ extern void dmi_scan_machine(void); extern void generic_apic_probe(char *); extern int root_mountflags; -extern char _end[]; unsigned long saved_videomode; @@ -785,7 +789,7 @@ * partially used pages are not usable - thus * we are rounding upwards: */ - start_pfn = PFN_UP(__pa(_end)); + start_pfn = PFN_UP(init_pg_tables_end); find_max_pfn(); @@ -1097,7 +1101,7 @@ init_mm.start_code = (unsigned long) _text; init_mm.end_code = (unsigned long) _etext; init_mm.end_data = (unsigned long) _edata; - init_mm.brk = (unsigned long) _end; + init_mm.brk = init_pg_tables_end + PAGE_OFFSET; code_resource.start = virt_to_phys(_text); code_resource.end = virt_to_phys(_etext)-1; =================================================================== RCS file: /home/hpa/kernel/bkcvs/linux-2.5/arch/i386/kernel/trampoline.S,v retrieving revision 1.9 diff -u -r1.9 trampoline.S --- smp/arch/i386/kernel/trampoline.S 26 May 2003 23:59:47 -0000 1.9 +++ smp/arch/i386/kernel/trampoline.S 25 Feb 2004 08:34:57 -0000 @@ -23,9 +23,13 @@ * and IP is zero. Thus, data addresses need to be absolute * (no relocation) and are taken with regard to r_base. * - * If you work on this file, check the object module with objdump - * --full-contents --reloc to make sure there are no relocation - * entries except for the gdt one.. + * If you work on this file, check the object module with + * objdump --reloc to make sure there are no relocation + * entries except for: + * + * TYPE VALUE + * R_386_32 startup_32_smp + * R_386_32 boot_gdt_table */ #include @@ -54,22 +58,18 @@ xor %ax, %ax inc %ax # protected mode (PE) bit lmsw %ax # into protected mode - jmp flush_instr -flush_instr: - ljmpl $__BOOT_CS, $0x00100000 - # jump to startup_32 in arch/i386/kernel/head.S + # flush prefetch and jump to startup_32_smp in arch/i386/kernel/head.S + ljmpl $__BOOT_CS, $(startup_32_smp-__PAGE_OFFSET) -boot_idt: - .word 0 # idt limit = 0 - .word 0, 0 # idt base = 0L - -# -# NOTE: here we actually use CPU#0's GDT - but that is OK, we reload -# the proper GDT shortly after booting up the secondary CPUs. -# -ENTRY(boot_gdt) + # These need to be in the same 64K segment as the above; + # hence we don't use the boot_gdt_descr defined in head.S +boot_gdt: .word __BOOT_DS + 7 # gdt limit - .long boot_gdt_table-__PAGE_OFFSET # gdt base = gdt (first SMP CPU) + .long boot_gdt_table-__PAGE_OFFSET # gdt base + +boot_idt: + .word 0 # idt limit = 0 + .long 0 # idt base = 0L .globl trampoline_end trampoline_end: =================================================================== RCS file: /home/hpa/kernel/bkcvs/linux-2.5/arch/i386/kernel/vmlinux.lds.S,v retrieving revision 1.2 diff -u -r1.2 vmlinux.lds.S --- smp/arch/i386/kernel/vmlinux.lds.S 18 Aug 2003 18:17:01 -0000 1.2 +++ smp/arch/i386/kernel/vmlinux.lds.S 21 Feb 2004 05:03:08 -0000 @@ -105,6 +105,7 @@ __bss_start = .; /* BSS */ .bss : { *(.bss) } + . = ALIGN(4); __bss_stop = .; _end = . ; =================================================================== RCS file: /home/hpa/kernel/bkcvs/linux-2.5/arch/i386/mm/discontig.c,v retrieving revision 1.16 diff -u -r1.16 discontig.c --- smp/arch/i386/mm/discontig.c 21 Sep 2003 22:39:20 -0000 1.16 +++ smp/arch/i386/mm/discontig.c 21 Feb 2004 01:48:30 -0000 @@ -66,7 +66,7 @@ extern void one_highpage_init(struct page *, int, int); extern struct e820map e820; -extern char _end; +extern unsigned long init_pg_tables_end; extern unsigned long highend_pfn, highstart_pfn; extern unsigned long max_low_pfn; extern unsigned long totalram_pages; @@ -237,7 +237,7 @@ reserve_pages = calculate_numa_remap_pages(); /* partially used pages are not usable - thus round upwards */ - system_start_pfn = min_low_pfn = PFN_UP(__pa(&_end)); + system_start_pfn = min_low_pfn = PFN_UP(init_pg_tables_end); find_max_pfn(); system_max_low_pfn = max_low_pfn = find_max_low_pfn();