GIT b668e28b74558b3a8277b5fde363ae5f803b36f0 git+ssh://master.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-x86.git#mm

commit b668e28b74558b3a8277b5fde363ae5f803b36f0
Author: Jeremy Fitzhardinge
Date: Tue Dec 11 16:54:33 2007 +0100

x86: clean up mm/init_32.c

Some code reformatting in init_32.c. No functional change.

Signed-off-by: Jeremy Fitzhardinge
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 2ab64049a40b1ae65540037a43c6c0259f009f07
Author: Jeremy Fitzhardinge
Date: Tue Dec 11 16:54:33 2007 +0100

x86: kill mk_pte_huge

It only has a single use, which can be trivially replaced.

Signed-off-by: Jeremy Fitzhardinge
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit a035f564b662e930896e87d2f823a0cc9971a9a4
Author: Ingo Molnar
Date: Tue Dec 11 16:54:33 2007 +0100

x86: clean up drivers/char/rtc.c

Tons of style cleanup in drivers/char/rtc.c - no code changed:

   text    data     bss     dec     hex filename
   6400     384      32    6816    1aa0 rtc.o.before
   6400     384      32    6816    1aa0 rtc.o.after

Since we seem to have a number of open breakages in this code, we might as well start with making the code more readable and maintainable.

Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit d84a5ed46f1a3425ffb1c831e98ee3585477cc6c
Author: Randy Dunlap
Date: Tue Dec 11 16:54:33 2007 +0100

x64/page.h: convert some macros to inlines

Convert the clear_page/copy_page macros to inline functions for type-checking. Andrew wants to extirpate these ugly macros. (Ingo too. Thomas as well. Please send us more "kill ugly macros" patches! :-)

Signed-off-by: Randy Dunlap
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 3a272d8ac5d35b3c53ada731569ca054d282856c
Author: Glauber de Oliveira Costa
Date: Tue Dec 11 16:54:33 2007 +0100

remove arch specific segment headers

This patch puts the remainder of the arch-specific segment headers in segment.h.

Signed-off-by: Glauber de Oliveira Costa
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 477c361fce3f41b051ad2f5cbff69f3a5e9779f3
Author: Glauber de Oliveira Costa
Date: Tue Dec 11 16:54:32 2007 +0100

unify common parts of segment.h

Although segment handling in i386 and x86_64 is very different, there's a common part. Put it in segment.h instead of the arch-specific headers.

Signed-off-by: Glauber de Oliveira Costa
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 50c7fb5a81d2d38e6d0853c2953441407ebb88a5
Author: Glauber de Oliveira Costa
Date: Tue Dec 11 16:54:32 2007 +0100

put get_kernel_rpl in a common location

This macro is useful for both i386 and x86_64, so put it in a common location, where both arches can grab it.

Signed-off-by: Glauber de Oliveira Costa
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit f0e3b6b3f2b54458b5c86ae35cc6bf2f24ae9bb2
Author: Markus Metzger
Date: Tue Dec 11 16:54:32 2007 +0100

x86, ptrace: support for branch trace store (BTS)

Resend using a different mail client. Changes to the last version:
- split implementation into two layers: ds/bts and ptrace
- renamed TIFs
- save/restore the ds save area msr in __switch_to_xtra()
- make block-stepping only look at the BTF bit

Signed-off-by: Markus Metzger
Signed-off-by: Suresh Siddha
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner
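[ Editor's sketch for the get_kernel_rpl entry above -- assumed form, not the verbatim kernel code: a native kernel always runs at ring 0, while paravirt builds override the macro because e.g. a 32-bit Xen guest kernel runs at ring 1. ]

	/* Sketch (assumed): native kernels run at RPL 0; CONFIG_PARAVIRT
	 * builds supply their own definition with the real privilege level. */
	#ifndef CONFIG_PARAVIRT
	#define get_kernel_rpl()	0
	#endif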
commit c6bf19608e39e36e97e8ec218a43afec1861a0b2
Author: Jeff Dike
Date: Tue Dec 11 16:54:32 2007 +0100

UML - change sigcontext fields to match x86

git-x86, in commit 70aa1bd3839e3ec74ce65316528a82570e8de666, changed a lot of the sigcontext field names. This patch changes UML usage to match. I also changed includes of generic headers from "" to <>.

Signed-off-by: Jeff Dike
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit e183a2382f0916fd64a7c54065b4091beb78729a
Author: Glauber de Oliveira Costa
Date: Tue Dec 11 16:54:32 2007 +0100

unify system.h

This patch finishes the unification of the system.h file. i386 needs a constant to be defined, and it is defined inside an ifdef. Other than that, pretty much nothing but includes is left in the arch-specific headers, and they are deleted.

Signed-off-by: Glauber de Oliveira Costa
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit cf7ccddb46c08f9e0b87d7e714d160c3983e88c2
Author: Glauber de Oliveira Costa
Date: Tue Dec 11 16:54:31 2007 +0100

move switch_to macro to system.h

This patch moves the switch_to() macro to system.h. As those macros are fundamentally different between i386 and x86_64, they are enclosed in an ifdef.

Signed-off-by: Glauber de Oliveira Costa
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 0a31e630c297a429638bb39efe411f6ac9d5cce3
Author: Glauber de Oliveira Costa
Date: Tue Dec 11 16:54:31 2007 +0100

unify smp parts of system.h

The memory barrier parts of system.h are not very different between i386 and x86_64, the main difference being the availability of instructions, which we handle with the use of ifdefs. They are consolidated in the system.h file, and then removed from the arch-specific headers.

Signed-off-by: Glauber de Oliveira Costa
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 6dec9cb6283e3ab5b7fef63da3eb8f8c5602c480
Author: Glauber de Oliveira Costa
Date: Tue Dec 11 16:54:31 2007 +0100

remove unused macro

Mr. Grep says warn_if_not_ulong() is not used anymore anywhere in the code. So, we remove it.

Signed-off-by: Glauber de Oliveira Costa
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit aeba0eab1d788785e21681ca9c617b503d5c7ea1
Author: Glauber de Oliveira Costa
Date: Tue Dec 11 16:54:31 2007 +0100

unify paravirt parts of system.h

This patch moves the i386 control register manipulation functions, wbinvd, and clts functions to system.h. They are essentially the same as in x86_64. With this, system.h paravirt comes for free in x86_64.

Signed-off-by: Glauber de Oliveira Costa
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit dc7114ea029fad39cb20ffa9c42710c55b3ba312
Author: Glauber de Oliveira Costa
Date: Tue Dec 11 16:54:31 2007 +0100

remove references to cr8 register

As pointed out by Andi, Linux never really uses this register, so saving and restoring it is not really necessary. This patch removes all references to it.

Signed-off-by: Glauber de Oliveira Costa
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit d86153abbfbc808a811021e0c88901efdd852d24
Author: Glauber de Oliveira Costa
Date: Tue Dec 11 16:54:30 2007 +0100

unify load_segment macro

This patch unifies the load_segment() macro, making it equal in the x86_64 and i386 architectures. The common version goes to system.h, and the old ones are deleted.

Signed-off-by: Glauber de Oliveira Costa
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit bf1205cf81b8c5ad594b0cc8357616de5fb9f127
Author: Glauber de Oliveira Costa
Date: Tue Dec 11 16:54:30 2007 +0100

put together equal pieces of system.h

This patch puts together the pieces of system_{32,64}.h that look the same. It's the first step towards the integration of this file.

Signed-off-by: Glauber de Oliveira Costa
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner
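[ Editor's sketch for the "unify smp parts of system.h" entry above -- a hedged illustration of the ifdef pattern, close to but not necessarily verbatim the unified header: x86_64 can always use mfence, while i386 may have to fall back to a serializing locked stack operation when SSE2 is absent. ]

	/* Sketch (assumed) of the ifdef-based barrier unification. */
	#ifdef CONFIG_X86_32
	#define mb()	alternative("lock; addl $0,0(%%esp)", "mfence", \
				    X86_FEATURE_XMM2)
	#else
	#define mb()	asm volatile("mfence" ::: "memory")
	#endif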
commit 380c5f157b71776d02ee31254583f8c2f32315fe
Author: Glauber de Oliveira Costa
Date: Tue Dec 11 16:54:30 2007 +0100

remove volatile keyword from clflush.

The p parameter is an explicit memory reference, and is enough to prevent gcc from being nasty here. The volatile seems not needed at all.

Signed-off-by: Glauber de Oliveira Costa
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 8374dfdc4590a63608f2e99059d1a280c95f543a
Author: Joerg Roedel
Date: Tue Dec 11 16:54:30 2007 +0100

x86_64: some whitespace cleanups in paging code

This patch does some whitespace cleanups in the paging code to fix some checkpatch.pl warnings of my formerly merged cleanup patches.

Signed-off-by: Joerg Roedel
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 4c095886b945d59e0a39f6e25ff637a72b80371e
Author: Andrew Morton
Date: Tue Dec 11 16:54:30 2007 +0100

pie-executable-randomization-uninlining

Cc: "Luck, Tony"
Cc: Arjan van de Ven
Cc: Jakub Jelinek
Cc: Jiri Kosina
Cc: KAMEZAWA Hiroyuki
Cc: Roland McGrath
Signed-off-by: Andrew Morton
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner
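[ Editor's sketch for the clflush entry above -- assumed shape, not the verbatim kernel code: with the "+m" constraint making the memory reference explicit to gcc, neither the parameter nor the asm needs a volatile qualifier. ]

	/* Sketch (assumed): the explicit "+m" operand already ties the asm
	 * to the memory access, so no volatile qualifier is required. */
	static inline void clflush(void *p)
	{
		asm("clflush %0" : "+m" (*(char *)p));
	}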
commit d4407e0b0ed7acac4e6714ec3f91d7aa0587b5d7
Author: Andrew Morton
Date: Tue Dec 11 16:54:30 2007 +0100

pie-executable-randomization-checkpatch-fixes

#39: FILE: arch/ia64/ia32/binfmt_elf32.c:229:
+elf32_map (struct file *filep, unsigned long addr, struct elf_phdr *eppnt, int prot, int type, unsigned long unused)

WARNING: no space between function name and open parenthesis '('
#39: FILE: arch/ia64/ia32/binfmt_elf32.c:229:
+elf32_map (struct file *filep, unsigned long addr, struct elf_phdr *eppnt, int prot, int type, unsigned long unused)

WARNING: line over 80 characters
#67: FILE: arch/x86/kernel/sys_x86_64.c:80:
+ new_begin = randomize_range(*begin, *begin + 0x02000000, 0);

ERROR: use tabs not spaces
#110: FILE: arch/x86/kernel/sys_x86_64.c:185:
+ ^I mm->cached_hole_size = 0;$

ERROR: use tabs not spaces
#111: FILE: arch/x86/kernel/sys_x86_64.c:186:
+ ^I^Imm->free_area_cache = mm->mmap_base;$

ERROR: use tabs not spaces
#112: FILE: arch/x86/kernel/sys_x86_64.c:187:
+ ^I}$

ERROR: use tabs not spaces
#141: FILE: arch/x86/kernel/sys_x86_64.c:216:
+ ^I^I/* remember the largest hole we saw so far */$

ERROR: use tabs not spaces
#142: FILE: arch/x86/kernel/sys_x86_64.c:217:
+ ^I^Iif (addr + mm->cached_hole_size < vma->vm_start)$

ERROR: use tabs not spaces
#143: FILE: arch/x86/kernel/sys_x86_64.c:218:
+ ^I^I mm->cached_hole_size = vma->vm_start - addr;$

ERROR: use tabs not spaces
#157: FILE: arch/x86/kernel/sys_x86_64.c:232:
+ ^Imm->free_area_cache = TASK_UNMAPPED_BASE;$

ERROR: need a space before the open parenthesis '('
#291: FILE: arch/x86/mm/mmap_64.c:101:
+ } else if(mmap_is_legacy()) {

WARNING: braces {} are not necessary for single statement blocks
#302: FILE: arch/x86/mm/mmap_64.c:112:
+ if (current->flags & PF_RANDOMIZE) {
+	mm->mmap_base += ((long)rnd) << PAGE_SHIFT;
+ }

WARNING: line over 80 characters
#314: FILE: fs/binfmt_elf.c:48:
+static unsigned long elf_map (struct file *, unsigned long, struct elf_phdr *, int, int, unsigned long);

WARNING: no space between function name and open parenthesis '('
#314: FILE: fs/binfmt_elf.c:48:
+static unsigned long elf_map (struct file *, unsigned long, struct elf_phdr *, int, int, unsigned long);

WARNING: line over 80 characters
#429: FILE: fs/binfmt_elf.c:438:
+ eppnt, elf_prot, elf_type, total_size);

ERROR: need space after that ',' (ctx:VxV)
#480: FILE: fs/binfmt_elf.c:939:
+ elf_prot, elf_flags,0);
                     ^

total: 9 errors, 7 warnings, 461 lines checked

Your patch has style problems, please review. If any of these errors are false positives, report them to the maintainer; see CHECKPATCH in MAINTAINERS.

Please run checkpatch prior to sending patches.

Cc: "Luck, Tony"
Cc: Arjan van de Ven
Cc: Jakub Jelinek
Cc: Jiri Kosina
Cc: KAMEZAWA Hiroyuki
Cc: Roland McGrath
Signed-off-by: Andrew Morton
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 111261328cf6e564e5320132a51131dc16ec4908
Author: Jiri Kosina
Date: Tue Dec 11 16:54:29 2007 +0100

PIE executable randomization

Load the main executable of (specially compiled/linked -pie/-fpie) ET_DYN binaries onto a random address (in cases in which mmap() is allowed to perform a randomization). The code has been extracted from Ingo's exec-shield patch http://people.redhat.com/mingo/exec-shield/

[akpm@linux-foundation.org: fix used-uninitialised warning]
[kamezawa.hiroyu@jp.fujitsu.com: fixed ia32 ELF on x86_64 handling]

Signed-off-by: Jiri Kosina
Cc: KAMEZAWA Hiroyuki
Cc: Arjan van de Ven
Cc: Roland McGrath
Cc: Jakub Jelinek
Cc: "Luck, Tony"
Signed-off-by: Andrew Morton
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 692121ec573be347b3cf2db5528f578b78610260
Author: Harvey Harrison
Date: Tue Dec 11 16:54:29 2007 +0100

x86: Unify include/asm-x86/linkage_[32|64].h

Remove the definitions of FASTCALL/fastcall from linkage_32: we have compiled with -regparm=3 by default since 2.6.20, so they should no longer be needed. CONFIG_X86_64 and CONFIG_X86_ALIGNMENT_16 are mutually exclusive, as found in Kconfig.cpu, so it should be fine to test them separately.

Signed-off-by: Harvey Harrison
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 23fec1ce1909c1badf17edc033dd4dda3edc9933
Author: Glauber de Oliveira Costa
Date: Tue Dec 11 16:54:29 2007 +0100

integrate i386 and x86_64 code in msr.h

This patch proceeds with the integration of msr.h, making the code unified instead of having a version for each architecture. We stick with the native_* functions, and then paravirt comes for free.

Signed-off-by: Glauber de Oliveira Costa
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit de70111229d0a34c519b59e846ad2480cfa482a8
Author: Glauber de Oliveira Costa
Date: Tue Dec 11 16:54:29 2007 +0100

make fixups wordsize agnostic

This patch uses the _ASM_ALIGN and _ASM_PTR macros to make the fixups in native_read/write_msr_safe look the same for x86_64 and i386. Besides using these macros, we also have to take the explicit instruction suffixes out. That is okay because all these instructions use registers, and can be sized by them.

Signed-off-by: Glauber de Oliveira Costa
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 3e05c0dae575ab8ea168c64ab66b8dfc3d82bf22
Author: Glauber de Oliveira Costa
Date: Tue Dec 11 16:54:29 2007 +0100

change write msr functions interface

This patch changes the native_write_msr() and friends interface to explicitly take two 32-bit registers instead of a 64-bit value. The change will ease the merge with 64-bit code. As the 64-bit value would be passed as two registers anyway on i386, the PVOP_CALL interface has to account for that and use low/high parameters; otherwise it would force the x86_64 version to be different. The change does not make the generated i386 code less efficient; as said above, it would get the values from two registers anyway.

Signed-off-by: Glauber de Oliveira Costa
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner
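[ Editor's sketch for the write-msr entry above -- assumed form, not necessarily the verbatim kernel code: the 64-bit MSR value is passed as two explicit 32-bit halves, matching how wrmsr consumes them in eax/edx. ]

	/* Sketch (assumed): wrmsr takes the MSR index in ecx and the
	 * value split across eax (low) and edx (high). */
	static inline void native_write_msr(unsigned int msr,
					    unsigned int low, unsigned int high)
	{
		asm volatile("wrmsr"
			     : /* no outputs */
			     : "c" (msr), "a" (low), "d" (high)
			     : "memory");
	}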
commit fcf12c80f63da42a6e6132557a80bc52f3a1bcfe
Author: Glauber de Oliveira Costa
Date: Tue Dec 11 16:54:29 2007 +0100

change rdpmc interface

The rdpmc instruction takes a counter argument in rcx. However, the i386 version was ignoring it. To make the x86_64 and i386 versions the same, as well as to comply with the instruction semantics, this parameter is added to the i386 version.

Signed-off-by: Glauber de Oliveira Costa
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 2df8317a8a487b3108149ad09566af28d723b619
Author: Glauber de Oliveira Costa
Date: Tue Dec 11 16:54:28 2007 +0100

introduce native_read_tscp

Targeting paravirt, this patch introduces native_read_tscp in place of the rdtscp() macro. When in a paravirt guest, this will involve a function call, and thus cannot be done in the vdso area. Those users then have to call the native version directly.

Signed-off-by: Glauber de Oliveira Costa
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 2902a275d21c96e3510f7a37444286274cee8832
Author: Glauber de Oliveira Costa
Date: Tue Dec 11 16:54:28 2007 +0100

unify cpuid functions

cpuid is not very different between i386 and x86_64. We move the x86_64 version away from msr.h, and unify them in processor.h, where they belong. cpuid() paravirt then comes for free.

Signed-off-by: Glauber de Oliveira Costa
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 8b3e6fa25d42b746b93f04efe674e45aa456ca81
Author: Glauber de Oliveira Costa
Date: Tue Dec 11 16:54:28 2007 +0100

split get_cycles_sync

This patch splits get_cycles_sync() into __get_cycles_sync() and the rdtscll part. Paravirt guests cannot issue rdtscll directly, as it involves a function call in the vdso area. So, using the __get_cycles_sync() base, we introduce vget_cycles_sync(), which then calls the native version of rdtscll. Ideally, however, a guest should define its own clocksource, together with a vread function.

Signed-off-by: Glauber de Oliveira Costa
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 00306c35382881c96591c35c2f6a1f551201a24a
Author: Glauber de Oliveira Costa
Date: Tue Dec 11 16:54:28 2007 +0100

allow sched clock to be overridden by paravirt

This patch turns sched_clock() into native_sched_clock(). sched_clock becomes a weak symbol, which can then give its place to a paravirt definition.

Signed-off-by: Glauber de Oliveira Costa
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 3364171443a6d8d904ca1db3a46fb4eb0ac9f84c
Author: Glauber de Oliveira Costa
Date: Tue Dec 11 16:54:27 2007 +0100

unify msr smp funcs

The functions under #ifdef CONFIG_SMP in msr.h are the same for both x86_64 and i386, and this patch removes one of the copies, putting the functions in a single location.

Signed-off-by: Glauber de Oliveira Costa
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 340640451d0562f45a75f40171a0cedda3d09f30
Author: Glauber de Oliveira Costa
Date: Tue Dec 11 16:54:27 2007 +0100

Wipe out traditional opt from x86_64 Makefile

Among other things, using -traditional as a gcc option stops us from using macro token pasting, which is a feature we heavily rely on. There was still a use of -traditional in arch/x86/kernel/Makefile_64, which this patch removes. I don't see any problems building kernels on my x86_64 box without -traditional.

Signed-off-by: Glauber de Oliveira Costa
Signed-off-by: Steven Rostedt
Acked-by: Jeremy Fitzhardinge
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner
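[ Editor's note on the -traditional entry above: traditional (pre-ANSI) preprocessing has no ## operator, so token pasting of the kind the kernel uses everywhere fails under it. A toy example, with a hypothetical macro purely for illustration: ]

	/* Token pasting: rejected under -traditional, ubiquitous in the kernel. */
	#define DEFINE_EVENT_HANDLER(name)	void name##_handler(void)

	DEFINE_EVENT_HANDLER(timer);	/* expands to: void timer_handler(void); */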
commit be8aa9e34284902ea9019f7c9f993c818b0e2f8b
Author: Harvey Harrison
Date: Tue Dec 11 16:54:27 2007 +0100

x86: Use def_bool where possible in Kconfig.cpu

Change occurrences of:

	bool
	default X

to:

	def_bool X

Signed-off-by: Harvey Harrison
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 77b6265a6c56695e0b22e7528a5d4fdd82d55eec
Author: Hiroshi Shimamoto
Date: Tue Dec 11 16:54:27 2007 +0100

x86: clean up process_32/64.c

Whitespace and coding style cleanup. Make process_32/64.c similar.

Signed-off-by: Hiroshi Shimamoto
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 710ce369a1d9b9332d48848c0fc1b0b2d929a409
Author: Harvey Harrison
Date: Tue Dec 11 16:54:27 2007 +0100

x86: Use def_bool where possible

Change occurrences of:

	bool
	default X

to:

	def_bool X

Change occurrences of:

	bool "Foo"
	default X

to:

	def_bool X
	prompt "Foo"

Purely mechanical changes, applies on top of your mm lineup. Shows no difference in the generated config for allmodconfig/allyesconfig. If you aren't interested in this kind of patch, just let me know.

Signed-off-by: Harvey Harrison
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit ae673837f855f4cae9296f43228acb076daa3340
Author: Joerg Roedel
Date: Tue Dec 11 16:54:26 2007 +0100

x86_64: use __PAGE_KERNEL_EXEC in ioremap_64.c

This patch replaces the manual permission setup for pages in ioremap_64.c with the pre-defined __PAGE_KERNEL_EXEC value.

Signed-off-by: Joerg Roedel
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 00fa370b8b89e3182011d30465fae4c061772580
Author: Joerg Roedel
Date: Tue Dec 11 16:54:26 2007 +0100

x86_64: use __PAGE_KERNEL* instead of _KERNPG_TABLE

This minor cleanup replaces _KERNPG_TABLE with __PAGE_KERNEL* for 2MB PTEs in the x86_64 memory initialization code. The __PAGE_KERNEL* defines are more appropriate for PTEs.

Signed-off-by: Joerg Roedel
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit af7ef54581f1960477b23168091a8dfea6c041ea
Author: Joerg Roedel
Date: Tue Dec 11 16:54:26 2007 +0100

x86_64: define all _PAGE_* in terms of _PAGE_BIT_*

This patch defines the _PAGE_* paging attributes in pgtable_64.h in terms of the previously defined _PAGE_BIT_* values.

Signed-off-by: Joerg Roedel
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner
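[ Editor's sketch for the _PAGE_BIT_* entry above -- an illustrative subset; the bit positions shown are the standard x86 PTE bits, but the exact macro list in the patch is assumed: ]

	/* Define attribute masks in terms of named bit positions. */
	#define _PAGE_BIT_PRESENT	0
	#define _PAGE_BIT_RW		1
	#define _PAGE_BIT_USER		2

	#define _PAGE_PRESENT	(1UL << _PAGE_BIT_PRESENT)
	#define _PAGE_RW	(1UL << _PAGE_BIT_RW)
	#define _PAGE_USER	(1UL << _PAGE_BIT_USER)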
commit b65ef7c08c817ca7355a389d3e23055c24ed639c
Author: Len Brown
Date: Tue Dec 11 16:54:26 2007 +0100

x86: 32-bit IOAPIC: de-fang IRQ compression

commit c434b7a6aedfe428ad17cd61b21b125a7b7a29ce (x86: avoid wasting IRQs for PCI devices) created a concept of "IRQ compression" on i386 to conserve IRQ numbers on systems with many sparsely populated IO APICs. The same scheme was also added to x86_64, but later removed when x86_64 received an IRQ overhaul that made it unnecessary -- including per-CPU IRQ vectors that greatly increased the IRQ capacity of the machine.

i386 has not received the analogous overhaul, and thus a previous attempt to delete IRQ compression from i386 was rejected on the theory that there may exist machines that actually need it. The fact is that the author of the IRQ compression patch was unable to confirm the actual existence of such a system. As a result, all i386 kernels with IOAPIC support pay the following:

1. confusion

IRQ compression renames the traditional IOAPIC pin numbers (aka ACPI GSIs) into sequential IRQ #s:

ACPI: PCI Interrupt 0000:00:1c.0[A] -> GSI 20 (level, low) -> IRQ 16
ACPI: PCI Interrupt 0000:00:1c.1[B] -> GSI 21 (level, low) -> IRQ 17
ACPI: PCI Interrupt 0000:00:1c.2[C] -> GSI 22 (level, low) -> IRQ 18
ACPI: PCI Interrupt 0000:00:1c.3[D] -> GSI 23 (level, low) -> IRQ 19
ACPI: PCI Interrupt 0000:00:1c.4[A] -> GSI 20 (level, low) -> IRQ 16

This makes /proc/interrupts look different depending on system configuration and device probe order. It is also different from the x86_64 kernel running on the exact same system. As a result, programmers get confused when comparing systems.

2. complexity

The IRQ code in Linux is already overly complex, and IRQ compression makes it worse. There have already been two bug workarounds related to IRQ compression -- the IRQ0 timer workaround and the VIA PCI IRQ workaround.

3. size

All i386 kernels with IOAPIC support contain an int[4096] -- a 4-page array to contain the renamed IRQs.

So while the IRQ compression code on i386 should really be deleted -- even before merging the x86_64 irq overhaul -- this patch simply disables it on all high-volume systems to avoid problems #1 and #2 on almost all i386 systems. A large system with pin numbers >= 64 will still have compression to conserve limited IRQ numbers for sparse IOAPICs. However, the vast majority of the planet, those with only pin numbers < 64, will use an identity GSI -> IRQ mapping.

Signed-off-by: Len Brown
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner
Acked-by: "Eric W. Biederman"

commit 03eff7f1930e74f3d3b1281d2920d67de5a64a82
Author: akpm@linux-foundation.org
Date: Tue Dec 11 16:54:26 2007 +0100

x86: fix typo in ptrace.c

> arch/x86/kernel/ptrace.c: In function 'set_segment_reg':
> arch/x86/kernel/ptrace.c:226: error: label at end of compound statement

Signed-off-by: Ingo Molnar

commit 86bbaad9a3e50b15ad4e9c8b7f4a424f00f83db2
Author: Roland McGrath
Date: Tue Dec 11 16:54:26 2007 +0100

x86 ptrace getreg/putreg merge

invalid_selector() didn't need to be implemented as a macro, hence it shouldn't have been.

Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit c1e38cfa06022e4b2cd9d2c64149b33ee3a4147e
Author: H. Peter Anvin
Date: Tue Dec 11 16:54:24 2007 +0100

x86: use generic register name in the thread and tss structures

This changes size-specific register names (eip/rip, esp/rsp, etc.) to generic names in the thread and tss structures.

Signed-off-by: H. Peter Anvin
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit e3877c4af0e489a33b10d602e29f22ef20590b33
Author: Roland McGrath
Date: Tue Dec 11 16:54:24 2007 +0100

x86 ptrace merge removals

This removes the old separate 64-bit and ia32 ptrace source files. They are no longer used.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 1f9b5b3a6f82ff771f950ab765b927a6e72c5a16
Author: Roland McGrath
Date: Tue Dec 11 16:54:23 2007 +0100

x86 ptrace merge complete

This switches the 64-bit build over to using the shared ptrace code, instead of the old ptrace_64.c and arch/x86/ia32/ptrace32.c code.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner
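[ Editor's sketch for the getreg/putreg merge entry above -- a hedged guess at invalid_selector() as an inline rather than a macro: a selector written from userland must be the null selector or carry the user requested-privilege level. ]

	/* Sketch (assumed): reject selectors that are neither null nor RPL 3. */
	static inline bool invalid_selector(u16 value)
	{
		return unlikely(value != 0 &&
				(value & SEGMENT_RPL_MASK) != USER_RPL);
	}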
commit 9541fd39d2ee3c4a62ae729024c89106916edf15
Author: Roland McGrath
Date: Tue Dec 11 16:54:23 2007 +0100

x86 ia32 ptrace arch merge

This moves the sys32_ptrace code into arch/x86/kernel/ptrace.c, verbatim except for a few hard-coded sizes replaced with sizeof. Here this code can use the shared local functions in this file.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 02d965fed890b602a2361935fa4b1e3dfb215868
Author: Roland McGrath
Date: Tue Dec 11 16:54:23 2007 +0100

x86 ia32 ptrace getreg/putreg merge

This reimplements the 64-bit IA32-emulation register access functions in arch/x86/kernel/ptrace.c, where they can share some guts with the native access functions directly. These functions are not used yet, but this paves the way to move IA32 ptrace support into this file to share its local functions.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit b3f59cbe2216398b36772968d59acdae3d413d6a
Author: Roland McGrath
Date: Tue Dec 11 16:54:23 2007 +0100

x86 ptrace merge syscall trace

This moves the 64-bit syscall tracing functions into ptrace.c, so that ptrace_64.c becomes entirely obsolete.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 1a97b472842a9d49443f7622a8342f723ec0f51b
Author: Roland McGrath
Date: Tue Dec 11 16:54:23 2007 +0100

x86 ptrace arch merge

This adds 64-bit support to arch_ptrace in arch/x86/kernel/ptrace.c, so this function can be used for native ptrace on both 32 and 64.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 02762584f35832e3e7a182a062ea6f819c678c3a
Author: Roland McGrath
Date: Tue Dec 11 16:54:22 2007 +0100

x86 ptrace getreg/putreg merge

This merges 64-bit support into the low-level register access functions in arch/x86/kernel/ptrace.c, paving the way to share this file between 32-bit and 64-bit builds.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 8bd3cc7fe800063d71c27e8f45c6d5469cdc12d5
Author: Roland McGrath
Date: Tue Dec 11 16:54:22 2007 +0100

x86 ptrace getreg/putreg cleanup

This cleans up the getreg/putreg functions to move the special cases (segment registers and eflags) out into their own subroutines.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 2609bfe8c77c3ffa3052ea3c23d525db6a764b1f
Author: Roland McGrath
Date: Tue Dec 11 16:54:22 2007 +0100

x86: ptrace FLAG_MASK cleanup

This cleans up the FLAG_MASK macro to use symbolic constants instead of a magic number.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 9771bf0be289b9c36ae4af4f4f1c7851d2194ae2
Author: Roland McGrath
Date: Tue Dec 11 16:54:22 2007 +0100

x86: ptrace_32 renamed

This renames ptrace_32.c back to ptrace.c, in preparation for merging the 32/64 versions of these files.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit b125d644f09ffd34a6568de3de165b34d54ff2ba
Author: Roland McGrath
Date: Tue Dec 11 16:54:21 2007 +0100

x86-32 thread_struct.debugreg

This replaces the debugreg[7] member of thread_struct with individual members debugreg0, etc. This saves two words for the dummies 4 and 5, and harmonizes the code between 32 and 64.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner
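[ Editor's sketch for the FLAG_MASK entry above -- the bit set shown reproduces the traditional i386 magic number 0x00050dd5; the exact composition in the merged file is assumed, not confirmed: ]

	/* Sketch (assumed): the eflags bits userland may modify via ptrace,
	 * spelled with symbolic constants instead of 0x00050dd5. */
	#define FLAG_MASK (X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | \
			   X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_TF | \
			   X86_EFLAGS_DF | X86_EFLAGS_OF | X86_EFLAGS_RF | \
			   X86_EFLAGS_AC)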
commit f09f41cf27019fe6df77d9477b864beb56dd488c
Author: Roland McGrath
Date: Tue Dec 11 16:54:21 2007 +0100

x86-64 ia32 ptrace get/putreg32 current task

This generalizes the getreg32 and putreg32 functions so they can be used on the current task, as well as on a task stopped in TASK_TRACED and switched off. This lays the groundwork to share this code for all kinds of user-mode machine state access, not just ptrace.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 114aa8deff27e2260c63d25704d16dc82a4c05a1
Author: Roland McGrath
Date: Tue Dec 11 16:54:21 2007 +0100

x86-32 ptrace get/putreg current task

This generalizes the getreg and putreg functions so they can be used on the current task, as well as on a task stopped in TASK_TRACED and switched off. This lays the groundwork to share this code for all kinds of user-mode machine state access, not just ptrace.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit dc4362eaf692c2b0ad2e9cf5b7c7ca15677eb64e
Author: Roland McGrath
Date: Tue Dec 11 16:54:21 2007 +0100

x86-64 ptrace get/putreg current task

This generalizes the getreg and putreg functions so they can be used on the current task, as well as on a task stopped in TASK_TRACED and switched off. This lays the groundwork to share this code for all kinds of user-mode machine state access, not just ptrace.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit a1472754f2631796dbf25f0a1160032a8bca770b
Author: Roland McGrath
Date: Tue Dec 11 16:54:18 2007 +0100

x86-32 ptrace whitespace

This canonicalizes the indentation in the getreg and putreg functions.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit d80612809ef4011b31a0dbd57279d426c567bea4
Author: Roland McGrath
Date: Tue Dec 11 16:54:18 2007 +0100

x86-64 ptrace whitespace

This canonicalizes the indentation in the getreg and putreg functions.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit deaca215e8b988de04ee5ed06deec462bf8d0750
Author: Roland McGrath
Date: Tue Dec 11 16:54:18 2007 +0100

x86-64 ia32 ptrace pt_regs cleanup

This cleans up the getreg32/putreg32 functions to use struct pt_regs in a straightforward fashion, instead of equivalent ugly pointer arithmetic.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit c1f4ddc330974b7ef2256e14218481030dc4c9d4
Author: Roland McGrath
Date: Tue Dec 11 16:54:17 2007 +0100

x86: eflags enum

This removes the EF_* enum. It is no longer used, and duplicates the X86_EFLAGS_* constants.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit b554c65c53644917d15576f013aed91169a5f49e
Author: Roland McGrath
Date: Tue Dec 11 16:54:17 2007 +0100

x86: setup64 eflags constants

This cleans up arch/x86/kernel/setup64.c to use the X86_EFLAGS_* constants instead of the EF_* enum.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 1aa68ff48cd51a75b723902cd18ff8ded8cf802e
Author: H. Peter Anvin
Date: Tue Dec 11 16:54:17 2007 +0100

x86: use generic register names in struct sigcontext

Switch struct sigcontext to using register names without e- or r-prefixes for both 32- and 64-bit x86. This is intended as a preliminary step in unifying this code between architectures.

Signed-off-by: H. Peter Anvin
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner
commit 83511c4323ed2bd253b66ff8b64b5ebd451ad153
Author: H. Peter Anvin
Date: Tue Dec 11 16:54:17 2007 +0100

x86: Use generic register names in struct user_regs_struct

Switch struct user_regs_struct (which is no longer exported to userspace) to using register names without e- or r-prefixes for both 32- and 64-bit x86. This is intended as a preliminary step in unifying this code between architectures. Also, be a bit more strict in truncating 32-bit "extended" segment register values to 16 bits.

Signed-off-by: H. Peter Anvin
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 8c4ce71c400d8d334881856c86d2a982ae0d91d2
Author: H. Peter Anvin
Date: Tue Dec 11 16:54:16 2007 +0100

x86: rename the struct pt_regs members for 32/64-bit consistency

We have a lot of code which differs only by the naming of specific members of structures that contain registers. In order to enable additional unifications, this patch drops the e- or r- size prefix from the register names in struct pt_regs, and drops the x- prefixes for segment registers on the 32-bit side. This patch also performs the equivalent renames in some additional places that might be candidates for unification in the future.

Signed-off-by: H. Peter Anvin
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit b4329d1a6d54e23d321e2290bc0fadb74137bd25
Author: Jeremy Fitzhardinge
Date: Tue Dec 11 16:54:16 2007 +0100

x86: add set/clear_cpu_cap operations

The patch to suppress bitops-related warnings added a pile of ugly casts. Many of these were related to the management of x86 CPU capabilities. Clean these up by adding specific set/clear_cpu_cap macros, and use them consistently.

Signed-off-by: Jeremy Fitzhardinge
Cc: Andi Kleen
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit f55d0d6da3274a5179a65f7fd4daabf7fca09f29
Author: Jeremy Fitzhardinge
Date: Tue Dec 11 16:54:16 2007 +0100

x86: clean up bitops-related warnings

Add casts to appropriate places to silence spurious bitops warnings.

Signed-off-by: Jeremy Fitzhardinge
Cc: Andi Kleen
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 489b5c202be14c42d578897c2ac7b602730efa36
Author: Jeremy Fitzhardinge
Date: Tue Dec 11 16:54:15 2007 +0100

x86: partial unification of asm-x86/bitops.h

This unifies the set/clear/test bit functions of asm/bitops.h. I have not attempted to merge the bit-finding functions, since they rely on the machine word size and can't be easily restructured to work generically without a lot of #ifdefs. In particular, the 64-bit code can assume the presence of conditional move instructions, whereas 32-bit needs to be more careful.

The inline assembly for the bit operations has been changed to remove explicit sizing hints on the instructions, so the assembler will pick the appropriate instruction forms depending on the architecture and the context.

Signed-off-by: Jeremy Fitzhardinge
Cc: Andi Kleen
Cc: Linus Torvalds
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 1f356cf6f80490978f92bac029b7884fa7e1f925
Author: Pavel Machek
Date: Tue Dec 11 16:54:15 2007 +0100

time: more timer related cleanups

I was confused by the FSEC = 10^15 NSEC statement, plus small whitespace fixes. When there's a copyright, there should be a GPL.

Signed-off-by: Pavel Machek
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit e63707dd20ec07856d45eaea0161ce0242dec306
Author: Pavel Machek
Date: Tue Dec 11 16:54:15 2007 +0100

time: timer cleanups

Small cleanups to tick-related code. A wrong preempt count is followed by BUG(), so it is hardly KERN_WARNING.

Signed-off-by: Pavel Machek
Cc: john stultz
Signed-off-by: Andrew Morton
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner
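[ Editor's sketch for the set/clear_cpu_cap entry above -- assumed form: the helpers confine the bitmask cast to one place instead of scattering it at every call site. ]

	/* Sketch (assumed): wrap the capability-bitmask casts once, here. */
	#define set_cpu_cap(c, bit)	\
		set_bit(bit, (unsigned long *)((c)->x86_capability))
	#define clear_cpu_cap(c, bit)	\
		clear_bit(bit, (unsigned long *)((c)->x86_capability))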
commit 7b0245c43f8c1e8fa28124404f6aac716ac873e8
Author: Pavel Machek
Date: Tue Dec 11 16:54:15 2007 +0100

time: clean hungarian notation from timers

Clean up Hungarian notation from the timer code.

Signed-off-by: Pavel Machek
Cc: john stultz
Signed-off-by: Andrew Morton
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 93b26d0a613b0bf501eef2f6b13ec9a20ebc4635
Author: akpm@linux-foundation.org
Date: Tue Dec 11 16:54:14 2007 +0100

+ mm-prevent-dereferencing-non-allocated-per_cpu-variables-fix.patch added to -mm tree

The patch titled mm-prevent-dereferencing-non-allocated-per_cpu-variables-fix has been added to the -mm tree. Its filename is mm-prevent-dereferencing-non-allocated-per_cpu-variables-fix.patch

Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit a6d13e672f1c33629c1fa4240a611d5ae965ab50
Author: akpm@linux-foundation.org
Date: Tue Dec 11 16:54:14 2007 +0100

+ mm-prevent-dereferencing-non-allocated-per_cpu-variables.patch added to -mm tree

The patch titled prevent dereferencing non-allocated per_cpu variables has been added to the -mm tree. Its filename is mm-prevent-dereferencing-non-allocated-per_cpu-variables.patch

Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit d245af85060a948155590cf0c288348ea58b26b9
Author: Roland McGrath
Date: Tue Dec 11 16:54:14 2007 +0100

x86: PTRACE_SINGLEBLOCK

This adds the PTRACE_SINGLEBLOCK request on x86, matching the ia64 feature. The implementation comes from the generic ptrace code and relies on the low-level machine support provided by arch_has_block_step() et al.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 92eb690b04898c1bec42695892920d26e83c7fc9
Author: Roland McGrath
Date: Tue Dec 11 16:54:14 2007 +0100

x86: debugctlmsr kprobes

This adjusts the x86 kprobes implementation to cope with per-thread MSR_IA32_DEBUGCTLMSR being set for user mode. I haven't delved deep enough into the kprobes code to be really sure this covers all the cases where the user-mode BTF setting needs to be cleared or restored. It looks about right to me.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit fd5d8365b642017879b2a1105c2dda009b3c8991
Author: Roland McGrath
Date: Tue Dec 11 16:54:14 2007 +0100

x86: debugctlmsr arch_has_block_step

This implements user-mode step-until-branch on x86 using the BTF bit in MSR_IA32_DEBUGCTLMSR. It's just like single-step, only less so.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 96e26b23b22fd46b3ee13bf4d87dab26ec35198a
Author: Roland McGrath
Date: Tue Dec 11 16:54:14 2007 +0100

x86: debugctlmsr context switch

This adds low-level support for a per-thread value of MSR_IA32_DEBUGCTLMSR. The per-thread value is switched in when TIF_DEBUGCTLMSR is set.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner
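[ Editor's sketch for the debugctlmsr context-switch entry above -- assumed and simplified; the helper name is hypothetical, and the real code must also clear a stale value when only the previous task had the flag set: ]

	/* Sketch (assumed, simplified): install the incoming thread's
	 * MSR_IA32_DEBUGCTLMSR value, or clear any leftover one. */
	static void switch_debugctlmsr(struct task_struct *next_p,
				       struct thread_struct *next)
	{
		if (test_tsk_thread_flag(next_p, TIF_DEBUGCTLMSR))
			wrmsrl(MSR_IA32_DEBUGCTLMSR, next->debugctlmsr);
		else
			wrmsrl(MSR_IA32_DEBUGCTLMSR, 0);
	}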
commit 69faa0fd17ef9a151852b56e8e6d1aebcc0cadd1
Author: Roland McGrath
Date: Tue Dec 11 16:54:13 2007 +0100

x86: debugctlmsr kconfig

This adds the (internal) Kconfig macro CONFIG_X86_DEBUGCTLMSR, to be defined when configuring to support only hardware that definitely supports MSR_IA32_DEBUGCTLMSR with the BTF flag. The Intel documentation says "P6 family" and later processors all have it. I think the Kconfig dependencies are right to have it set for those and unset for others (i.e., when 586 and earlier are supported).

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 33f593caf53381efaac5b43cff38a5064512cf14
Author: Roland McGrath
Date: Tue Dec 11 16:54:13 2007 +0100

x86: debugctlmsr constants

This adds constant macros for a few of the bits in MSR_IA32_DEBUGCTLMSR.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit b89e3f35d9060ef7a8a77677d64ec34e6c038b36
Author: Roland McGrath
Date: Tue Dec 11 16:54:12 2007 +0100

ptrace: generic PTRACE_SINGLEBLOCK

This makes ptrace_request handle PTRACE_SINGLEBLOCK along with PTRACE_CONT et al. The new generic code makes use of the arch_has_block_step macro and generic entry points on machines that define them.

[ mingo@elte.hu: bugfix ]

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 41101245dfd53fafa194ad4eed8680b64d03103d
Author: Roland McGrath
Date: Tue Dec 11 16:54:12 2007 +0100

ptrace: arch_has_block_step

This defines the new macro arch_has_block_step() in linux/ptrace.h, a default for when asm/ptrace.h does not define it. This is the analog of arch_has_single_step() for step-until-branch on machines that have it. It declares the new user_enable_block_step function, which goes with the existing user_enable_single_step and user_disable_single_step. This is not used yet, but paves the way to harmonize on this interface for the arch-specific calls on all machines.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 2d8f989feff799d6de20ca37b062c3f71524e79c
Author: Roland McGrath
Date: Tue Dec 11 16:54:11 2007 +0100

x86-32 ptrace debugreg cleanup

This cleans up the 32-bit ptrace code to separate the guts of the debug register access from the implementation of PTRACE_PEEKUSR and PTRACE_POKEUSR. The new functions ptrace_[gs]et_debugreg match the new 64-bit entry points for parity, but they don't need to be global.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit eb4bb86d0d31cea6975f6439bd4a680be8895143
Author: Roland McGrath
Date: Tue Dec 11 16:54:11 2007 +0100

x86-64 ia32 ptrace debugreg cleanup

This cleans up the ia32 compat ptrace code to use shared code from native ptrace for the implementation guts of debug register access.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 747e89f1391c9a97ef0e9f990fdf898c41c3ff80
Author: Roland McGrath
Date: Tue Dec 11 16:54:11 2007 +0100

x86-64 ptrace debugreg cleanup

This cleans up the 64-bit ptrace code to separate the guts of the debug register access from the implementation of PTRACE_PEEKUSR and PTRACE_POKEUSR. The new functions ptrace_[gs]et_debugreg are made global so that the ia32 code can later be changed to call them too.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 47fff8a43947aa6054f3bcf4ae5a46d391ad5f71
Author: Roland McGrath
Date: Tue Dec 11 16:54:11 2007 +0100

x86-64 ptrace: use task_pt_regs

This cleans up the 64-bit ptrace code to use task_pt_regs instead of its own redundant code that does the same thing a different way.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner
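[ Editor's sketch for the debugctlmsr constants entry above -- the two bits most relevant to this series; the positions are the architecturally documented ones, the macro names are what the patch plausibly used: ]

	/* Low bits of MSR_IA32_DEBUGCTLMSR (architectural positions). */
	#define DEBUGCTLMSR_LBR		(1UL << 0)	/* last-branch recording */
	#define DEBUGCTLMSR_BTF		(1UL << 1)	/* single-step on branches */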
commit 82fb9c9ddb179e46e863595034b145b7eaa4d77d
Author: Roland McGrath
Date: Tue Dec 11 16:54:11 2007 +0100

x86-32 ptrace: use task_pt_regs

This cleans up the 32-bit ptrace code to use task_pt_regs instead of its own redundant code that does the same thing a different way.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 08d23aa27b7c953715286abf07cbe05b550c721a
Author: Roland McGrath
Date: Tue Dec 11 16:54:11 2007 +0100

powerpc: ptrace generic resume

This removes the handling for PTRACE_CONT et al from the powerpc ptrace code, so it uses the new generic code via ptrace_request.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 1ab20c0e2b72c2004c1a546ecdc4ee55f52e8d04
Author: Roland McGrath
Date: Tue Dec 11 16:54:10 2007 +0100

powerpc: arch_has_single_step

This defines the new standard arch_has_single_step macro. It makes the existing set_single_step and clear_single_step entry points global, and renames them to the new standard names user_enable_single_step and user_disable_single_step, respectively.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 76e8fd637c7ce50cf1fa91e9ac8c3b7353005f24
Author: Roland McGrath
Date: Tue Dec 11 16:54:10 2007 +0100

x86-32: ptrace generic resume

This removes the handling for PTRACE_CONT et al from the 32-bit ptrace code, so it uses the new generic code via ptrace_request.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 233bdc3ec059d63daa19390f1e78fceddfe714a0
Author: Roland McGrath
Date: Tue Dec 11 16:54:10 2007 +0100

x86-64: ptrace generic resume

This removes the handling for PTRACE_CONT et al from the 64-bit ptrace code, so it uses the new generic code via ptrace_request.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 83f267a31a3a023d4fa2a3d26386c0fc033a9356
Author: Roland McGrath
Date: Tue Dec 11 16:54:10 2007 +0100

ptrace: generic resume

This makes ptrace_request handle all the ptrace requests that wake up the traced task. These do low-level ptrace implementation magic that is not arch-specific and should be kept out of arch code. The implementations on each arch usually do the same thing. The new generic code makes use of the arch_has_single_step macro and generic entry points to handle PTRACE_SINGLESTEP.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 1650bed63b5ef49533f1a1a12eb1077eac85e095
Author: Roland McGrath
Date: Tue Dec 11 16:54:09 2007 +0100

x86 single_step: TIF_FORCED_TF

This changes the single-step support to use a new thread_info flag TIF_FORCED_TF instead of the PT_DTRACE flag in task_struct.ptrace. This keeps arch implementation uses out of this non-arch field.

This changes the ptrace access to eflags to mask TF and maintain the TIF_FORCED_TF flag directly if userland sets TF, instead of relying on ptrace_signal_deliver. The 64-bit and 32-bit kernels are harmonized on this same behavior. The ptrace_signal_deliver approach works now, but this change makes the low-level register access code reliable when called from different contexts than a ptrace stop, which will be possible in the future.

The 64-bit do_debug exception handler is also changed not to clear TF from user-mode registers. This matches the 32-bit kernel's behavior.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner
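[ Editor's sketch for the generic-resume entry above -- a condensed, hedged guess at the shared wake-up logic in kernel/ptrace.c; the real code also covers PTRACE_KILL and PTRACE_SINGLEBLOCK: ]

	/* Sketch (assumed, simplified) of the shared resume path. */
	static int ptrace_resume(struct task_struct *child, long request,
				 long data)
	{
		if (!valid_signal(data))
			return -EIO;

		if (request == PTRACE_SYSCALL)
			set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
		else
			clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);

		if (request == PTRACE_SINGLESTEP) {
			if (!arch_has_single_step())
				return -EIO;
			user_enable_single_step(child);
		} else {
			user_disable_single_step(child);
		}

		child->exit_code = data;
		wake_up_process(child);
		return 0;
	}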
commit eaadb751fe7d9f1f1d16b6ede0e83ae66ac464ad
Author: Roland McGrath
Date: Tue Dec 11 16:54:09 2007 +0100

x86: single_step: share code

This removes the single-step code from ptrace_32.c and uses the step.c code shared with the 64-bit kernel. The two versions of the code were nearly identical already, so the shared code has only a couple of simple #ifdefs.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit d68cb9478526855759132b722413052829c20660
Author: Roland McGrath
Date: Tue Dec 11 16:54:09 2007 +0100

x86: single_step 0xf0

This fixes the 64-bit single-step handling code's instruction decoder to grok the 0xf0 (lock) prefix, which the 32-bit code already does correctly.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit edb2f8c31be313ea6b70e0e16382f06391944879
Author: Roland McGrath
Date: Tue Dec 11 16:54:09 2007 +0100

x86: single_step segment macros

This cleans up the single-step code to use the asm/segment.h macros for segment selector magic bits, rather than its own constant.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 630e12f4ec239f179b3192422205442df883e8ff
Author: Roland McGrath
Date: Tue Dec 11 16:54:08 2007 +0100

x86: single_step moved

This moves the single-step support code from ptrace_64.c into a new file step.c, verbatim. This paves the way for consolidating this code between the 64-bit and 32-bit versions.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 555d38e80e6635116b3a0881c32c1e81e4e54963
Author: Roland McGrath
Date: Tue Dec 11 16:54:08 2007 +0100

x86: arch_has_single_step

This defines the new standard arch_has_single_step macro. It makes the existing set_singlestep and clear_singlestep entry points global, and renames them to the new standard names user_enable_single_step and user_disable_single_step, respectively.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 11f04959792821bdc5db219702a9f155a0a6b993
Author: Roland McGrath
Date: Tue Dec 11 16:54:08 2007 +0100

x86: remove TRAP_FLAG

This gets rid of the local constant macro TRAP_FLAG. It's redundant with the public constant macro X86_EFLAGS_TF.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 9c4efd8f2bbaec4736fbc1a1f0780ef14ff99590
Author: Roland McGrath
Date: Tue Dec 11 16:54:08 2007 +0100

x86: segment selector macros

This copies into asm-x86/segment_64.h some macros from asm-x86/segment_32.h for dissecting segment selectors. This lets other code use these macros uniformly on 32/64-bit rather than duplicating the constants elsewhere.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 525b2171bcc1a55a303175d18fbc4e13400dca76
Author: Roland McGrath
Date: Tue Dec 11 16:54:07 2007 +0100

ptrace: arch_has_single_step

This defines the new macro arch_has_single_step() in linux/ptrace.h, a default for when asm/ptrace.h does not define it. It declares the new user_enable_single_step and user_disable_single_step functions. This is not used yet, but paves the way to harmonize on this interface for the arch-specific calls on all machines.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner
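[ Editor's sketch for the segment selector macros entry above -- these reflect the architectural selector layout (bits 0-1 RPL, bit 2 table indicator, rest index); the names match the i386 header being copied: ]

	/* An x86 selector: | index (13 bits) | TI | RPL (2 bits) | */
	#define SEGMENT_RPL_MASK	0x3	/* requested privilege level */
	#define SEGMENT_TI_MASK		0x4	/* table indicator: 0=GDT, 1=LDT */
	#define USER_RPL		0x3	/* user selectors carry RPL 3 */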
commit 653baf26a3f08b87a11b05c35ac9236fa2cbc78d
Author: Andrew Morton
Date: Tue Dec 11 16:54:07 2007 +0100

x86: kmap_atomic() debugging

[ mingo@elte.hu: cleanups and made dependent on CONFIG_DEBUG_HIGHMEM. This caught a handful of bugs already, so let's apply it. If it gets things wrong we'll disable it. ]

Signed-off-by: Andrew Morton
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 78d59d75c6c1f7fb9ba7a3bf65cbe98ca7c25a3f
Author: Mathieu Desnoyers
Date: Tue Dec 11 16:54:07 2007 +0100

x86: fall back on interrupt disable in cmpxchg8b on 80386 and 80486

Actually, on 386, cmpxchg and cmpxchg_local fall back on cmpxchg_386_u8/16/32: it disables interrupts around non-atomic updates to mimic the cmpxchg behavior. The comment:

/* Poor man's cmpxchg for 386. Unsuitable for SMP */

already present in cmpxchg_386_u32 tells much about how this cmpxchg implementation should not be used in an SMP context. However, cmpxchg_local can perfectly well use this fallback, since it only needs to be atomic wrt the local cpu.

This patch adds a cmpxchg_486_u64 and uses it as a fallback for cmpxchg64 and cmpxchg64_local on 80386 and 80486.

Q: But why is it called cmpxchg_486 when the other functions are called cmpxchg_386?

A: Because the standard cmpxchg is missing only on 386, but cmpxchg8b is missing both on 386 and 486. Citing Intel's instruction set reference:

cmpxchg: This instruction is not supported on Intel processors earlier than the Intel486 processors.

cmpxchg8b: This instruction encoding is not supported on Intel processors earlier than the Pentium processors.

Q: What's the reason to have cmpxchg64_local on 32-bit architectures? Without that need all this would just be a few simple defines.

A: cmpxchg64_local on 32-bit architectures takes unsigned long long parameters, but cmpxchg_local only takes longs. Since we have cmpxchg8b to execute an 8-byte cmpxchg atomically on Pentium and up, it makes sense to provide a flavor of cmpxchg and cmpxchg_local using this instruction. Also, for 32-bit architectures lacking the 64-bit atomic cmpxchg, it makes sense _not_ to define cmpxchg64 while cmpxchg could still be available. Moreover, cmpxchg64_local will be emulated by disabling interrupts on all architectures where it is not supported atomically. Therefore, we *could* turn cmpxchg64_local into a cmpxchg_local, but it would make the 386/486 fallbacks ugly, make its design different from cmpxchg/cmpxchg64 (which really depend on atomic operations and cannot be emulated), and require __cmpxchg_local to be expressed as a macro rather than an inline function so the parameters would not be fixed to unsigned long long in every case. So I think cmpxchg64_local makes sense there, but I am open to suggestions.

Q: Are there any callers?

A: I am actually using it in LTTng in my timestamping code. I use it to work around CPUs with asynchronous TSCs. I need to update 64-bit values atomically on this 32-bit architecture.

Changelog:
- Ran through checkpatch.

Signed-off-by: Mathieu Desnoyers
Cc: Andi Kleen
Signed-off-by: Andrew Morton
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner
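[ Editor's usage sketch for cmpxchg64_local as motivated in the entry above -- hypothetical names, for illustration: monotonically advance a 64-bit timestamp on a 32-bit CPU, atomically with respect to the local cpu only. ]

	/* Sketch: lock-free 64-bit update on a 32-bit CPU, local-cpu atomic. */
	static u64 last_tsc;

	static void update_last_tsc(u64 now)
	{
		u64 old;

		do {
			old = last_tsc;
			if (now <= old)
				return;	/* a newer value is already stored */
		} while (cmpxchg64_local(&last_tsc, old, now) != old);
	}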
commit 1f7faee233617ed96e0032088b27f424e4e92d70
Author: Ralf Baechle
Date: Tue Dec 11 16:54:07 2007 +0100

mips, x86: optimize the i8259 code a bit

The timer code always calls the clock_event_device set_next_event and set_mode methods with interrupts disabled, so there is no need to use spin_lock_irqsave / spin_unlock_irqrestore for those.

Signed-off-by: Ralf Baechle
Acked-by: Thomas Gleixner
Signed-off-by: Andrew Morton
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 27b82f4f1aeaf616ef852e958c7e1983c8ec91c2
Author: Christoph Lameter
Date: Tue Dec 11 16:54:07 2007 +0100

x86: 64-bit, make sparsemem vmemmap the only memory model

Use sparsemem as the only memory model for UP, SMP and NUMA. Measurements indicate that DISCONTIGMEM has a higher overhead than sparsemem. And FLATMEM's benefits are minimal. So I think it's best to simply standardize on sparsemem.

Results of page allocator tests (the test can be had via git from the slab git tree, branch tests). Measurements in cycle counts. 1000 allocations were performed and then the average cycle count was calculated.

Order  FlatMem  Discontig  SparseMem
    0      639        665        641
    1      567        647        593
    2      679        774        692
    3      763        967        781
    4      961       1501        962
    5     1356       2344       1392
    6     2224       3982       2336
    7     4869       7225       5074
    8    12500      14048      12732
    9    27926      28223      28165
   10    58578      58714      58682

(Note that FlatMem is an SMP config and the rest are NUMA configurations.)

Memory use:

SMP Sparsemem
-------------

Kernel size:

   text    data     bss     dec     hex filename
3849268  397739 1264856 5511863  541ab7 vmlinux

             total       used       free     shared    buffers     cached
Mem:       8242252      41164    8201088          0        352      11512
-/+ buffers/cache:      29300    8212952
Swap:      9775512          0    9775512

SMP Flatmem
-----------

Kernel size:

   text    data     bss     dec     hex filename
3844612  397739 1264536 5506887  540747 vmlinux

So 4.5k growth in text size vs. FLATMEM.

             total       used       free     shared    buffers     cached
Mem:       8244052      40544    8203508          0        352      11484
-/+ buffers/cache:      28708    8215344

2k growth in overall memory use after boot.

NUMA discontig:

   text    data     bss     dec     hex filename
3888124  470659 1276504 5635287  55fcd7 vmlinux

             total       used       free     shared    buffers     cached
Mem:       8256256      56908    8199348          0        352      11496
-/+ buffers/cache:      45060    8211196
Swap:      9775512          0    9775512

NUMA sparse:

   text    data     bss     dec     hex filename
3896428  470659 1276824 5643911  561e87 vmlinux

8k text growth. Given that we fully inline virt_to_page and friends now, that is rather good.

             total       used       free     shared    buffers     cached
Mem:       8264720      57240    8207480          0        352      11516
-/+ buffers/cache:      45372    8219348
Swap:      9775512          0    9775512

The total available memory is increased by 8k.

This patch makes sparsemem the default and removes discontig and flatmem support from x86.

Acked-by: Andi Kleen
Signed-off-by: Christoph Lameter
Signed-off-by: Andrew Morton
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit cf603b8758f6905610f6ae9231500a0a7502ec42
Author: Borislav Petkov
Date: Tue Dec 11 16:54:06 2007 +0100

x86: vmlinux_32.lds.S: remove repeated comment from the x86-32 linker script

Remove a repeated comment from the linker script for the x86-32 target.

Signed-off-by: Borislav Petkov
Signed-off-by: Andrew Morton
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner
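[ Editor's sketch for the i8259 entry above -- illustrative shape, not the verbatim driver: since the clockevents core invokes these methods with interrupts already off, the plain lock suffices. ]

	/* Sketch (assumed, simplified): callers guarantee irqs are disabled,
	 * so the _irqsave/_irqrestore variants are unnecessary here. */
	static int pit_next_event(unsigned long delta,
				  struct clock_event_device *evt)
	{
		spin_lock(&i8253_lock);		/* was: spin_lock_irqsave() */
		outb_p(delta & 0xff, PIT_CH0);	/* LSB */
		outb_p(delta >> 8, PIT_CH0);	/* MSB */
		spin_unlock(&i8253_lock);	/* was: spin_unlock_irqrestore() */
		return 0;
	}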
commit e47d6fd89996144ef1c7ea8d06baf17bd8ab71ec
Author: Yinghai Lu
Date: Tue Dec 11 16:54:06 2007 +0100

x86: do not set boot cpu in cpu_online_map at x86_64_start_kernel()

In init/main.c, boot_cpu_init() does that later.

Signed-off-by: Yinghai Lu
Cc: Zachary Amsden
Cc: Andi Kleen
Signed-off-by: Andrew Morton
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 4f308d09ca0ea4a4f06057a8f07f5e1a7a08db14
Author: Yinghai Lu
Date: Tue Dec 11 16:54:06 2007 +0100

x86: set cpu_index to nr_cpus instead of 0

The same BIOS will support two/four dual-core/quad-core systems, and will get:

ACPI: LAPIC (acpi_id[0x01] lapic_id[0x00] enabled)
Processor #0 15:1 APIC version 16
ACPI: LAPIC (acpi_id[0x02] lapic_id[0x01] enabled)
Processor #1 15:1 APIC version 16
ACPI: LAPIC (acpi_id[0x03] lapic_id[0x02] enabled)
Processor #2 15:1 APIC version 16
ACPI: LAPIC (acpi_id[0x04] lapic_id[0x03] enabled)
Processor #3 15:1 APIC version 16
ACPI: LAPIC (acpi_id[0x05] lapic_id[0x84] disabled)
ACPI: LAPIC (acpi_id[0x06] lapic_id[0x85] disabled)
ACPI: LAPIC (acpi_id[0x07] lapic_id[0x86] disabled)
ACPI: LAPIC (acpi_id[0x08] lapic_id[0x87] disabled)
ACPI: LAPIC (acpi_id[0x09] lapic_id[0x88] disabled)
ACPI: LAPIC (acpi_id[0x0a] lapic_id[0x89] disabled)
ACPI: LAPIC (acpi_id[0x0b] lapic_id[0x8a] disabled)
ACPI: LAPIC (acpi_id[0x0c] lapic_id[0x8b] disabled)
ACPI: LAPIC (acpi_id[0x0d] lapic_id[0x8c] disabled)
ACPI: LAPIC (acpi_id[0x0e] lapic_id[0x8d] disabled)
ACPI: LAPIC (acpi_id[0x0f] lapic_id[0x8e] disabled)
ACPI: LAPIC (acpi_id[0x10] lapic_id[0x8f] disabled)
SMP: Allowing 16 CPUs, 12 hotplug CPUs

/proc/cpuinfo will then show a bunch of NULL CPUs with cpu_index=0, so assign an impossible cpu_index value initially instead of 0.

Signed-off-by: Yinghai Lu
Cc: Andi Kleen
Signed-off-by: Andrew Morton
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit d7d4be5c86af01e3812070da293d6f9e4d8822e1
Author: Jeremy Fitzhardinge
Date: Tue Dec 11 16:54:06 2007 +0100

xen-mask-_page_pcd-from-ptes

_PAGE_PCD maps a page with caching disabled, which is typically used for mapping hardware registers. Xen never allows it to be set on a mapping, and unprivileged guests never need it since they can't see the real underlying hardware. However, some uncached mappings are made early when probing the (non-existent) APIC, and it's OK to mask off the PCD flag in these cases. This became necessary because Xen started checking for this bit, rather than silently masking it off.

Signed-off-by: Jeremy Fitzhardinge
Signed-off-by: Andrew Morton
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 3f57d32e2b0d11b38b4ff4f2b93c2362323473cb
Author: Florian Fainelli
Date: Tue Dec 11 16:54:06 2007 +0100

x86: Add the RDC machine specific reboot fixup

The RDC R-321x SoC needs a reboot fixup which uses its internal hardware watchdog, set to reset the CPU on the next tick.

Signed-off-by: Florian Fainelli
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 2c4c11e772059439cf2726628bcacb77e1f0ffdc
Author: Florian Fainelli
Date: Tue Dec 11 16:54:06 2007 +0100

x86: Add support for the RDC R-321x SoC

This patch adds support for the RDC R-321x system-on-chip, also known as R-861x-(G). It uses the generic GPIO API and has support for the on-chip hardware watchdog.

Signed-off-by: Florian Fainelli
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner
Signed-off-by: Florian Fainelli Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit d65ae8827318e6957c7f8118009e987aa024114e Author: Florian Fainelli Date: Tue Dec 11 16:54:05 2007 +0100 pci: Add PCI identifiers for the RDC devices This patch defines the PCI identifiers found in the RDC R-321x System-on-Chip. Signed-off-by: Florian Fainelli Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 18e7765c4e0b7e4a08416439b2deec2132963703 Author: Florian Fainelli Date: Tue Dec 11 16:54:05 2007 +0100 x86: Add generic GPIO support to x86 This patch adds the generic GPIO support to the x86 architecture. We do the same as for MIPS: we let the machine override the gpio callbacks and provide default ones in mach-generic. Signed-off-by: Florian Fainelli Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 1297a775e2ae4a8fe51f57910ab8e13d0ac4817b Author: Andres Salomon Date: Tue Dec 11 16:54:05 2007 +0100 x86: GEODE: update GPIO API to support setting multiple GPIOs at once The existing Geode GPIO API only allows for updating one GPIO at once. There are instances where users want to update multiple GPIOs at once. With the current API, they are given two choices; either ignore the GPIO API:

    outl(0xc000, gpio_base + GPIO_OUTPUT_VAL);
    outl(0xc000, gpio_base + GPIO_OUTPUT_ENABLE);

Alternatively, call each GPIO update separately:

    geode_gpio_set(14, GPIO_OUTPUT_VAL);
    geode_gpio_set(15, GPIO_OUTPUT_VAL);
    geode_gpio_set(14, GPIO_OUTPUT_ENABLE);
    geode_gpio_set(15, GPIO_OUTPUT_ENABLE);

Neither is desirable. This patch changes the GPIO API to allow for setting of multiple GPIOs at once; rather than being passed an integer, we pass a bitmask and provide a translation function. The above code would now look like this:

    geode_gpio_set(geode_gpio(14)|geode_gpio(15), GPIO_OUTPUT_VAL);
    geode_gpio_set(geode_gpio(14)|geode_gpio(15), GPIO_OUTPUT_ENABLE);

Since there are no upstream users of the GPIO API yet (afaik), best to change this now. This also adds a bit of sanity checking; it is no longer possible to use a GPIO above 28. Note that the semantics of geode_gpio_isset() have changed: geode_gpio_isset(geode_gpio(3)|geode_gpio(4), ...) will return true only if both GPIOs are set. Signed-off-by: Andres Salomon Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 5a2da98776a6017ee17713f95cee9d2ff0517b2d Author: Vladimir Berezniker Date: Tue Dec 11 16:54:05 2007 +0100 x86_64: sanitize user specified e820 memmap values Sanitize user-specified e820 memory ranges, using the same logic that is applied to the values returned by the BIOS. This ensures consistent handling regardless of the source of the memory mappings. Allows overriding portions of the memory map without specifying one in its entirety (memmap=exactmap). E.g. marking a range of bad RAM as reserved with memmap=48M$528M. The BIOS supplied range

    BIOS-e820: 0000000000100000 - 000000007fe80000 (usable)

becomes

    user: 0000000000100000 - 0000000021000000 (usable)
    user: 0000000021000000 - 0000000024000000 (reserved)
    user: 0000000024000000 - 000000007fe80000 (usable)

Previously this did not work, as the original BIOS range was left untouched while the user-defined range was appended to the end of the memory map.
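The translation function mentioned in the Geode GPIO commit (1297a775) is presumably just a number-to-bitmask helper plus the new <=28 sanity check; a sketch of that shape:

    /* Turn a GPIO number into the bitmask the new API expects. */
    static inline u32 geode_gpio(unsigned int nr)
    {
            BUG_ON(nr > 28);        /* the Geode has no GPIOs above 28 */
            return 1 << nr;
    }

With that, geode_gpio_set(geode_gpio(14) | geode_gpio(15), GPIO_OUTPUT_VAL) updates both pins with a single register write.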
[ tglx: arch/x86 adaptation ] Signed-off-by: Vladimir Berezniker Signed-off-by: Andi Kleen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit a23e502316719b665e4615b948921b9b17265891 Author: Roland McGrath Date: Tue Dec 11 16:54:05 2007 +0100 x86: TLS cleanup This consolidates the four different places that implemented the same encoding magic for the GDT-slot 32-bit TLS support. The old tls32.c was renamed and is now only slightly modified to be the shared implementation. Signed-off-by: Roland McGrath Cc: Andrew Morton Cc: Zachary Amsden Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 2541ad16396e1cb605ca57fed6cff2c0c4a538a4 Author: Roland McGrath Date: Tue Dec 11 16:54:04 2007 +0100 x86: tls32 moved This renames arch/x86/ia32/tls32.c to arch/x86/kernel/tls.c, which does nothing now but paves the way to consolidate this code for 32-bit too. Signed-off-by: Roland McGrath Cc: Andrew Morton Cc: Zachary Amsden Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 4f9cb81328f02d98f140849c2af36873f22b9e76 Author: Roland McGrath Date: Tue Dec 11 16:54:03 2007 +0100 x86: desc_empty This replaces the desc_empty macro with an inline. It now easily handles any of the four different types used between 32/64 code to refer to these 8 bytes. It's identical in both asm-x86/processor_64.h and asm-x86/processor_32.h, so if these files ever get merged this function can be in the common code. This also removes the desc_equal macro because nothing uses it. Signed-off-by: Roland McGrath Cc: Andrew Morton Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 35169a39ceb25760a7ae0fa221d76b8e98d52339 Author: Roland McGrath Date: Tue Dec 11 16:54:03 2007 +0100 x86: ptrace fs/gs_base The fs_base and gs_base fields are available in user_regs_struct. But reading these via ptrace (PTRACE_GETREGS or PTRACE_PEEKUSR) does not give a reliably useful value. The thread_struct fields are 0 when do_arch_prctl decided to use a GDT slot instead of MSR_FS_BASE, which it does for a value under 1<<32. This changes ptrace access to fs_base and gs_base to work like PTRACE_ARCH_PRCTL does. That is, it reads the base address that user-mode memory access using the fs/gs instruction prefixes will use, regardless of how it's being implemented in the kernel. The MSR vs. GDT choice is an implementation detail that is pretty much hidden from userland in actual use, and there is no reason that ptrace should give the internal implementation picture rather than the user-mode semantic picture. In the case of setting the value, this can implicitly change the fsindex/gsindex value (also separately in user_regs_struct), which is what happens when the thread calls arch_prctl itself. In a PTRACE_SETREGS, the fs_base change will come after the fsindex change due to the order of the struct, so a change the debugger made to fs_base will have the intended effect, though another part of the user_regs_struct will now differ from what the debugger wrote when read back. This makes PTRACE_ARCH_PRCTL obsolete. We could consider declaring it deprecated and removing it one day, though there is no hurry. For the foreseeable future, debuggers have to assume an old kernel that does not report reliable fs_base/gs_base values in user_regs_struct and stick to PTRACE_ARCH_PRCTL anyway.
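The read-side semantics of the fs/gs_base change (commit 35169a39) can be sketched as follows, assuming the era's thread fields: fsindex selects either a TLS GDT slot (for a sub-4GB base) or MSR_FS_BASE. Field and constant names follow the old x86_64 code but should be treated as illustrative:

    /*
     * Report the base the fs prefix will actually use, hiding the
     * GDT-slot vs. MSR_FS_BASE implementation detail from userland.
     */
    static unsigned long ptrace_get_fs_base(struct task_struct *task)
    {
            if (task->thread.fsindex == FS_TLS_SEL)
                    return get_desc_base(&task->thread.tls_array[FS_TLS]);
            return task->thread.fs;         /* base as set via the MSR */
    }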
Signed-off-by: Roland McGrath Cc: Andrew Morton Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit af843fff5d8ceba41596649fed0d19ac1efcfa8b Author: Roland McGrath Date: Tue Dec 11 16:54:02 2007 +0100 x86: use get_desc_base This changes a couple of places to use the get_desc_base function. They were duplicating the same calculation with different equivalent code. Signed-off-by: Roland McGrath Cc: Andrew Morton Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit f3631ddf7100ea1aceff1d7051f3285e9ad7630e Author: Roland McGrath Date: Tue Dec 11 16:54:02 2007 +0100 x86: get_desc_base This defines the get_desc_base function in asm-x86/desc_64.h to match the one in desc_32.h. If these two files ever get merged together, this function could be the same in both. Signed-off-by: Roland McGrath Cc: Andrew Morton Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit fd2ab59395160127dc9491e78e791982aa218e9e Author: Roland McGrath Date: Tue Dec 11 16:54:02 2007 +0100 x86 vDSO: canonicalize sysenter .eh_frame Some assembler versions automagically optimize .eh_frame contents, changing their size. The CFI in sysenter.S was not using optimal formatting, so it would be changed by newer/smarter assemblers. This ran afoul of the wired constant for padding out the other vDSO images to match its size. This changes the original hand-coded source to use the optimal format encoding for its operations. That leaves nothing more for a fancy assembler to do, so the sizes will match the wired-in expected size regardless of the assembler version. Signed-off-by: Roland McGrath Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 3a45a9a26a624c451dd33b8c650253a9efdac940 Author: Roland McGrath Date: Tue Dec 11 16:54:02 2007 +0100 x86 vDSO: makefile cleanup This cleans up the arch/x86/vdso/Makefile rules for vdso.so to share more code with the vdso32-*.so rules and remove old cruft. Signed-off-by: Roland McGrath Cc: Andrew Morton Cc: Linus Torvalds Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 288543401b7baf2036cc3a5249aa4ef307457d66 Author: Roland McGrath Date: Tue Dec 11 16:54:02 2007 +0100 x86 vDSO: i386 vdso32 fix: > The .eh_frame sections are different in size, which bumps the sysenter > one into the next alignment datum. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 720c8688bd2634e4212d0af64e83eacfd74486ba Author: Roland McGrath Date: Tue Dec 11 16:54:01 2007 +0100 x86 vDSO: reorder vdso32 code This reorders the code in the 32-bit vDSO images to put the signal trampolines first and __kernel_vsyscall after them. The order does not matter to userland, it just uses what AT_SYSINFO or e_entry says. Since the signal trampolines are the same size in both versions of the vDSO, putting them first is the simplest way to get the addresses to line up. This makes it work to use a more compact layout for the vDSO. Signed-off-by: Roland McGrath Cc: Andrew Morton Cc: Linus Torvalds Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 702201ff3a59d27c21af546b2d50b65784893e92 Author: Roland McGrath Date: Tue Dec 11 16:54:01 2007 +0100 x86 vDSO: ia32 vsyscall removal This removes all the old vsyscall code from arch/x86/ia32/ that is no longer used because arch/x86/vdso/ code has replaced it. 
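The duplicated calculation that commits f3631ddf/af843fff fold into get_desc_base is the classic descriptor-base reassembly; roughly, as a sketch against the two 32-bit words of a segment descriptor:

    /*
     * A segment descriptor scatters its 32-bit base across bits 16-31
     * of the low word and bits 0-7 / 24-31 of the high word.
     */
    static inline unsigned long get_desc_base(const void *ptr)
    {
            const u32 *desc = ptr;

            return ((desc[0] >> 16) & 0x0000ffff) |
                   ((desc[1] << 16) & 0x00ff0000) |
                   (desc[1] & 0xff000000);
    }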
Signed-off-by: Roland McGrath Cc: Andrew Morton Cc: Linus Torvalds Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 43ccb7cf8e0bc6f99169d3acaf031d606ad4350f Author: Roland McGrath Date: Tue Dec 11 16:53:59 2007 +0100 x86 vDSO: consolidate vdso32 This makes x86_64's ia32 emulation support share the sources used in the 32-bit kernel for the 32-bit vDSO and much of its setup code. The 32-bit vDSO mapping now behaves the same on x86_64 as on native 32-bit. The abi.syscall32 sysctl on x86_64 now takes the same values that vm.vdso_enabled takes on the 32-bit kernel. That is, 1 means a randomized vDSO location, 2 means the fixed old address. The CONFIG_COMPAT_VDSO option is now available to make this the default setting, the same meaning it has for the 32-bit kernel. (This does not affect the 64-bit vDSO.) The argument vdso32=[012] can be used on both 32-bit and 64-bit kernels to set this parameter at boot time. The vdso=[012] argument still does this same thing on the 32-bit kernel. Signed-off-by: Roland McGrath Cc: Andrew Morton Cc: Linus Torvalds Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 3d3c0a45fb16ebf8584c978324e50f05c791496a Author: Roland McGrath Date: Tue Dec 11 16:53:58 2007 +0100 x86 vDSO: ia32 vdso32-syscall build This puts the syscall version of the 32-bit vDSO in arch/x86/vdso/vdso32/ for 64-bit IA32 support. This is not used yet, but it paves the way for consolidating the 32-bit vDSO source and build logic all in one place. Signed-off-by: Roland McGrath Cc: Andrew Morton Cc: Linus Torvalds Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit e68913d241715d77d0ce1f836a1b77eaf462081c Author: Roland McGrath Date: Tue Dec 11 16:53:57 2007 +0100 x86 vDSO: ia32 sysenter_return This changes the 64-bit kernel's support for the 32-bit sysenter instruction to use stored fields rather than constants for the user-mode return address, as the 32-bit kernel does. This adds a sysenter_return field to struct thread_info, as 32-bit has. There is no observable effect from this yet. It makes the assembly code independent of the 32-bit vDSO mapping address, paving the way for making the vDSO address vary as it does on the 32-bit kernel. [ akpm@linux-foundation.org: build fix on !CONFIG_IA32_EMULATION ] Signed-off-by: Roland McGrath Cc: Andrew Morton Cc: Linus Torvalds Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 5651cb78fb074337130f99116f404be1a05e5819 Author: Roland McGrath Date: Tue Dec 11 16:53:57 2007 +0100 x86 vDSO: ia32_sysenter_target This harmonizes the name for the entry point from the 32-bit sysenter instruction across 32-bit and 64-bit kernels. Signed-off-by: Roland McGrath Cc: Andrew Morton Cc: Linus Torvalds Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 3072269949a0a4c0bab140fe98691a88b2b74774 Author: Roland McGrath Date: Tue Dec 11 16:53:57 2007 +0100 x86 vDSO: vdso32 setup This moves arch/x86/kernel/sysenter_32.c to arch/x86/vdso/vdso32-setup.c, keeping all the code relating only to vDSO magic in the vdso/ subdirectory. This is a pure renaming, but it paves the way to consolidating the code for dealing with 32-bit vDSOs across CONFIG_X86_32 and CONFIG_IA32_EMULATION.
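The vdso32=[012] boot argument from the consolidation commit (43ccb7cf) maps naturally onto an early __setup hook; a minimal sketch, assuming a vdso_enabled integer that the mapping code consults (0 = off is an assumption; 1 = randomized, 2 = fixed old address per the message):

    static int __init vdso32_setup(char *s)
    {
            vdso_enabled = simple_strtoul(s, NULL, 0);
            return 1;
    }
    __setup("vdso32=", vdso32_setup);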
Signed-off-by: Roland McGrath Cc: Andrew Morton Cc: Linus Torvalds Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 0c7850b657141f1f7a052cec2a88d9bf47d63677 Author: Roland McGrath Date: Tue Dec 11 16:53:57 2007 +0100 x86 vDSO: i386 vdso32 install This enables 'make vdso_install' for i386 as on x86_64 and powerpc. Signed-off-by: Roland McGrath Cc: Andrew Morton Cc: Linus Torvalds Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit a0a874ad3b352e85914e652a40f825b4fccb1ae3 Author: Roland McGrath Date: Tue Dec 11 16:53:56 2007 +0100 x86 vDSO: absolute relocs This updates the exceptions for absolute relocs for the new symbol name convention used for symbols extracted from the vDSO images. Signed-off-by: Roland McGrath Cc: Andrew Morton Cc: Linus Torvalds Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit b758519642abc5bda20bc549812e2d2ba7ada16f Author: Roland McGrath Date: Tue Dec 11 16:53:56 2007 +0100 x86 vDSO: i386 vdso32 This makes the i386 kernel use the new vDSO build in arch/x86/vdso/vdso32/ to replace the old one from arch/x86/kernel/. Signed-off-by: Roland McGrath Cc: Andrew Morton Cc: Linus Torvalds Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 2cf119b0563cc1d23d295ce281923a63f02abd5b Author: Roland McGrath Date: Tue Dec 11 16:53:56 2007 +0100 x86 vDSO: vdso32 build This builds the 32-bit vDSO images in the arch/x86/vdso subdirectory. Nothing uses the images yet, but this paves the way for consolidating the vDSO build logic all in one place. The new images use a linker script sharing the layout parts from vdso-layout.lds.S with the 64-bit vDSO. A new vdso32-syms.lds is generated in the style of vdso-syms.lds. Signed-off-by: Roland McGrath Cc: Andrew Morton Cc: Linus Torvalds Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 6a3bc454c4a4a76f16f2f873db9f70c281310021 Author: Roland McGrath Date: Tue Dec 11 16:53:56 2007 +0100 x86 vDSO: arch/x86/vdso/vdso32 This moves the i386 vDSO sources into arch/x86/vdso/vdso32/, a new directory. This patch is a pure renaming, but paves the way for consolidating the vDSO build logic. Signed-off-by: Roland McGrath Cc: Andrew Morton Cc: Linus Torvalds Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 1f54b4df109aef5b5979813947d60545264cf95a Author: Roland McGrath Date: Tue Dec 11 16:53:56 2007 +0100 x86 vDSO: harmonize asm-offsets This change harmonizes the asm-offsets macros used in the 32-bit vDSO across 32-bit and 64-bit builds. It's a purely cosmetic change for now, but it paves the way for consolidating the 32-bit vDSO builds. Signed-off-by: Roland McGrath Cc: Andrew Morton Cc: Linus Torvalds Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 7756b7e5dfda5ff83e7fae4f5603163d80ffe85a Author: Roland McGrath Date: Tue Dec 11 16:53:56 2007 +0100 x86 vDSO: new layout This revamps the vDSO linker script to lay things out with the best packing of the data and good, separate alignment of the code. The rigid layout using VDSO_TEXT_OFFSET no longer matters to the kernel. I've moved the layout parts of the linker script into a new include file, vdso-layout.lds.S; this is in preparation for sharing the script for the 32-bit vDSO builds too. 
Signed-off-by: Roland McGrath Cc: Andrew Morton Cc: Linus Torvalds Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit fdd57efe707ee803aa6029945771ceae6700f3cd Author: Roland McGrath Date: Tue Dec 11 16:53:56 2007 +0100 x86 vDSO: remove vdso-syms.o Get rid of vdso-syms.o from the kernel link. We don't need it any more. Signed-off-by: Roland McGrath Cc: Andrew Morton Cc: Linus Torvalds Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit a5c0b67d1ccc33582f9c79f2fb4e0911eeb4ec4c Author: Roland McGrath Date: Tue Dec 11 16:53:56 2007 +0100 x86 vDSO: use vdso-syms.lds This patch changes the kernel's references to addresses in the vDSO image to be based on the symbols defined by vdso-syms.lds instead of the old vdso-syms.o symbols. This is all wrapped up in a macro defined by the new asm-x86/vdso.h header; that's the only place in the kernel source that has to know the details of the scheme for getting vDSO symbol values. Signed-off-by: Roland McGrath Cc: Andrew Morton Cc: Linus Torvalds Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit c22a6aada21108e00c8384e9c9bb24067d5b670c Author: Roland McGrath Date: Tue Dec 11 16:53:55 2007 +0100 x86 vDSO: generate vdso-syms.lds This patch adds a new way of extracting symbols from the built vDSO image. This is much simpler and less fragile than using ld -R; it removes the need to control the DSO layout quite so exactly. I was clearly unduly distracted by clever ld uses when I did the original vDSO implementation. Signed-off-by: Roland McGrath Cc: Andrew Morton Cc: Linus Torvalds Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit db37947e8dadbd886f942540083b97f65d56dd32 Author: Jiri Kosina Date: Tue Dec 11 16:53:55 2007 +0100 x86: randomize brk Randomize the location of the heap (brk) for i386 and x86_64. The location is randomized in the range starting at the current brk location up to a 0x02000000 offset, for both architectures. This, together with pie-executable-randomization.patch and pie-executable-randomization-fix.patch, should make the address space randomization on i386 and x86_64 complete. Arjan says: This is known to break older versions of some emacs variants, whose dumper code assumed that the last variable declared in the program is equal to the start of the dynamically allocated memory region. (The dumper is the code where emacs effectively dumps core at the end of its compilation stage; this coredump is then loaded as the main program during normal use.) iirc this was 5 years or so ago; we found this way back when I was at RH and we first did the security stuff there (including this brk randomization). It wasn't all variants of emacs, and it got fixed as a result (I vaguely remember that emacs already had code to deal with it for other archs/oses, just ifdeffed wrongly). It's a rare and wrong assumption as a general thing, just on x86 it mostly happened to be true (but to be honest, it'll break too if gcc does something fancy or if the linker does a non-standard order). Still, it's something we should at least document. Note 2: afaik it only broke the emacs *build*. I'm not 100% sure about that (it IS 5 years ago) though.
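A sketch of the brk randomization (commit db37947e), assuming the kernel's randomize_range() helper picks a page-aligned value in the window; the exact fallback behavior is an assumption:

    unsigned long arch_randomize_brk(struct mm_struct *mm)
    {
            /* pick a heap start somewhere in [brk, brk + 32MB) */
            unsigned long range_end = mm->brk + 0x02000000;

            /* randomize_range() returns 0 on failure: keep the old brk then */
            return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
    }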
[akpm@linux-foundation.org: deuglification] Signed-off-by: Jiri Kosina Cc: Arjan van de Ven Cc: Roland McGrath Cc: Jakub Jelinek Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit ba98b4eebce01732cc469fb8007fa40d65e77c2d Author: Robert Richter Date: Tue Dec 11 16:53:54 2007 +0100 Extended interrupt LVT support for AMD Barcelona The macro definitions in apicdef.h have also been updated. The patch is relative to the x86/cleanup tree. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 4ac31d69ad754c3d4b126330b0dc49c3b6621686 Author: Christoph Lameter Date: Tue Dec 11 16:53:53 2007 +0100 x86: make stack size configurable Make the kernel stack size configurable. SGI NUMA configurations may need more stack because cpumasks and nodemasks are at times kept on the stack. This patch allows running with 16k or 32k kernel stacks. [tglx@linutronix.de: add range check and dependencies and fix the !NUMA case] Signed-off-by: Christoph Lameter Cc: Andi Kleen Cc: Mike Travis Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 354f2a7015a8b8a5870548399fe54cad8e370583 Author: Barry Kasindorf Date: Tue Dec 11 16:53:49 2007 +0100 oprofile: op_model_athlon.c support for AMD family 10h barcelona performance counters This patch is for controlling the upper 32 bits of the event ctrl MSRs. This includes the upper 4 bits of the event select and the Guest Only and Host Only bits. This patch is necessary to make Event Based Profiling work reliably on a Family 10h processor. [akpm@linux-foundation.org: checkpatch.pl fixes] Signed-off-by: Barry Kasindorf Signed-off-by: Robert Richter Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 812fab48d989a46f6bdc3495261c27013be79af1 Author: Yinghai Lu Date: Tue Dec 11 16:53:49 2007 +0100 x86: check and enable MMCONFIG for AMD Family 10h Opteron Check and enable MMCONFIG for the AMD Family 10h Opteron. [akpm@linux-foundation.org: section fix] Signed-off-by: Yinghai Lu Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 8b7714794f4f40a325790e2fdc5147fe9a14f2e9 Author: Yinghai Lu Date: Tue Dec 11 16:53:42 2007 +0100 x86: set cfg_size for AMD Family 10h in case MMCONFIG is used Reuse pci_cfg_space_size but skip checking the PCI Express and PCI-X CAP IDs. Signed-off-by: Yinghai Lu Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit edf795f9c0a083027616bb8555ffe4df0f5c791a Author: Yinghai Lu Date: Tue Dec 11 16:53:42 2007 +0100 x86: check MSR to get mmconfig for amd family 10h opterons So even if MCFG is not there, we can still use MMCONFIG. Signed-off-by: Yinghai Lu Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 1e04909e0421811b227f99f3867974c591b3f734 Author: Robert Hancock Date: Tue Dec 11 16:53:41 2007 +0100 x86: validate against ACPI motherboard resources This patch adds validation of the MMCONFIG table against the ACPI reserved motherboard resources. If the MMCONFIG table is found to be reserved in ACPI, we don't bother checking the E820 table. The PCI Express firmware spec apparently tells BIOS developers that reservation in ACPI is required and E820 reservation is optional, so checking against ACPI first makes sense. Many BIOSes don't reserve the MMCONFIG region in E820 even though it is perfectly functional; the existing check needlessly disables MMCONFIG in these cases.
In order to do this, MMCONFIG setup has been split into two phases. If PCI configuration type 1 is not available then MMCONFIG is enabled early as before. Otherwise, it is enabled later after the ACPI interpreter is enabled, since we need to be able to execute control methods in order to check the ACPI reserved resources. Presently this is just triggered off the end of ACPI interpreter initialization. There are a few other behavioral changes here:

- Validate all MMCONFIG configurations provided, not just the first one.
- Validate that the entire required length of each configuration, according to the provided ending bus number, is reserved, not just the minimum required allocation.
- Validate that the area is reserved even if we read it from the chipset directly and not from the MCFG table. This catches the case where the BIOS didn't set the location properly in the chipset and has mapped it over other things it shouldn't have.

This also cleans up the MMCONFIG initialization functions so that they simply do nothing if MMCONFIG is not compiled in. Based on an original patch by Rajesh Shah from Intel. [akpm@linux-foundation.org: many fixes and cleanups] Signed-off-by: Robert Hancock Signed-off-by: Andi Kleen Cc: Rajesh Shah Cc: Jesse Barnes Acked-by: Linus Torvalds Cc: Andi Kleen Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 4c733f233e790a62d2a704ec4a5961ba32803d0c Author: Andi Kleen Date: Tue Dec 11 16:53:41 2007 +0100 x86: untangle __init references between IO data An earlier patch added IO APIC setup into local APIC setup. This caused modpost warnings. Fix them by untangling setup_local_APIC() and splitting it into smaller functions. The IO APIC initialization is only called for the BP init. Also removed some outdated debugging code and did minor cleanup. [ tglx: arch/x86 adaptation ] Signed-off-by: Andi Kleen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit ea9a42e2ce5d4af9760d82d03fa201292b30ac31 Author: Yinghai Lu Date: Tue Dec 11 16:53:41 2007 +0100 x86: use core id bits for apicid_to_node initialization We should use core id bits instead of max cores, to handle later AMD quad-core Opterons with downed cores. [ tglx: arch/x86 adaptation ] Signed-off-by: Yinghai Lu Signed-off-by: Andi Kleen Cc: Christoph Lameter Cc: Len Brown Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 31c2254bd0a3edd5003a0c8037b644c273ffa2bd Author: Yinghai Lu Date: Tue Dec 11 16:53:41 2007 +0100 store core id bits in cpuinfo_x86 We need to store the core id bits in cpuinfo_x86 in early_identify_cpu. We then use it to create the apicid_to_node array in k8topology.c. Signed-off-by: Yinghai Lu Signed-off-by: Andi Kleen Cc: Christoph Lameter Cc: Andi Kleen Cc: Len Brown Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit ad9c624d8d83ae66f13faf90c9022c3f6cc364cd Author: Adrian Bunk Date: Tue Dec 11 16:53:41 2007 +0100 i386: remove -maccumulate-outgoing-args Contrary to the comment "newer gccs do it by default", newer gcc versions default to -maccumulate-outgoing-args only with CONFIG_CC_OPTIMIZE_FOR_SIZE=n, and then only with some CPU settings.
Measured with an i386 defconfig, gcc 4.2.1 and kernel 2.6.23-rc1 ("orig" is the plain kernel, "changed" is with -maccumulate-outgoing-args removed):

$ ls -la vmlinux*
-rwxrwxr-x 1 bunk bunk 6269713 2007-07-24 22:19 vmlinux.changed
-rwxrwxr-x 1 bunk bunk 6425361 2007-07-24 22:19 vmlinux.orig
$ size vmlinux.*
   text    data     bss     dec     hex filename
4493465  504108  614400 5611973  55a1c5 vmlinux.changed
4646160  504108  614400 5764668  57f63c vmlinux.orig
$

That's a 2.5% size increase that does for sure hurt small systems. If the stack unwinder ever comes back and needs this as indicated in the comment, adding it to the cflags when the user enables the unwinder would be a better option. [ tglx: arch/x86 adaptation ] Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Andi Kleen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit b811488d4a95279c3e551be09018b3e2c04599d9 Author: Yinghai Lu Date: Tue Dec 11 16:53:41 2007 +0100 x86: clear IO_APIC before enabling apic error vector Some apic id lifting systems (4-socket quad core, 8-socket quad core) will do apic id lifting for the BSP, but the io-apic regs for ExtINT still use 0 as the destination. So when we enable the apic error vector on the BSP, we will get one APIC error:

CPU: L1 I Cache: 64K (64 bytes/line), D cache 64K (64 bytes/line)
CPU: L2 Cache: 512K (64 bytes/line)
CPU 0/4 -> Node 0
CPU: Physical Processor ID: 1
CPU: Processor Core ID: 0
SMP alternatives: switching to UP code
ACPI: Core revision 20070126
enabled ExtINT on CPU#0
ESR value before enabling vector: 00000000, after: 0000000c
APIC error on CPU0: 0c(08)
ENABLING IO-APIC IRQs
Synchronizing Arb IDs.

So move enable_IO_APIC from setup_IO_APIC into setup_local_APIC and call it before enabling the apic error vector. [ tglx: arch/x86 adaptation ] Signed-off-by: Yinghai Lu Signed-off-by: Andi Kleen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit b9fac3d7296b07292f1aba467c23348aabce126c Author: Thomas Gleixner Date: Tue Dec 11 16:53:40 2007 +0100 x86: cleanup kernel/setup_64.c Clean it up before applying more patches to it. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit b0e41c5d046b935e864cfc49ad172e4c3ded67fc Author: Steven Rostedt Date: Tue Dec 11 16:53:40 2007 +0100 remove unused tsk_thread from asm-offsets_64.c So this patch simply removes the "thread" entry from asm-offsets.c since I can't find an owner for it. Signed-off-by: Steven Rostedt Cc: Andrew Morton Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 4e32cf56a10f14d0a5ee8e79c041c0822b9e0b7e Author: Dave Jones Date: Tue Dec 11 16:53:40 2007 +0100 Use CR0 defines. Signed-off-by: Dave Jones Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 46a23f9d0a59e78c610bff9ff3f726988c866364 Author: Thomas Gleixner Date: Tue Dec 11 16:53:40 2007 +0100 x86: merge resume-trace.h variants Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 2b613f8d41d41427edce2ac5bdb117abd6a274f9 Author: Thomas Gleixner Date: Tue Dec 11 16:53:40 2007 +0100 x86: merge topology.h variants Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit efa5e3f4f09c3871882f8332e4da13fbbcbaa056 Author: Thomas Gleixner Date: Tue Dec 11 16:53:40 2007 +0100 x86: consolidate topology_32/64.h Reorder defines and do white space / coding style cleanups to get a readable diff. Also convert the macros to inline functions.
Move the pci-related inlines to pci.h. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 6c41d3019a0444fd61cf84c6bcbe2ebdd223f1ac Author: Thomas Gleixner Date: Tue Dec 11 16:53:39 2007 +0100 x86: adjust numa 32 namespace Use the 64bit numa variable names for numa32 as well. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 57688d21b4f6a6ab42434f796ffe23c290ebe357 Author: Thomas Gleixner Date: Tue Dec 11 16:53:39 2007 +0100 x86: fixup numa 64 namespace Using a variable name which is the same as a macro name is not really smart. Change the variable names and fix up all users. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 83b839d1fe2a7408ea75d6763f0c0d71a0f02840 Author: Thomas Gleixner Date: Tue Dec 11 16:53:39 2007 +0100 x86: cleanup numa_64.c Clean it up before applying more patches. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 1afdb8e5de6bb5f9bda1d92db9e045c53c85aa87 Author: Thomas Gleixner Date: Tue Dec 11 16:53:39 2007 +0100 x86: merge include/asm-x86/sparsemem.h Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 7e3d37fd68be047af79579e25872d0589fcbe0eb Author: Thomas Gleixner Date: Tue Dec 11 16:53:39 2007 +0100 x86: merge include/asm-x86/sparsemem.h Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 1b3d75bb2ca87ffa8a166aadc55fb3398f74187d Author: Thomas Gleixner Date: Tue Dec 11 16:53:38 2007 +0100 x86: put all kern_addr_valid() incarnations to pgtable.h Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit a6dd54484fdf1f01548f1fcf9ae012f6a456b46a Author: Thomas Gleixner Date: Tue Dec 11 16:53:38 2007 +0100 x86: merge acpi_32/64.h Merge the files. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 91f377cd4d54ab0a90a09cf80beb4e4b43382b24 Author: Thomas Gleixner Date: Tue Dec 11 16:53:38 2007 +0100 x86: cleanup acpi_32/64.h Fix the coding style to get a readable diff. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit c5c25a33d564bf227adf2ff9487dcfa5dbe69f3c Author: Thomas Gleixner Date: Tue Dec 11 16:53:38 2007 +0100 x86: cleanup smp.h variants Bring the smp.h variants into sync to prepare merging and paravirt support. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit aa6dd8d0f7de627c5b28b476f40f8118d9f1246d Author: Thomas Gleixner Date: Tue Dec 11 16:53:38 2007 +0100 x86: merge mpspec variants The delta is now minimal. Merge them. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 4dfc11064b55c1787b556461f3124f4bc3be2cce Author: Thomas Gleixner Date: Tue Dec 11 16:53:38 2007 +0100 x86: cleanup mpspec variants Bring the mpspec variants into sync to prepare merging and paravirt support. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 032fcd450093ce0e5919a4deb9d4d027a5de84e3 Author: Thomas Gleixner Date: Tue Dec 11 16:53:38 2007 +0100 x86: merge tlbflush.h variants The delta is now minimal. Merge them. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 1f4ccf93fb5816bdf2acbfa8bdc7d5cdd11d0d50 Author: Thomas Gleixner Date: Tue Dec 11 16:53:37 2007 +0100 x86: cleanup tlbflush.h variants Bring the tlbflush.h variants into sync to prepare merging and paravirt support.
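The macro-to-inline conversion in the topology consolidation (commit efa5e3f4) is the same pattern used elsewhere in this series: trade textual substitution for type checking. A sketch, assuming the i386-style cpu_to_node_map lookup table:

    /* was: #define cpu_to_node(cpu) ((int)cpu_to_node_map[cpu]) */
    static inline int cpu_to_node(int cpu)
    {
            return cpu_to_node_map[cpu];    /* argument is now type-checked */
    }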
Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 5f1171d9d33ba0a2ccc59713308fa2af69572072 Author: Thomas Gleixner Date: Tue Dec 11 16:53:37 2007 +0100 x86: cleanup boot_ioremap_32.c Coding style cleanup before modifying the file. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit ea943a73f225c6bab2f273120ba55aa7b823fea9 Author: Thomas Gleixner Date: Tue Dec 11 16:53:37 2007 +0100 x86: merge spinlock.h variants Finally merge them together. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit d55f9a459c75c6a572245c73b12b6450a7ad6ed6 Author: Thomas Gleixner Date: Tue Dec 11 16:53:37 2007 +0100 x86: spinlock_32/64 substitute types and instructions Use _slock_t for the spinlock data types and replace the instructions by string defines, which makes the code of the 32/64 bit versions more or less identical. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 532f4ec02ca4be686cf45cb17701dfa341122392 Author: Thomas Gleixner Date: Tue Dec 11 16:53:37 2007 +0100 x86: spinlock_32/64 match the jump labels and symbols Match the jump labels in the 32/64 variants and switch the 64bit version to symbols, so the functions are now almost identical except for the operand size. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit a426341949a0a8a99a7c13d3108f41f3a4c8863b Author: Thomas Gleixner Date: Tue Dec 11 16:53:37 2007 +0100 x86: use immediates instead of RW_LOCK_BIAS_STR Use immediates instead of RW_LOCK_BIAS_STR. This makes the code more readable and gets rid of the string constant. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 228bf61a4c1d33ca93f0794e1966241c44382082 Author: Thomas Gleixner Date: Tue Dec 11 16:53:37 2007 +0100 x86: fix asm constraints in spinlock_32/64.h Use the correct constraints for the spinlock assembler functions: read (modify) write functions need "+m" instead of "=m". Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 4d8066104d6381f564c882a49ad02992ba327823 Author: Glauber de Oliveira Costa Date: Tue Dec 11 16:53:37 2007 +0100 x86: consolidate spinlock.h The cli and sti instructions need to be replaced by paravirt hooks. For the i386 architecture, this is already done. The code requirements aren't much different from the x86_64 POV, so this part is consolidated in the common header. Signed-off-by: Glauber de Oliveira Costa Signed-off-by: Steven Rostedt Acked-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 07564fbf852a5e25dc2e237e248e307b783dab9e Author: Glauber de Oliveira Costa Date: Tue Dec 11 16:53:37 2007 +0100 irqflags consolidation This patch consolidates the irqflags include files containing common paravirt definitions. The native definitions for interrupt handling, halt, and such are the same for 32 and 64 bit, and they are kept in irqflags.h. The differences are split into the arch-specific files. The syscall function irq_enable_sysexit has a very specific i386 naming, and its name is therefore changed to a more general one. Signed-off-by: Glauber de Oliveira Costa Signed-off-by: Steven Rostedt Acked-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit dfaaaf1d91ab90bac4098ed50e414fd420326574 Author: Hiroshi Shimamoto Date: Tue Dec 11 16:53:37 2007 +0100 x86: clean up nmi_32/64.c Clean up and make nmi_32/64.c more similar:
- white space and coding style cleanup.
- nmi_cpu_busy is available on CONFIG_SMP.
- move the functions __acpi_nmi_enable, acpi_nmi_enable, __acpi_nmi_disable and acpi_nmi_disable.
- make variable names more similar.

Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 8713da3b54cf47784f4b077508c27856fbf0cce2 Author: clameter@sgi.com Date: Tue Dec 11 16:53:36 2007 +0100 x86: clean up stack allocation and free Clean up the allocation and freeing of stacks a bit by using a __GFP_ZERO flag instead of memset. Signed-off-by: Christoph Lameter Cc: Andi Kleen Cc: Mike Travis Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit b17d43fd96b9cb614b887fe618139cd8bfc345b7 Author: Randy Dunlap Date: Tue Dec 11 16:53:36 2007 +0100 x86: bitops_32.h style cleanups Coding style cleanups in x86/bitops_32.h:
- drop space in "* addr"
- whitespace & indentation fixes
- spello fixes

Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 1bc3afafe06222f2edcafdadb6b6acd2a8109aab Author: Bernhard Walle Date: Tue Dec 11 16:53:36 2007 +0100 x86: remove extern declarations for code, data, bss resources This patch removes the extern struct resource declarations for data_resource, code_resource and bss_resource on x86 and declares these three structures static, as done on other architectures like IA64. On i386, these structures are moved to setup_32.c (from e820_32.c) because that code is not specific to e820 and is also required on EFI systems. That makes the "extern" reference superfluous. On x86_64, data_resource, code_resource and bss_resource are passed to e820_reserve_resources() as arguments, just as done on i386 and IA64. That also avoids the "extern" reference and makes it possible to declare them static. Signed-off-by: Bernhard Walle Cc: "Luck, Tony" Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit e3d2d8c06658f9697230b68702fdad2d6f90da7a Author: Cyrill Gorcunov Date: Tue Dec 11 16:53:36 2007 +0100 x86: remove dead code in ia32-emu Remove the useless second check of the fsave argument in the save_i387_ia32() routine. It's possible the compiler does the same, but it is much better to remove the dead code explicitly. Signed-off-by: Cyrill Gorcunov Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit dd21e3efb5f20b467ed8a552efe3be575adcd862 Author: Lucas Woods Date: Tue Dec 11 16:53:36 2007 +0100 x86: remove duplicate includes Signed-off-by: Lucas Woods Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 41a824ecf85b06c3b117096218201fce3927d603 Author: Paul Jimenez Date: Tue Dec 11 16:53:36 2007 +0100 x86: mtrr use type bool [RESEND AGAIN] This is a janitorish patch to 1) remove private TRUE/FALSE #def's in favor of using the standard enum from linux/stddef.h and 2) switch the variables holding those values to type 'bool' (from linux/types.h), since it both seems more appropriate and allows for potentially better optimization. As a truly minor aside, I removed a couple of comments documenting a 'do_safe' parameter that seems to no longer exist.
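The constraint fix in commit 228bf61a (a few entries up) matters because a locked decrement both reads and writes the lock word; with "=m" the compiler may assume the old value is dead. A simplified sketch of the corrected i386-style lock sequence:

    static inline void __my_spin_lock(raw_spinlock_t *lock)
    {
            asm volatile("1: lock; decb %0\n\t"     /* read-modify-write */
                         "jns 3f\n"
                         "2: rep; nop\n\t"          /* spin politely */
                         "cmpb $0, %0\n\t"
                         "jle 2b\n\t"
                         "jmp 1b\n"
                         "3:"
                         : "+m" (lock->slock)       /* "+m": input AND output */
                         :
                         : "memory");
    }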
Signed-off-by: Paul Jimenez Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 0cf978a67b0da5d17cfb88e804d80fdea9361222 Author: Adrian Bunk Date: Tue Dec 11 16:53:35 2007 +0100 x86: pci-dma_64.c: cleanups This patch contains the following cleanups: - make the needlessly global iommu_setup() static - remove the unused EXPORT_SYMBOL(iommu_merge) Signed-off-by: Adrian Bunk Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 9c748b9135ea9cea96d92fe43052c9bce882fca3 Author: Adrian Bunk Date: Tue Dec 11 16:53:35 2007 +0100 x86: pci-calgary_64.c: make a variable static "debugging" is a horrible name for a global variable - thankfully it can become static. Also put it out of __read_mostly so that gcc no longer has to emit it at all. Signed-off-by: Adrian Bunk Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 1a77820902a3e8c16f5266a761f362d582435ad4 Author: Adrian Bunk Date: Tue Dec 11 16:53:35 2007 +0100 x86: nmi_64.c: make code static This patch makes the following needlessly global code static: - panic_on_timeout - setup_nmi_watchdog() Signed-off-by: Adrian Bunk Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 4a4e2bef99d7aec9fd3405dc315c203cb6a9cadb Author: Adrian Bunk Date: Tue Dec 11 16:53:35 2007 +0100 x86 mce_64.c: make struct mcelog static This patch makes the needlessly global struct mcelog static. Signed-off-by: Adrian Bunk Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 90068863f1dac22ffe7a88fccb985404061daff4 Author: Hiroshi Shimamoto Date: Tue Dec 11 16:53:35 2007 +0100 x86: io_apic_64.c: remove unused config check CONFIG_IRQBALANCE doesn't exist on x86_64. Signed-off-by: Hiroshi Shimamoto Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit d32359aabe8a4628d810d0733e148c9e2a241e35 Author: Adrian Bunk Date: Tue Dec 11 16:53:35 2007 +0100 x86 e820_64.c: make 2 functions static This patch makes the following needlessly global functions static: - e820_print_map() - early_panic() Signed-off-by: Adrian Bunk Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit d6fb0a3559c71332d7458285685fdd878bbc16a5 Author: H. Peter Anvin Date: Tue Dec 11 16:53:34 2007 +0100 x86: actually merge This actually merges into . Signed-off-by: H. Peter Anvin Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit b48faadb48661c6db6cbf102c0389a590a58b537 Author: H. Peter Anvin Date: Tue Dec 11 16:53:34 2007 +0100 x86: prepare merger of Prepare for merging by making the 32- and 64-bit versions textually identical. This involves: - removing arbitrary header inclusion differences - reorganizing the 32-bit version slightly to match the 64-bit version - using to unify the assembly code - renaming struct paravirt_patch to struct paravirt_patch_site in the 64-bit version to match the 32-bit version; there are no references to struct paravirt_patch elsewhere in the tree. Signed-off-by: H. Peter Anvin Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 2d9510ff82cb5c2c828170c6565bbc7ee68cf696 Author: Paul Jimenez Date: Tue Dec 11 16:53:34 2007 +0100 x86: Make i8259_64 more _32-like Howdy! 
Here's a simple janitorish patch for you: This patch mainly hinges around two includes and their ramifications: #include which provides cached_{slave,master}_mask #include which provides PIC_{MASTER,SLAVE}_{IMR,CMD} Adding these two includes and using those half dozen or so definitions removed 140+ lines of diffs between i8259_32.c and i8259_64.c, thus making it easier for the real substantive differences between them to show up, and hopefully therefore making it easier to eventually merge the two. All the warnings that checkpatch.pl throws (missing spaces after commas and >80 character lines) exist intentionally to match i8259_32.c. Signed-off-by: Paul Jimenez Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit a1e701939a29f9737e14aee3cbf0b4fb2fe3f6c7 Author: Thomas Gleixner Date: Tue Dec 11 16:53:34 2007 +0100 x86: move 8259 defines to i8259.h Move the i8259 defines and remove the now-unneeded io_ports.h. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 2fb990fa8c8c4d886ed00dbf61efeb43312d6b04 Author: Adrian Bunk Date: Tue Dec 11 16:53:33 2007 +0100 x86: unexport __{read,write}_lock_failed This patch removes the unused exports for __{read,write}_lock_failed. Signed-off-by: Adrian Bunk Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 0346c062e0dc562f0c407769977c115a89c1f146 Author: Dave Jones Date: Tue Dec 11 16:53:33 2007 +0100 Remove more bogus filenames in comments. Signed-off-by: Dave Jones Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit f9cc097cc2b8e6738a01f2f3d0ed3ec1c5044057 Author: Thomas Gleixner Date: Tue Dec 11 16:53:33 2007 +0100 x86: Nuke a ton of unused exports Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit c28b9c28fb97b6f408154a4d7d0883d919a2f039 Author: Thomas Gleixner Date: Tue Dec 11 16:53:33 2007 +0100 x86: Remove dead code and exports No users. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 114f366db1c28da70adaeb03130c8dda6ba94e25 Author: Thomas Gleixner Date: Tue Dec 11 16:53:33 2007 +0100 x86: nuke a ton of dead hpet code No users, just ballast. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 00e8c51ed76fa350cf21341d24f4172377f6a40a Author: Thomas Gleixner Date: Tue Dec 11 16:53:32 2007 +0100 x86: smp_64.c: Remove unused exports and cleanup while at it The exports are used nowhere; there is no reason why they were ever introduced. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit aa399dbf021253f685039b621bc545cf9a094b95 Author: Thomas Gleixner Date: Tue Dec 11 16:53:32 2007 +0100 x86: clean up arch/x86/kernel/time_64.c includes Reduce the "include everything" list to the minimum. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit b4e66a56fd23bb053e49d350a48c7cec8e5f571f Author: Thomas Gleixner Date: Tue Dec 11 16:53:32 2007 +0100 x86: share rtc code Remove the rtc code from time_64.c and add the extra bits to the i386 path. The ACPI century check is probably valid for i386 as well, but this is material for a separate patch. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 523ec58d78870efc5e0d8b6ab1bd6e4a7ff5bbd9 Author: Thomas Gleixner Date: Tue Dec 11 16:53:32 2007 +0100 x86: isolate the rtc code for sharing The inline code from mach-default/mach_time.h is moved to arch/x86/kernel/rtc.c and the header files are adjusted. Shrink the three dozen includes to the ones we really need.
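A sketch of the kind of substitution the i8259 unification (commit 2d9510ff above) enables; the PIC_* names are the i386 i8259 constants the message refers to, standing in for the standard 0x20/0x21/0xa0/0xa1 PIC ports:

    /* Mask every 8259A input, by name instead of magic I/O ports. */
    static void mask_both_8259A(void)
    {
            outb(0xff, PIC_MASTER_IMR);     /* was: outb(0xff, 0x21); */
            outb(0xff, PIC_SLAVE_IMR);      /* was: outb(0xff, 0xa1); */
    }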
Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 3eeb2c75f2f56f68be69eb5b094ededc11e57589 Author: Thomas Gleixner Date: Tue Dec 11 16:53:32 2007 +0100 x86: unify mc146818rtc.h - prepare for sharing rtc code Unify mc146818rtc.h by adding the rtc_cmos_read/write functions to time_64.c. This is a preparatory patch to finally share the rtc code, which is unsurprisingly similar. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit e527cfd1a50ebc38ffade4b5cc4d81fde3677a31 Author: Thomas Gleixner Date: Tue Dec 11 16:53:32 2007 +0100 x86: remove the duplicated arch/x86/ia32/mmap32.c Use mmap_32.c in arch/x86/mm instead. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 8f10e7ac416067ac5aca3b0aa0adf943b7add4ad Author: Thomas Gleixner Date: Tue Dec 11 16:53:31 2007 +0100 x86: clean up arch/x86/mm/mmap_32/64.c White space and coding style cleanup. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 535883e934be578d7708d7377349b9899d63224d Author: Thomas Gleixner Date: Tue Dec 11 16:53:31 2007 +0100 x86: clean up arch/x86/kernel/vsmp_64.c White space and coding style cleanup. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 6a7eafb41b6ba5fec5974e37afdf0f442f7db87f Author: Thomas Gleixner Date: Tue Dec 11 16:53:31 2007 +0100 x86: clean up ioport_32.c Remove unused variables, rename the "unused" argument to regp. It is used! Coding style fixes. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 2ca387a1b9edb596ac3c251db30762ae02c08032 Author: Thomas Gleixner Date: Tue Dec 11 16:53:31 2007 +0100 x86: simplify set_bitmap in ioport_32.c Simplify set_bitmap(). This is not in a hotpath and we really can use the straightforward loop through those bits. A similar implementation is used in the 64 bit code as well. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit c70129f7bdf2af67d8e1e1ffcab3d1595b0c2fd4 Author: Thomas Gleixner Date: Tue Dec 11 16:53:31 2007 +0100 x86: merge include/asm-x86/scatterlist.h Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 6444d8dee0dd7fb88ea3231ea06cb18c405772f1 Author: Thomas Gleixner Date: Tue Dec 11 16:53:31 2007 +0100 x86: merge include/asm-x86/dma.h Almost identical. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 9b1d1c3e0a62f44b15b2c523756cfe188d8e53bc Author: Thomas Gleixner Date: Tue Dec 11 16:53:31 2007 +0100 x86: merge futex_32/64.h Finally merge them together. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit ed20eec7adb4052d61b8148bc238cb511c5d75ba Author: Thomas Gleixner Date: Tue Dec 11 16:53:30 2007 +0100 x86: prepare merging futex_32/64.h Replace .quad/.long with a define and use the same asm syntax for i386 and x86_64. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 975680c983c97c6d7d02a2c2b5f8aad27927dd53 Author: Thomas Gleixner Date: Tue Dec 11 16:53:30 2007 +0100 x86: prepare merging arch/x86/kernel/apic_32/64.c Shuffle code around, so we get a readable diff. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit e7b10bbeaf5978bc472444e1fa79bd690df8a0a7 Author: Thomas Gleixner Date: Tue Dec 11 16:53:30 2007 +0100 x86: make smp_local_timer_interrupt() static Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 657bff2e787056f1a89597c844971c3f1c3250d5 Author: Thomas Gleixner Date: Tue Dec 11 16:53:30 2007 +0100 x86: move ack_bad_irq into irq code Match i386, where we have this in the irq code. It belongs there.
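The rtc_cmos_read/write accessors named in the mc146818rtc.h commit (3eeb2c75) follow the classic CMOS index/data port pattern; a minimal sketch with locking omitted for brevity:

    #define RTC_PORT(x)     (0x70 + (x))

    unsigned char rtc_cmos_read(unsigned char addr)
    {
            outb(addr, RTC_PORT(0));        /* select the CMOS register */
            return inb(RTC_PORT(1));        /* read its value */
    }

    void rtc_cmos_write(unsigned char val, unsigned char addr)
    {
            outb(addr, RTC_PORT(0));
            outb(val, RTC_PORT(1));
    }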
Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 5419baecd4e9d6bc9c04cf73e64cbad07c20d721 Author: Thomas Gleixner Date: Tue Dec 11 16:53:30 2007 +0100 x86: move ioapic code where it belongs The commit 399287229c775a8962a852a761d65dc9475dec7c hacked the ioapic resource mapping into apic.c for no good reason. Move the code into io_apic_64.c where it belongs. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 5cccb5f5d086838aab704bb156c3a8de7ff71fd9 Author: Thomas Gleixner Date: Tue Dec 11 16:53:29 2007 +0100 x86: remove obsolete declarations from proto.h Nuke duplicate and obsolete crap from this ugly dump bin. There are still some entries left which need to be sorted out, but I'm tired of that puzzle game right now. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 5dd811d94c6e8c790c95eb96c25c14356660cd8d Author: Thomas Gleixner Date: Tue Dec 11 16:53:29 2007 +0100 x86: remove duplicate start_kernel declaration start_kernel is already declared in a generic header file. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit a38479df13f5eec5b36ca9d4fa75a7d3c3af5cd4 Author: Thomas Gleixner Date: Tue Dec 11 16:53:29 2007 +0100 x86: remove obsolete nohpet declaration The lonely user is hpet.c, so there is no need to declare it elsewhere. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 4c3f54bbb37b26f7975415912b5679c0da11ff3e Author: Thomas Gleixner Date: Tue Dec 11 16:53:29 2007 +0100 x86: move pmtmr related declarations Move more stuff out of proto.h. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 98388ff78b4ed304b9f9336c3cd6208100dffe34 Author: Thomas Gleixner Date: Tue Dec 11 16:53:29 2007 +0100 x86: move tsc related declarations tsc also has its own header file. Nuke the stupid 64 bit ifdef while at it. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit e9916f03395e5405af18f02cbcd6b9ba3ba8ee13 Author: Thomas Gleixner Date: Tue Dec 11 16:53:29 2007 +0100 x86: move pda related declaration pda has its own header file as well. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 21c19aa4527741ea7288a5cfaf1ab5d2b98e4264 Author: Thomas Gleixner Date: Tue Dec 11 16:53:28 2007 +0100 x86: move page related declaration end_pfn is in page.h, so end_pfn_map has a place there as well. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 662b1d58c190838d3ca8280f010551bd44a11be0 Author: Thomas Gleixner Date: Tue Dec 11 16:53:28 2007 +0100 x86: move numa related declarations More stuff shuffled to the correct place. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit cb52e38f70a38dc719e688959c6a62fac62faf4d Author: Thomas Gleixner Date: Tue Dec 11 16:53:28 2007 +0100 x86: move mce related declarations Move the mce related declarations where they belong, fix the users and remove the 32bit dependency in mce.h. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 36d851ae14cfc29105ef537b128f5a78b6151e0d Author: Thomas Gleixner Date: Tue Dec 11 16:53:28 2007 +0100 x86: move debug related declarations to kdebug.h Move them and fix up some users.
Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit ac14081782d97904371c8f62c32afc62896b51e5 Author: Thomas Gleixner Date: Tue Dec 11 16:53:27 2007 +0100 x86: move k8 related declarations Move the k8 related declarations to k8.h and fix numa_64.c. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 4d4953088376641706f1a5efa5e9bcc1e059efca Author: Thomas Gleixner Date: Tue Dec 11 16:53:27 2007 +0100 x86: move idle related declarations Move the idle related declarations to processor_64.h, where the others are as well. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit ca707bc28fbfcf400da3280560082d7db44ea9d0 Author: Thomas Gleixner Date: Tue Dec 11 16:53:26 2007 +0100 x86: make early_identify_cpu static early_identify_cpu is only used in setup_64.c. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 21a3d4351e8ec26fbb3527f8b1327b1bbed02c95 Author: Thomas Gleixner Date: Tue Dec 11 16:53:26 2007 +0100 x86: move acpi and pci declarations Move the acpi/pci related declarations to the correct headers and remove the duplicates. Build fix from: Andrew Morton Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 55fc48e4b326ece056f783d7ddc10cf487f56351 Author: Thomas Gleixner Date: Tue Dec 11 16:53:26 2007 +0100 x86: remove duplicated declarations Remove declarations which are already made in the appropriate header file. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit a2656bd51048889e329f622345b3e93377095e4a Author: Thomas Gleixner Date: Tue Dec 11 16:53:26 2007 +0100 x86: merge apic_32/64.h Unify the apic.h variants. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 421b1e47b0471d36567091da486be7660a4471f0 Author: Thomas Gleixner Date: Tue Dec 11 16:53:25 2007 +0100 x86: use u32 for some lapic functions Use u32 so 32 and 64bit have the same interface. Andrew Morton: xen, lguest build fixes Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 4f63edd5109357a8f2f882e3b05df4782abe9f56 Author: Thomas Gleixner Date: Tue Dec 11 16:53:25 2007 +0100 x86: use u32 for safe_apic_wait_icr_idle() Preparatory patch for merging the apic headers. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 2f9a8c9d195b16c415238fa1876187578e9e8ec5 Author: Thomas Gleixner Date: Tue Dec 11 16:53:25 2007 +0100 x86: rename get_maxlvt to lapic_get_maxlvt Use the same name for the 32 and 64 bit variants. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 455a128fea154ceac45a9ccb1fed3c442ec83f01 Author: Thomas Gleixner Date: Tue Dec 11 16:53:25 2007 +0100 x86: prepare unification of include/asm-x86/apic_32/64.h White space and coding style cleanup. Move the K8 local apic defines to apicdef.h, where they belong. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit d87550ea47f829084ff838b3b8ad9b8bd6e65d10 Author: Thomas Gleixner Date: Tue Dec 11 16:53:25 2007 +0100 x86: Unify include/asm-x86/apicdef_32/64.h Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 57b1982e9c60dbd96c60fafcbed28e6689471dc2 Author: Thomas Gleixner Date: Tue Dec 11 16:53:25 2007 +0100 x86: merge arch/x86/kernel/ldt_32/64.c Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 43e0e60aaf7bff0877d686941f2526d71379a18a Author: Thomas Gleixner Date: Tue Dec 11 16:53:24 2007 +0100 x86: prepare arch/x86/kernel/ldt_32/64.c for merging White space and coding style cleanups. Change unsigned to int. There is no win when we compare mincount against pc->size, which is an int as well.
commit 2aaac59ab59b059546620f6d1b527f163091d4c1
Author: Thomas Gleixner
Date: Tue Dec 11 16:53:24 2007 +0100

    x86: clean up include/asm-x86/desc_64.h

    White space and coding style cleanup.

    Signed-off-by: Thomas Gleixner
    Signed-off-by: Ingo Molnar

commit f608891937ce0746576b91b89286dced37a7af30
Author: Thomas Gleixner
Date: Tue Dec 11 16:53:24 2007 +0100

    x86: clean up arch/x86/kernel/ldt_32/64.c

    White space and coding style cleanup.

    Signed-off-by: Thomas Gleixner
    Signed-off-by: Ingo Molnar

commit 3ec8dd0e600e15df2ca480cc09b15f0033744755
Author: Thomas Gleixner
Date: Tue Dec 11 16:53:24 2007 +0100

    x86: clean up arch/x86/kernel/e820_64.c

    White space and coding style cleanup.

    Signed-off-by: Thomas Gleixner
    Signed-off-by: Ingo Molnar

commit 0875985d89cd8319190eaf7df355350a478625b9
Author: Ingo Molnar
Date: Tue Dec 11 16:53:23 2007 +0100

    x86: code cleanups in arch/x86/kernel/pci-gart_64.c

    code cleanups:

                                       errors   lines of code   errors/KLOC
      arch/x86/kernel/pci-gart_64.c       183             748         244.6
      arch/x86/kernel/pci-gart_64.c         0             790             0

    Signed-off-by: Ingo Molnar
    Signed-off-by: Thomas Gleixner

commit ee3adcf65f4d75b797b1484d2aff4f670470d43e
Author: Ingo Molnar
Date: Tue Dec 11 16:53:23 2007 +0100

    x86: lindent arch/i386/math-emu, cleanup

    manually clean up some of the damage that lindent caused. (this is a
    separate commit so that in the unlikely case of a typo we can bisect
    it down to the manual edits.)

    Signed-off-by: Ingo Molnar
    Signed-off-by: Thomas Gleixner

commit 16207e842befe39d86f7e18e5af17d262e37755b
Author: Ingo Molnar
Date: Tue Dec 11 16:53:23 2007 +0100

    x86: lindent arch/i386/math-emu

    lindent these files:

                             errors   lines of code   errors/KLOC
      arch/x86/math-emu/       2236            9424         237.2
      arch/x86/math-emu/        128            8706          14.7

    no other changes. No code changed:

       text    data     bss      dec     hex filename
    5589802  612739 3833856 10036397  9924ad vmlinux.before
    5589802  612739 3833856 10036397  9924ad vmlinux.after

    the intent of this patch is to ease the automated tracking of kernel
    code quality - it's just much easier for us to maintain it if every
    file in arch/x86 is supposed to be clean.

    NOTE: it is a known problem of lindent that it causes some style
    damage of its own, but it's a safe tool (well, except for the gcc
    array range initializers extension), so we did the bulk of the
    changes via lindent, and did the manual fixups in a followup patch.

    the resulting math-emu code has been tested by Thomas Gleixner on a
    real 386 DX CPU as well, and it works fine.

    Signed-off-by: Ingo Molnar
    Signed-off-by: Thomas Gleixner

commit b5058c99c22af7424617711bd5edd285399270b6
Author: Ingo Molnar
Date: Tue Dec 11 16:53:22 2007 +0100

    x86: mach-voyager, lindent

    lindent the mach-voyager files to get rid of more than 300 style
    errors:

                                      errors   lines of code   errors/KLOC
      arch/x86/mach-voyager/ [old]       409            3729         109.6
      arch/x86/mach-voyager/ [new]        71            3678          19.3

    Signed-off-by: Ingo Molnar
    Signed-off-by: Thomas Gleixner

commit 64a8e03d8455a673eb3ea082cd0d1684a4f675eb
Author: Ingo Molnar
Date: Tue Dec 11 16:53:22 2007 +0100

    x86: clean up arch/x86/kernel/aperture_64.c printk()s

    clean up arch/x86/kernel/aperture_64.c printk()s.

    Signed-off-by: Ingo Molnar
    Signed-off-by: Thomas Gleixner

commit bd200e9855ad61f5fabdb2935b80b0e91e382195
Author: Ingo Molnar
Date: Tue Dec 11 16:53:22 2007 +0100

    x86: clean up arch/x86/kernel/aperture_64.c

    whitespace cleanup. No code changed:

       text    data     bss     dec     hex filename
       2080      76       4    2160     870 aperture_64.o.before
       2080      76       4    2160     870 aperture_64.o.after

                                       errors   lines of code   errors/KLOC
      arch/x86/kernel/aperture_64.c       114             299         381.2
      arch/x86/kernel/aperture_64.c         0             315             0

    Signed-off-by: Ingo Molnar
    Signed-off-by: Thomas Gleixner

commit d003974b4078236980a378fcc70b1db28b3870d5
Author: Thomas Gleixner
Date: Tue Dec 11 16:53:21 2007 +0100

    x86: clean up arch/x86/ia32/mmap32.c

    White space and coding style cleanup.

    Signed-off-by: Thomas Gleixner
    Signed-off-by: Ingo Molnar

commit 8c344235f323238dd1a2677938ffa01ccdcfc1cb
Author: Thomas Gleixner
Date: Tue Dec 11 16:53:21 2007 +0100

    x86: clean up arch/x86/ia32/syscall32.c

    White space and coding style cleanup.

    Signed-off-by: Thomas Gleixner
    Signed-off-by: Ingo Molnar

commit 0b0fe78c1595827974934bc2ba7bc82b2c0d12f7
Author: Thomas Gleixner
Date: Tue Dec 11 16:53:21 2007 +0100

    x86: clean up arch/x86/ia32/sys_ia32.c

    White space and coding style cleanup.

    Signed-off-by: Thomas Gleixner
    Signed-off-by: Ingo Molnar

commit dd2b690bd8fdcf70c67e89def4ad22e5e08a5998
Author: Thomas Gleixner
Date: Tue Dec 11 16:53:20 2007 +0100

    x86: clean up arch/x86/ia32/ptrace32.c

    White space and coding style cleanup.

    Signed-off-by: Thomas Gleixner
    Signed-off-by: Ingo Molnar

commit c75207562e721a90c04ceabebf103e189e922d7d
Author: Thomas Gleixner
Date: Tue Dec 11 16:53:20 2007 +0100

    x86: clean up arch/x86/ia32/ipc32.c

    White space and coding style cleanup.

    Signed-off-by: Thomas Gleixner
    Signed-off-by: Ingo Molnar

commit a1b5e358fde0ac896e52848ebb3a116fb3d10e5d
Author: Thomas Gleixner
Date: Tue Dec 11 16:53:20 2007 +0100

    x86: clean up arch/x86/ia32/ia32_signal.c

    White space and coding style cleanup.

    Signed-off-by: Thomas Gleixner
    Signed-off-by: Ingo Molnar

commit f13bfe03a99da73ea9de46a1fd6d14cb22cc1b72
Author: Thomas Gleixner
Date: Tue Dec 11 16:53:19 2007 +0100

    x86: clean up arch/x86/ia32/aout32.c

    White space and coding style cleanup.

    Signed-off-by: Thomas Gleixner
    Signed-off-by: Ingo Molnar

commit 22e39df6938224492fe494793845b95521b49ac4
Author: Thomas Gleixner
Date: Tue Dec 11 16:53:19 2007 +0100

    x86: clean up arch/x86/ia32/fpu32.c

    White space and coding style cleanup.

    Signed-off-by: Thomas Gleixner
    Signed-off-by: Ingo Molnar

commit 349bb06956f4bd15991298008ab2264c940465f0
Author: Ingo Molnar
Date: Tue Dec 11 16:53:19 2007 +0100

    x86: clean up arch/x86/mm/pageattr_64.c

    clean up arch/x86/mm/pageattr_64.c. no code changed:

       text    data     bss     dec     hex filename
       1751      16       0    1767     6e7 pageattr_64.o.before
       1751      16       0    1767     6e7 pageattr_64.o.after

    Signed-off-by: Ingo Molnar
    Signed-off-by: Thomas Gleixner

commit 3d9c40187c0b735a5b66556d32f18be4379be36f
Author: Ingo Molnar
Date: Tue Dec 11 16:53:18 2007 +0100

    x86: clean up arch/x86/mm/pageattr_32.c

    clean up arch/x86/mm/pageattr_32.c. no code changed:

       text    data     bss     dec     hex filename
       1255      40       0    1295     50f pageattr_32.o.before
       1255      40       0    1295     50f pageattr_32.o.after

    Signed-off-by: Ingo Molnar
    Signed-off-by: Thomas Gleixner

commit d1481d71638be875a66c245fb66e43ed8828f77b
Author: Thomas Gleixner
Date: Tue Dec 11 16:53:18 2007 +0100

    x86: unify arch/x86/crypto/twofish_32/64.c

    Get rid of another duplicate file.

    Signed-off-by: Thomas Gleixner
    Signed-off-by: Ingo Molnar

commit dc7cc1fe1b4e51f4ac74fc53a04009d73ef5b7c9
Author: H. Peter Anvin
Date: Tue Dec 11 16:53:18 2007 +0100

    x86: unify asm/cpufeature.h

    asm/cpufeature.h was already almost unified; this completes the job.

    Signed-off-by: H. Peter Anvin
    Signed-off-by: Ingo Molnar
    Signed-off-by: Thomas Gleixner

commit e4c92e98f9c9cc7e79c0fe2f7ad2eeed5f78e252
Author: H. Peter Anvin
Date: Tue Dec 11 16:53:17 2007 +0100

    x86: add <asm-x86/asm.h>

    Create <asm-x86/asm.h>, with common definitions suitable for assembly
    unification.

    Signed-off-by: H. Peter Anvin
    Signed-off-by: Ingo Molnar
    Signed-off-by: Thomas Gleixner
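The new header (include/asm-x86/asm.h in the diffstat below) carries definitions of roughly the following flavor, written from memory as a sketch of the idea rather than a quote of the new file: macros that expand to the right assembler directives for 32- vs 64-bit, so shared inline-asm snippets (exception tables, alternatives and the like) need no per-arch variants.

#ifndef _ASM_X86_ASM_H
#define _ASM_X86_ASM_H

#ifdef CONFIG_X86_32
/* 32 bits */
# define _ASM_PTR	" .long "	/* emit a pointer-sized word */
# define _ASM_ALIGN	" .balign 4"	/* align to pointer size */
#else
/* 64 bits */
# define _ASM_PTR	" .quad "
# define _ASM_ALIGN	" .balign 8"
#endif

#endif /* _ASM_X86_ASM_H */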
commit be7256616f023a686c814dd40167d831c69cd95f
Author: Roland McGrath
Date: Tue Dec 11 16:53:17 2007 +0100

    x86: protect against sigaltstack wraparound

    cf. http://lkml.org/lkml/2007/10/3/41

    To summarize: on Linux, SA_ONSTACK decides whether you are already on
    the signal stack based on the value of the SP at the time of a
    signal. If you are not already inside the range, you are not "on the
    signal stack" and so the new signal handler frame starts over at the
    base of the signal stack.

    sigaltstack (and sigstack before it) was invented in BSD. There, the
    SA_ONSTACK behavior has always been different. It uses a kernel state
    flag to decide, rather than the SP value. When you first take an
    SA_ONSTACK signal and switch to the alternate signal stack, it sets
    the SS_ONSTACK flag in the thread's sigaltstack state in the kernel.
    Thereafter you are "on the signal stack" and don't switch SP before
    pushing a handler frame no matter what the SP value is. Only when you
    sigreturn from the original handler context do you clear the
    SS_ONSTACK flag so that a new handler frame will start over at the
    base of the alternate signal stack.

    The undesirable effect of the Linux behavior is that an overflow of
    the alternate signal stack can not only go undetected, but lead to a
    ring buffer effect of clobbering the original handler frame at the
    base of the signal stack for each successive signal that comes just
    after the overflow. This is what Shi Weihua's test case demonstrates.
    Normally this does not come up because of the signal mask, but the
    test case uses SA_NODEFER for its SIGSEGV handler.

    The other subtle part of the existing Linux semantics is that a
    simple longjmp out of a signal handler serves to take you off the
    signal stack in a safe and reliable fashion without having used
    sigreturn (nor having just returned from the handler normally, which
    means the same). After the longjmp (or even informal stack switching
    not via any proper libc or kernel interface), the alternate signal
    stack stands ready to be used again.

    A paranoid program would allocate a PROT_NONE red zone around its
    alternate signal stack. Then a small overflow would trigger a SIGSEGV
    in handler setup, and be fatal (core dump) whether or not SIGSEGV is
    blocked. As with thread stack red zones, that cannot catch all
    overflows (or underflows). e.g., a local array as large as page size
    allocated in a function called from a handler, but not actually
    touched before more calls push more stack, could cause an overflow
    that silently pushes into some unrelated allocated pages.

    The BSD behavior does not do anything in particular about overflow.
    But it does at least avoid the wraparound or "ring buffer effect", so
    you'll just get a straightforward all-out overflow down your address
    space past the low end of the alternate signal stack. I don't know
    what the BSD behavior is for longjmp out of an SA_ONSTACK handler.

    The POSIX wording relating to sigaltstack is pretty minimal. I don't
    think it speaks to this issue one way or another. (The program that
    overflows its stack is clearly in undefined behavior territory of one
    sort or another anyhow.)

    Given the longjmp issue and the potential for highly subtle
    complications in existing programs relying on this in arcane ways
    deep in their code, I am very dubious about changing the behavior to
    the BSD style persistent flag. I think Shi Weihua's patches have a
    similar effect by tracking the SP used in the last handler setup.

    I think it would be sensible for the signal handler setup code to
    detect when it would itself be causing a stack overflow. Maybe
    something like the following patch (untested). This issue exists in
    the same way on all machines, so ideally they would all do a similar
    check. When it's the handler function itself or its callees that
    cause the overflow, rather than the signal handler frame setup alone
    crossing the boundary, this still won't help. But I don't see any way
    to distinguish that from the valid longjmp case.

    Signed-off-by: Ingo Molnar
    Signed-off-by: Thomas Gleixner
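The proposed check (the actual, untested patch is in the diff further below and may differ in detail) amounts to: while building a handler frame, if the old SP was on the alternate stack but the bottom of the new frame would not be, refuse and hand back an always-invalid address, so that delivery faults and the task dies with SIGSEGV instead of wrapping around the sigaltstack base. A sketch of that logic, using the existing on_sig_stack() helper from <linux/sched.h>:

/*
 * Sketch of the overflow check in get_sigframe()-style code: 'sp' is
 * where the frame would start, 'frame_size' how far it extends down.
 */
static void __user *check_sigframe(unsigned long sp, size_t frame_size)
{
	/*
	 * If we are on the alternate signal stack and would overflow
	 * it, don't. Return an always-poisoned address instead, so the
	 * signal delivery faults and the task gets a fatal SIGSEGV.
	 */
	if (on_sig_stack(sp) && !on_sig_stack(sp - frame_size))
		return (void __user *)-1L;

	/* otherwise place the frame below sp, 8-byte aligned */
	return (void __user *)((sp - frame_size) & -8UL);
}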
commit b5940c5307b3463762e72365cdd6de909b3a5b5c
Author: Ingo Molnar
Date: Tue Dec 11 16:53:17 2007 +0100

    x86: idle wakeup event in the HLT loop

    do a proper idle-wakeup event on HLT as well - some CPUs stop the TSC
    in HLT too, not just when going through the ACPI methods. (the ACPI
    idle code already does this.)

    [ update the 64-bit side too, as noticed by Jiri Slaby. ]

    Signed-off-by: Ingo Molnar

commit 3fb0214ace73b48fc2d77c31d2bb253d140f8486
Author: Guillaume Chazarain
Date: Tue Dec 11 16:53:17 2007 +0100

    x86: scale cyc_2_nsec according to CPU frequency

    scale the sched_clock() cyc_2_nsec scaling factor according to CPU
    frequency changes.

    [ mingo@elte.hu: simplified it and fixed it for SMP. ]

    Signed-off-by: Ingo Molnar
    Signed-off-by: Thomas Gleixner
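For reference, the cycles-to-nanoseconds conversion being rescaled is a fixed-point multiply of roughly this shape (names follow the pre-existing tsc code, from memory); the commit's contribution is recomputing the factor from the cpufreq transition notifier, per CPU, instead of treating it as a boot-time constant:

#define CYC2NS_SCALE_FACTOR 10	/* 2^10 fixed point */

static unsigned long cyc2ns_scale __read_mostly;

/* ns per cycle = 10^6 / cpu_khz, kept in 2^10 fixed point */
static inline void set_cyc2ns_scale(unsigned long cpu_khz)
{
	cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR) / cpu_khz;
}

static inline unsigned long long cycles_2_ns(unsigned long long cyc)
{
	return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
}

/*
 * If cpu_khz changes under a cpufreq transition and the scale is not
 * recomputed, sched_clock() silently speeds up or slows down; calling
 * set_cyc2ns_scale() from the frequency-change notifier keeps the
 * returned nanoseconds honest.
 */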
commit 7409d15e606e336bb6e19589e6f9702ba05e0cff
Author: Ingo Molnar
Date: Tue Dec 11 16:53:15 2007 +0100

    x86: fix get_cycles_sync() overhead

    get_cycles_sync() is causing massive overhead in KVM networking:

       http://lkml.org/lkml/2007/12/11/54

    remove the explicit CPUID serialization - it causes VM exits and is
    pointless: we care about GTOD coherency but that goes to user-space
    via a syscall, and syscalls are serialization points anyway.
Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner Acked-by: Dor Laor Signed-off-by: Andrew Morton --- Documentation/kernel-parameters.txt | 5 arch/ia64/ia32/binfmt_elf32.c | 3 arch/mips/kernel/i8253.c | 12 arch/powerpc/kernel/ptrace.c | 52 arch/um/sys-i386/signal.c | 50 arch/um/sys-x86_64/signal.c | 70 arch/x86/Kconfig | 248 - arch/x86/Kconfig.cpu | 56 arch/x86/Makefile_32 | 18 arch/x86/Makefile_64 | 3 arch/x86/boot/compressed/relocs.c | 7 arch/x86/configs/x86_64_defconfig | 9 arch/x86/crypto/Makefile | 4 arch/x86/crypto/twofish.c | 101 arch/x86/crypto/twofish_32.c | 97 arch/x86/crypto/twofish_64.c | 97 arch/x86/ia32/Makefile | 42 arch/x86/ia32/fpu32.c | 132 arch/x86/ia32/ia32_aout.c | 244 - arch/x86/ia32/ia32_binfmt.c | 51 arch/x86/ia32/ia32_signal.c | 471 +- arch/x86/ia32/ia32entry.S | 11 arch/x86/ia32/ipc32.c | 30 arch/x86/ia32/mmap32.c | 79 arch/x86/ia32/ptrace32.c | 404 -- arch/x86/ia32/sys_ia32.c | 502 +-- arch/x86/ia32/syscall32.c | 83 arch/x86/ia32/syscall32_syscall.S | 17 arch/x86/ia32/tls32.c | 163 - arch/x86/ia32/vsyscall-sigreturn.S | 143 arch/x86/ia32/vsyscall-syscall.S | 69 arch/x86/ia32/vsyscall-sysenter.S | 95 arch/x86/ia32/vsyscall.lds | 80 arch/x86/kernel/Makefile_32 | 47 arch/x86/kernel/Makefile_64 | 10 arch/x86/kernel/acpi/boot.c | 3 arch/x86/kernel/acpi/wakeup_64.S | 32 arch/x86/kernel/alternative.c | 13 arch/x86/kernel/aperture_64.c | 280 - arch/x86/kernel/apic_32.c | 104 arch/x86/kernel/apic_64.c | 1474 ++++----- arch/x86/kernel/apm_32.c | 2 arch/x86/kernel/asm-offsets_32.c | 60 arch/x86/kernel/asm-offsets_64.c | 41 arch/x86/kernel/cpu/addon_cpuid_features.c | 2 arch/x86/kernel/cpu/bugs.c | 3 arch/x86/kernel/cpu/common.c | 4 arch/x86/kernel/cpu/cyrix.c | 6 arch/x86/kernel/cpu/intel.c | 22 arch/x86/kernel/cpu/mcheck/mce_64.c | 18 arch/x86/kernel/cpu/mcheck/mce_amd_64.c | 10 arch/x86/kernel/cpu/mtrr/amd.c | 2 arch/x86/kernel/cpu/mtrr/cyrix.c | 3 arch/x86/kernel/cpu/mtrr/generic.c | 19 arch/x86/kernel/cpu/mtrr/if.c | 15 arch/x86/kernel/cpu/mtrr/main.c | 8 arch/x86/kernel/cpu/mtrr/mtrr.h | 6 arch/x86/kernel/cpu/mtrr/state.c | 3 arch/x86/kernel/cpu/perfctr-watchdog.c | 1 arch/x86/kernel/doublefault_32.c | 15 arch/x86/kernel/ds.c | 429 ++ arch/x86/kernel/e820_32.c | 110 arch/x86/kernel/e820_64.c | 330 +- arch/x86/kernel/entry_32.S | 24 arch/x86/kernel/geode_32.c | 48 arch/x86/kernel/head64.c | 6 arch/x86/kernel/hpet.c | 4 arch/x86/kernel/i386_ksyms_32.c | 7 arch/x86/kernel/i8253.c | 12 arch/x86/kernel/i8259_32.c | 4 arch/x86/kernel/i8259_64.c | 154 arch/x86/kernel/init_task.c | 1 arch/x86/kernel/io_apic_32.c | 2 arch/x86/kernel/io_apic_64.c | 103 arch/x86/kernel/ioport_32.c | 58 arch/x86/kernel/ioport_64.c | 6 arch/x86/kernel/irq_32.c | 20 arch/x86/kernel/irq_64.c | 30 arch/x86/kernel/kprobes_32.c | 109 arch/x86/kernel/kprobes_64.c | 103 arch/x86/kernel/ldt.c | 264 + arch/x86/kernel/ldt_32.c | 248 - arch/x86/kernel/ldt_64.c | 250 - arch/x86/kernel/machine_kexec_64.c | 5 arch/x86/kernel/mpparse_32.c | 29 arch/x86/kernel/nmi_32.c | 15 arch/x86/kernel/nmi_64.c | 99 arch/x86/kernel/paravirt_32.c | 12 arch/x86/kernel/pci-calgary_64.c | 5 arch/x86/kernel/pci-dma_64.c | 3 arch/x86/kernel/pci-gart_64.c | 506 +-- arch/x86/kernel/pci-swiotlb_64.c | 1 arch/x86/kernel/pmtimer_64.c | 4 arch/x86/kernel/process_32.c | 376 -- arch/x86/kernel/process_64.c | 307 + arch/x86/kernel/ptrace.c | 1349 ++++++++ arch/x86/kernel/ptrace_32.c | 717 ---- arch/x86/kernel/ptrace_64.c | 621 --- arch/x86/kernel/reboot_fixups_32.c | 14 arch/x86/kernel/rtc.c | 196 + arch/x86/kernel/setup64.c | 3 
arch/x86/kernel/setup_32.c | 111 arch/x86/kernel/setup_64.c | 399 +- arch/x86/kernel/signal_32.c | 220 - arch/x86/kernel/signal_64.c | 128 arch/x86/kernel/smp_32.c | 4 arch/x86/kernel/smp_64.c | 88 arch/x86/kernel/smpboot_32.c | 37 arch/x86/kernel/smpboot_64.c | 46 arch/x86/kernel/stacktrace.c | 1 arch/x86/kernel/step.c | 210 + arch/x86/kernel/suspend_64.c | 2 arch/x86/kernel/suspend_asm_64.S | 32 arch/x86/kernel/sys_x86_64.c | 98 arch/x86/kernel/sysenter_32.c | 346 -- arch/x86/kernel/time_32.c | 114 arch/x86/kernel/time_64.c | 170 - arch/x86/kernel/tls.c | 136 arch/x86/kernel/traps_32.c | 126 arch/x86/kernel/traps_64.c | 123 arch/x86/kernel/tsc_32.c | 43 arch/x86/kernel/tsc_64.c | 74 arch/x86/kernel/vm86_32.c | 110 arch/x86/kernel/vmi_32.c | 74 arch/x86/kernel/vmiclock_32.c | 1 arch/x86/kernel/vmlinux_32.lds.S | 6 arch/x86/kernel/vsmp_64.c | 11 arch/x86/kernel/vsyscall-int80_32.S | 53 arch/x86/kernel/vsyscall-note_32.S | 45 arch/x86/kernel/vsyscall-sigreturn_32.S | 143 arch/x86/kernel/vsyscall-sysenter_32.S | 122 arch/x86/kernel/vsyscall_32.S | 15 arch/x86/kernel/vsyscall_32.lds.S | 67 arch/x86/kernel/vsyscall_64.c | 6 arch/x86/kernel/x8664_ksyms_64.c | 7 arch/x86/lguest/boot.c | 32 arch/x86/mach-rdc321x/Makefile | 5 arch/x86/mach-rdc321x/gpio.c | 91 arch/x86/mach-rdc321x/platform.c | 68 arch/x86/mach-rdc321x/wdt.c | 275 + arch/x86/mach-visws/mpparse.c | 16 arch/x86/mach-voyager/setup.c | 32 arch/x86/mach-voyager/voyager_basic.c | 132 arch/x86/mach-voyager/voyager_cat.c | 601 +-- arch/x86/mach-voyager/voyager_smp.c | 684 +--- arch/x86/mach-voyager/voyager_thread.c | 52 arch/x86/math-emu/errors.c | 880 ++--- arch/x86/math-emu/exception.h | 9 arch/x86/math-emu/fpu_arith.c | 150 arch/x86/math-emu/fpu_asm.h | 1 arch/x86/math-emu/fpu_aux.c | 211 - arch/x86/math-emu/fpu_emu.h | 67 arch/x86/math-emu/fpu_entry.c | 1220 +++---- arch/x86/math-emu/fpu_etc.c | 185 - arch/x86/math-emu/fpu_proto.h | 28 arch/x86/math-emu/fpu_tags.c | 92 arch/x86/math-emu/fpu_trig.c | 2930 ++++++++---------- arch/x86/math-emu/get_address.c | 646 +-- arch/x86/math-emu/load_store.c | 452 +- arch/x86/math-emu/poly.h | 69 arch/x86/math-emu/poly_2xm1.c | 197 - arch/x86/math-emu/poly_atan.c | 347 +- arch/x86/math-emu/poly_l2.c | 386 +- arch/x86/math-emu/poly_sin.c | 643 +-- arch/x86/math-emu/poly_tan.c | 334 -- arch/x86/math-emu/reg_add_sub.c | 565 +-- arch/x86/math-emu/reg_compare.c | 573 +-- arch/x86/math-emu/reg_constant.c | 71 arch/x86/math-emu/reg_convert.c | 51 arch/x86/math-emu/reg_divide.c | 319 - arch/x86/math-emu/reg_ld_str.c | 2155 ++++++------- arch/x86/math-emu/reg_mul.c | 171 - arch/x86/math-emu/status_w.h | 8 arch/x86/mm/Makefile_64 | 2 arch/x86/mm/boot_ioremap_32.c | 24 arch/x86/mm/extable_32.c | 6 arch/x86/mm/fault_32.c | 38 arch/x86/mm/fault_64.c | 22 arch/x86/mm/highmem_32.c | 47 arch/x86/mm/init_32.c | 31 arch/x86/mm/init_64.c | 39 arch/x86/mm/ioremap_64.c | 20 arch/x86/mm/k8topology_64.c | 17 arch/x86/mm/mmap_32.c | 8 arch/x86/mm/mmap_64.c | 119 arch/x86/mm/numa_64.c | 246 - arch/x86/mm/pageattr_32.c | 151 arch/x86/mm/pageattr_64.c | 143 arch/x86/mm/srat_64.c | 57 arch/x86/oprofile/backtrace.c | 6 arch/x86/oprofile/op_model_athlon.c | 22 arch/x86/pci/fixup.c | 13 arch/x86/pci/init.c | 4 arch/x86/pci/mmconfig-shared.c | 210 + arch/x86/pci/pci.h | 1 arch/x86/power/cpu.c | 14 arch/x86/vdso/Makefile | 130 arch/x86/vdso/vclock_gettime.c | 1 arch/x86/vdso/vdso-layout.lds.S | 64 arch/x86/vdso/vdso-start.S | 2 arch/x86/vdso/vdso.lds.S | 94 arch/x86/vdso/vdso32-setup.c | 411 ++ arch/x86/vdso/vdso32.S | 19 
arch/x86/vdso/vdso32/int80.S | 56 arch/x86/vdso/vdso32/note.S | 44 arch/x86/vdso/vdso32/sigreturn.S | 144 arch/x86/vdso/vdso32/syscall.S | 77 arch/x86/vdso/vdso32/sysenter.S | 116 arch/x86/vdso/vdso32/vdso32.lds.S | 37 arch/x86/vdso/vgetcpu.c | 4 arch/x86/vdso/vma.c | 18 arch/x86/vdso/voffset.h | 1 arch/x86/xen/enlighten.c | 30 arch/x86/xen/events.c | 2 arch/x86/xen/mmu.c | 4 arch/x86/xen/setup.c | 7 arch/x86/xen/smp.c | 8 drivers/acpi/bus.c | 2 drivers/char/hpet.c | 75 drivers/char/rtc.c | 238 - drivers/lguest/x86/core.c | 4 drivers/pci/probe.c | 11 fs/binfmt_elf.c | 114 include/asm-powerpc/ptrace.h | 7 include/asm-x86/Kbuild | 1 include/asm-x86/acpi.h | 146 include/asm-x86/acpi_32.h | 143 include/asm-x86/acpi_64.h | 153 include/asm-x86/alternative.h | 162 include/asm-x86/alternative_32.h | 154 include/asm-x86/alternative_64.h | 159 include/asm-x86/apic.h | 141 include/asm-x86/apic_32.h | 127 include/asm-x86/apic_64.h | 102 include/asm-x86/apicdef.h | 412 ++ include/asm-x86/apicdef_32.h | 375 -- include/asm-x86/apicdef_64.h | 392 -- include/asm-x86/arch_hooks.h | 5 include/asm-x86/asm.h | 20 include/asm-x86/bitops.h | 315 + include/asm-x86/bitops_32.h | 324 - include/asm-x86/bitops_64.h | 297 - include/asm-x86/bug.h | 3 include/asm-x86/checksum_64.h | 2 include/asm-x86/cmpxchg_32.h | 122 include/asm-x86/compat.h | 2 include/asm-x86/cpufeature.h | 201 + include/asm-x86/cpufeature_32.h | 176 - include/asm-x86/cpufeature_64.h | 30 include/asm-x86/desc_64.h | 114 include/asm-x86/dma.h | 318 + include/asm-x86/dma_32.h | 297 - include/asm-x86/dma_64.h | 304 - include/asm-x86/ds.h | 65 include/asm-x86/e820_32.h | 6 include/asm-x86/e820_64.h | 6 include/asm-x86/elf.h | 101 include/asm-x86/futex.h | 138 include/asm-x86/futex_32.h | 135 include/asm-x86/futex_64.h | 125 include/asm-x86/geode.h | 12 include/asm-x86/gpio.h | 6 include/asm-x86/hw_irq_64.h | 1 include/asm-x86/i387_32.h | 2 include/asm-x86/i387_64.h | 2 include/asm-x86/i8259.h | 17 include/asm-x86/ia32.h | 6 include/asm-x86/ia32_unistd.h | 2 include/asm-x86/ide.h | 2 include/asm-x86/idle.h | 1 include/asm-x86/io_apic.h | 158 include/asm-x86/io_apic_32.h | 155 include/asm-x86/io_apic_64.h | 138 include/asm-x86/irqflags.h | 246 + include/asm-x86/irqflags_32.h | 197 - include/asm-x86/irqflags_64.h | 176 - include/asm-x86/k8.h | 1 include/asm-x86/kdebug.h | 5 include/asm-x86/kexec_32.h | 36 include/asm-x86/kexec_64.h | 20 include/asm-x86/kprobes_32.h | 2 include/asm-x86/kprobes_64.h | 2 include/asm-x86/linkage.h | 21 include/asm-x86/linkage_32.h | 15 include/asm-x86/linkage_64.h | 6 include/asm-x86/mach-bigsmp/mach_apic.h | 12 include/asm-x86/mach-default/apm.h | 2 include/asm-x86/mach-default/io_ports.h | 25 include/asm-x86/mach-default/mach_apic.h | 16 include/asm-x86/mach-default/mach_time.h | 111 include/asm-x86/mach-default/mach_timer.h | 2 include/asm-x86/mach-default/mach_traps.h | 2 include/asm-x86/mach-es7000/mach_apic.h | 10 include/asm-x86/mach-generic/gpio.h | 15 include/asm-x86/mach-numaq/mach_apic.h | 10 include/asm-x86/mach-rdc321x/gpio.h | 56 include/asm-x86/mach-rdc321x/rdc321x_defs.h | 6 include/asm-x86/mach-summit/mach_apic.h | 16 include/asm-x86/mc146818rtc.h | 101 include/asm-x86/mc146818rtc_32.h | 97 include/asm-x86/mc146818rtc_64.h | 29 include/asm-x86/mce.h | 18 include/asm-x86/mmu_context_64.h | 2 include/asm-x86/mmzone_32.h | 3 include/asm-x86/mmzone_64.h | 6 include/asm-x86/mpspec.h | 116 include/asm-x86/mpspec_32.h | 81 include/asm-x86/mpspec_64.h | 233 - include/asm-x86/mpspec_def.h | 87 
include/asm-x86/msr-index.h | 7 include/asm-x86/msr.h | 284 - include/asm-x86/mtrr.h | 8 include/asm-x86/nmi_32.h | 3 include/asm-x86/nmi_64.h | 5 include/asm-x86/numa_64.h | 8 include/asm-x86/page_32.h | 39 include/asm-x86/page_64.h | 30 include/asm-x86/paravirt.h | 31 include/asm-x86/pci.h | 17 include/asm-x86/pci_64.h | 1 include/asm-x86/pda.h | 1 include/asm-x86/pgtable_32.h | 8 include/asm-x86/pgtable_64.h | 31 include/asm-x86/processor.h | 78 include/asm-x86/processor_32.h | 150 include/asm-x86/processor_64.h | 50 include/asm-x86/proto.h | 67 include/asm-x86/ptrace-abi.h | 54 include/asm-x86/ptrace.h | 138 include/asm-x86/resume-trace.h | 23 include/asm-x86/resume-trace_32.h | 13 include/asm-x86/resume-trace_64.h | 13 include/asm-x86/rio.h | 4 include/asm-x86/rwlock.h | 1 include/asm-x86/rwsem.h | 2 include/asm-x86/scatterlist.h | 34 include/asm-x86/scatterlist_32.h | 28 include/asm-x86/scatterlist_64.h | 29 include/asm-x86/segment.h | 202 + include/asm-x86/segment_32.h | 147 include/asm-x86/segment_64.h | 53 include/asm-x86/sigcontext.h | 42 include/asm-x86/sigcontext32.h | 22 include/asm-x86/signal.h | 11 include/asm-x86/smp_32.h | 117 include/asm-x86/smp_64.h | 133 include/asm-x86/sparsemem.h | 35 include/asm-x86/sparsemem_32.h | 31 include/asm-x86/sparsemem_64.h | 26 include/asm-x86/spinlock.h | 225 + include/asm-x86/spinlock_32.h | 221 - include/asm-x86/spinlock_64.h | 167 - include/asm-x86/suspend_64.h | 2 include/asm-x86/system.h | 376 ++ include/asm-x86/system_32.h | 320 - include/asm-x86/system_64.h | 178 - include/asm-x86/thread_info_32.h | 16 include/asm-x86/thread_info_64.h | 33 include/asm-x86/time.h | 2 include/asm-x86/timer.h | 23 include/asm-x86/timex.h | 2 include/asm-x86/tlbflush.h | 157 include/asm-x86/tlbflush_32.h | 168 - include/asm-x86/tlbflush_64.h | 100 include/asm-x86/topology.h | 143 include/asm-x86/topology_32.h | 121 include/asm-x86/topology_64.h | 71 include/asm-x86/tsc.h | 48 include/asm-x86/user_32.h | 24 include/asm-x86/user_64.h | 41 include/asm-x86/vdso.h | 28 include/asm-x86/vsyscall32.h | 20 include/asm-x86/xor_32.h | 2 include/asm-x86/xor_64.h | 2 include/linux/acpi_pmtmr.h | 2 include/linux/compat.h | 4 include/linux/hpet.h | 3 include/linux/ioport.h | 2 include/linux/pci.h | 9 include/linux/pci_ids.h | 7 include/linux/ptrace.h | 75 include/linux/thread_info.h | 10 include/linux/timer.h | 6 kernel/ptrace.c | 74 kernel/signal.c | 4 kernel/softirq.c | 4 kernel/time/tick-sched.c | 6 kernel/time/timer_stats.c | 2 kernel/timer.c | 82 mm/mmap.c | 3 390 files changed, 21617 insertions(+), 24111 deletions(-) diff -puN Documentation/kernel-parameters.txt~git-x86 Documentation/kernel-parameters.txt --- a/Documentation/kernel-parameters.txt~git-x86 +++ a/Documentation/kernel-parameters.txt @@ -1964,6 +1964,11 @@ and is between 256 and 4096 characters. 
vdso=1: enable VDSO (default) vdso=0: disable VDSO mapping + vdso32= [X86-32,X86-64] + vdso32=2: enable compat VDSO (default with COMPAT_VDSO) + vdso32=1: enable 32-bit VDSO (default) + vdso32=0: disable 32-bit VDSO mapping + vector= [IA-64,SMP] vector=percpu: enable percpu vector domain diff -puN arch/ia64/ia32/binfmt_elf32.c~git-x86 arch/ia64/ia32/binfmt_elf32.c --- a/arch/ia64/ia32/binfmt_elf32.c~git-x86 +++ a/arch/ia64/ia32/binfmt_elf32.c @@ -222,7 +222,8 @@ elf32_set_personality (void) } static unsigned long -elf32_map (struct file *filep, unsigned long addr, struct elf_phdr *eppnt, int prot, int type) +elf32_map(struct file *filep, unsigned long addr, struct elf_phdr *eppnt, + int prot, int type, unsigned long unused) { unsigned long pgoff = (eppnt->p_vaddr) & ~IA32_PAGE_MASK; diff -puN arch/mips/kernel/i8253.c~git-x86 arch/mips/kernel/i8253.c --- a/arch/mips/kernel/i8253.c~git-x86 +++ a/arch/mips/kernel/i8253.c @@ -24,9 +24,7 @@ DEFINE_SPINLOCK(i8253_lock); static void init_pit_timer(enum clock_event_mode mode, struct clock_event_device *evt) { - unsigned long flags; - - spin_lock_irqsave(&i8253_lock, flags); + spin_lock(&i8253_lock); switch(mode) { case CLOCK_EVT_MODE_PERIODIC: @@ -55,7 +53,7 @@ static void init_pit_timer(enum clock_ev /* Nothing to do here */ break; } - spin_unlock_irqrestore(&i8253_lock, flags); + spin_unlock(&i8253_lock); } /* @@ -65,12 +63,10 @@ static void init_pit_timer(enum clock_ev */ static int pit_next_event(unsigned long delta, struct clock_event_device *evt) { - unsigned long flags; - - spin_lock_irqsave(&i8253_lock, flags); + spin_lock(&i8253_lock); outb_p(delta & 0xff , PIT_CH0); /* LSB */ outb(delta >> 8 , PIT_CH0); /* MSB */ - spin_unlock_irqrestore(&i8253_lock, flags); + spin_unlock(&i8253_lock); return 0; } diff -puN arch/powerpc/kernel/ptrace.c~git-x86 arch/powerpc/kernel/ptrace.c --- a/arch/powerpc/kernel/ptrace.c~git-x86 +++ a/arch/powerpc/kernel/ptrace.c @@ -256,7 +256,7 @@ static int set_evrregs(struct task_struc #endif /* CONFIG_SPE */ -static void set_single_step(struct task_struct *task) +void user_enable_single_step(struct task_struct *task) { struct pt_regs *regs = task->thread.regs; @@ -271,7 +271,7 @@ static void set_single_step(struct task_ set_tsk_thread_flag(task, TIF_SINGLESTEP); } -static void clear_single_step(struct task_struct *task) +void user_disable_single_step(struct task_struct *task) { struct pt_regs *regs = task->thread.regs; @@ -313,7 +313,7 @@ static int ptrace_set_debugreg(struct ta void ptrace_disable(struct task_struct *child) { /* make sure the single step bit is not set. */ - clear_single_step(child); + user_disable_single_step(child); } /* @@ -445,52 +445,6 @@ long arch_ptrace(struct task_struct *chi break; } - case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */ - case PTRACE_CONT: { /* restart after signal. */ - ret = -EIO; - if (!valid_signal(data)) - break; - if (request == PTRACE_SYSCALL) - set_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - else - clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - child->exit_code = data; - /* make sure the single step bit is not set. */ - clear_single_step(child); - wake_up_process(child); - ret = 0; - break; - } - -/* - * make the child exit. Best I can do is send it a sigkill. - * perhaps it should be put in the status that it wants to - * exit. - */ - case PTRACE_KILL: { - ret = 0; - if (child->exit_state == EXIT_ZOMBIE) /* already dead */ - break; - child->exit_code = SIGKILL; - /* make sure the single step bit is not set. 
*/ - clear_single_step(child); - wake_up_process(child); - break; - } - - case PTRACE_SINGLESTEP: { /* set the trap flag. */ - ret = -EIO; - if (!valid_signal(data)) - break; - clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - set_single_step(child); - child->exit_code = data; - /* give it a chance to run. */ - wake_up_process(child); - ret = 0; - break; - } - case PTRACE_GET_DEBUGREG: { ret = -EINVAL; /* We only support one DABR and no IABRS at the moment */ diff -puN arch/um/sys-i386/signal.c~git-x86 arch/um/sys-i386/signal.c --- a/arch/um/sys-i386/signal.c~git-x86 +++ a/arch/um/sys-i386/signal.c @@ -3,10 +3,10 @@ * Licensed under the GPL */ -#include "linux/ptrace.h" -#include "asm/unistd.h" -#include "asm/uaccess.h" -#include "asm/ucontext.h" +#include +#include +#include +#include #include "frame_kern.h" #include "skas.h" @@ -18,17 +18,17 @@ void copy_sc(struct uml_pt_regs *regs, v REGS_FS(regs->gp) = sc->fs; REGS_ES(regs->gp) = sc->es; REGS_DS(regs->gp) = sc->ds; - REGS_EDI(regs->gp) = sc->edi; - REGS_ESI(regs->gp) = sc->esi; - REGS_EBP(regs->gp) = sc->ebp; - REGS_SP(regs->gp) = sc->esp; - REGS_EBX(regs->gp) = sc->ebx; - REGS_EDX(regs->gp) = sc->edx; - REGS_ECX(regs->gp) = sc->ecx; - REGS_EAX(regs->gp) = sc->eax; - REGS_IP(regs->gp) = sc->eip; + REGS_EDI(regs->gp) = sc->di; + REGS_ESI(regs->gp) = sc->si; + REGS_EBP(regs->gp) = sc->bp; + REGS_SP(regs->gp) = sc->sp; + REGS_EBX(regs->gp) = sc->bx; + REGS_EDX(regs->gp) = sc->dx; + REGS_ECX(regs->gp) = sc->cx; + REGS_EAX(regs->gp) = sc->ax; + REGS_IP(regs->gp) = sc->ip; REGS_CS(regs->gp) = sc->cs; - REGS_EFLAGS(regs->gp) = sc->eflags; + REGS_EFLAGS(regs->gp) = sc->flags; REGS_SS(regs->gp) = sc->ss; } @@ -229,18 +229,18 @@ static int copy_sc_to_user(struct sigcon sc.fs = REGS_FS(regs->regs.gp); sc.es = REGS_ES(regs->regs.gp); sc.ds = REGS_DS(regs->regs.gp); - sc.edi = REGS_EDI(regs->regs.gp); - sc.esi = REGS_ESI(regs->regs.gp); - sc.ebp = REGS_EBP(regs->regs.gp); - sc.esp = sp; - sc.ebx = REGS_EBX(regs->regs.gp); - sc.edx = REGS_EDX(regs->regs.gp); - sc.ecx = REGS_ECX(regs->regs.gp); - sc.eax = REGS_EAX(regs->regs.gp); - sc.eip = REGS_IP(regs->regs.gp); + sc.di = REGS_EDI(regs->regs.gp); + sc.si = REGS_ESI(regs->regs.gp); + sc.bp = REGS_EBP(regs->regs.gp); + sc.sp = sp; + sc.bx = REGS_EBX(regs->regs.gp); + sc.dx = REGS_EDX(regs->regs.gp); + sc.cx = REGS_ECX(regs->regs.gp); + sc.ax = REGS_EAX(regs->regs.gp); + sc.ip = REGS_IP(regs->regs.gp); sc.cs = REGS_CS(regs->regs.gp); - sc.eflags = REGS_EFLAGS(regs->regs.gp); - sc.esp_at_signal = regs->regs.gp[UESP]; + sc.flags = REGS_EFLAGS(regs->regs.gp); + sc.sp_at_signal = regs->regs.gp[UESP]; sc.ss = regs->regs.gp[SS]; sc.cr2 = fi->cr2; sc.err = fi->error_code; diff -puN arch/um/sys-x86_64/signal.c~git-x86 arch/um/sys-x86_64/signal.c --- a/arch/um/sys-x86_64/signal.c~git-x86 +++ a/arch/um/sys-x86_64/signal.c @@ -4,11 +4,11 @@ * Licensed under the GPL */ -#include "linux/personality.h" -#include "linux/ptrace.h" -#include "asm/unistd.h" -#include "asm/uaccess.h" -#include "asm/ucontext.h" +#include +#include +#include +#include +#include #include "frame_kern.h" #include "skas.h" @@ -27,16 +27,16 @@ void copy_sc(struct uml_pt_regs *regs, v GETREG(regs, R13, sc, r13); GETREG(regs, R14, sc, r14); GETREG(regs, R15, sc, r15); - GETREG(regs, RDI, sc, rdi); - GETREG(regs, RSI, sc, rsi); - GETREG(regs, RBP, sc, rbp); - GETREG(regs, RBX, sc, rbx); - GETREG(regs, RDX, sc, rdx); - GETREG(regs, RAX, sc, rax); - GETREG(regs, RCX, sc, rcx); - GETREG(regs, RSP, sc, rsp); - GETREG(regs, RIP, sc, rip); - 
GETREG(regs, EFLAGS, sc, eflags); + GETREG(regs, RDI, sc, di); + GETREG(regs, RSI, sc, si); + GETREG(regs, RBP, sc, bp); + GETREG(regs, RBX, sc, bx); + GETREG(regs, RDX, sc, dx); + GETREG(regs, RAX, sc, ax); + GETREG(regs, RCX, sc, cx); + GETREG(regs, RSP, sc, sp); + GETREG(regs, RIP, sc, ip); + GETREG(regs, EFLAGS, sc, flags); GETREG(regs, CS, sc, cs); #undef GETREG @@ -61,16 +61,16 @@ static int copy_sc_from_user(struct pt_r err |= GETREG(regs, R13, from, r13); err |= GETREG(regs, R14, from, r14); err |= GETREG(regs, R15, from, r15); - err |= GETREG(regs, RDI, from, rdi); - err |= GETREG(regs, RSI, from, rsi); - err |= GETREG(regs, RBP, from, rbp); - err |= GETREG(regs, RBX, from, rbx); - err |= GETREG(regs, RDX, from, rdx); - err |= GETREG(regs, RAX, from, rax); - err |= GETREG(regs, RCX, from, rcx); - err |= GETREG(regs, RSP, from, rsp); - err |= GETREG(regs, RIP, from, rip); - err |= GETREG(regs, EFLAGS, from, eflags); + err |= GETREG(regs, RDI, from, di); + err |= GETREG(regs, RSI, from, si); + err |= GETREG(regs, RBP, from, bp); + err |= GETREG(regs, RBX, from, bx); + err |= GETREG(regs, RDX, from, dx); + err |= GETREG(regs, RAX, from, ax); + err |= GETREG(regs, RCX, from, cx); + err |= GETREG(regs, RSP, from, sp); + err |= GETREG(regs, RIP, from, ip); + err |= GETREG(regs, EFLAGS, from, flags); err |= GETREG(regs, CS, from, cs); if (err) return 1; @@ -108,19 +108,19 @@ static int copy_sc_to_user(struct sigcon __put_user((regs)->regs.gp[(regno) / sizeof(unsigned long)], \ &(sc)->regname) - err |= PUTREG(regs, RDI, to, rdi); - err |= PUTREG(regs, RSI, to, rsi); - err |= PUTREG(regs, RBP, to, rbp); + err |= PUTREG(regs, RDI, to, di); + err |= PUTREG(regs, RSI, to, si); + err |= PUTREG(regs, RBP, to, bp); /* * Must use orignal RSP, which is passed in, rather than what's in * the pt_regs, because that's already been updated to point at the * signal frame. 
*/ - err |= __put_user(sp, &to->rsp); - err |= PUTREG(regs, RBX, to, rbx); - err |= PUTREG(regs, RDX, to, rdx); - err |= PUTREG(regs, RCX, to, rcx); - err |= PUTREG(regs, RAX, to, rax); + err |= __put_user(sp, &to->sp); + err |= PUTREG(regs, RBX, to, bx); + err |= PUTREG(regs, RDX, to, dx); + err |= PUTREG(regs, RCX, to, cx); + err |= PUTREG(regs, RAX, to, ax); err |= PUTREG(regs, R8, to, r8); err |= PUTREG(regs, R9, to, r9); err |= PUTREG(regs, R10, to, r10); @@ -135,8 +135,8 @@ static int copy_sc_to_user(struct sigcon err |= __put_user(fi->error_code, &to->err); err |= __put_user(fi->trap_no, &to->trapno); - err |= PUTREG(regs, RIP, to, rip); - err |= PUTREG(regs, EFLAGS, to, eflags); + err |= PUTREG(regs, RIP, to, ip); + err |= PUTREG(regs, EFLAGS, to, flags); #undef PUTREG err |= __put_user(mask, &to->oldmask); diff -puN arch/x86/Kconfig~git-x86 arch/x86/Kconfig --- a/arch/x86/Kconfig~git-x86 +++ a/arch/x86/Kconfig @@ -17,81 +17,66 @@ config X86_64 ### Arch settings config X86 - bool - default y + def_bool y config GENERIC_TIME - bool - default y + def_bool y config GENERIC_CMOS_UPDATE - bool - default y + def_bool y config CLOCKSOURCE_WATCHDOG - bool - default y + def_bool y config GENERIC_CLOCKEVENTS - bool - default y + def_bool y config GENERIC_CLOCKEVENTS_BROADCAST - bool - default y + def_bool y depends on X86_64 || (X86_32 && X86_LOCAL_APIC) config LOCKDEP_SUPPORT - bool - default y + def_bool y config STACKTRACE_SUPPORT - bool - default y + def_bool y config SEMAPHORE_SLEEPERS - bool - default y + def_bool y config MMU - bool - default y + def_bool y config ZONE_DMA - bool - default y + def_bool y config QUICKLIST - bool - default X86_32 + def_bool X86_32 config SBUS bool config GENERIC_ISA_DMA - bool - default y + def_bool y config GENERIC_IOMAP - bool - default y + def_bool y config GENERIC_BUG - bool - default y + def_bool y depends on BUG +config GENERIC_GPIO + def_bool n + config GENERIC_HWEIGHT - bool - default y + def_bool y config ARCH_MAY_HAVE_PC_FDC - bool - default y + def_bool y config DMI - bool - default y + def_bool y config RWSEM_GENERIC_SPINLOCK def_bool !X86_XADD @@ -298,6 +283,17 @@ config X86_ES7000 Only choose this option if you have such a system, otherwise you should say N here. +config X86_RDC321X + bool "RDC R-321x SoC" + select M486 + select X86_REBOOTFIXUPS + select GENERIC_GPIO + select LEDS_GPIO + help + This option is needed for RDC R-321x system-on-chip, also known + as R-8610-(G). + If you don't have one of these chips, you should say N here. + config X86_VSMP bool "Support for ScaleMP vSMP" depends on X86_64 && PCI @@ -309,8 +305,8 @@ config X86_VSMP endchoice config SCHED_NO_NO_OMIT_FRAME_POINTER - bool "Single-depth WCHAN output" - default y + def_bool y + prompt "Single-depth WCHAN output" depends on X86_32 help Calculate simpler /proc//wchan values. 
If this option @@ -357,37 +353,31 @@ source "arch/x86/lguest/Kconfig" endif config ACPI_SRAT - bool - default y + def_bool y depends on X86_32 && ACPI && NUMA && (X86_SUMMIT || X86_GENERICARCH) select ACPI_NUMA config HAVE_ARCH_PARSE_SRAT - bool - default y - depends on ACPI_SRAT + def_bool y + depends on ACPI_SRAT config X86_SUMMIT_NUMA - bool - default y + def_bool y depends on X86_32 && NUMA && (X86_SUMMIT || X86_GENERICARCH) config X86_CYCLONE_TIMER - bool - default y + def_bool y depends on X86_32 && X86_SUMMIT || X86_GENERICARCH config ES7000_CLUSTERED_APIC - bool - default y + def_bool y depends on SMP && X86_ES7000 && MPENTIUMIII source "arch/x86/Kconfig.cpu" config HPET_TIMER - bool + def_bool X86_64 prompt "HPET Timer Support" if X86_32 - default X86_64 help Use the IA-PC HPET (High Precision Event Timer) to manage time in preference to the PIT and RTC, if a HPET is @@ -405,9 +395,8 @@ config HPET_TIMER Choose N to continue using the legacy 8254 timer. config HPET_EMULATE_RTC - bool + def_bool y depends on HPET_TIMER && RTC=y - default y # Mark as embedded because too many people got it wrong. # The code disables itself when not needed. @@ -447,8 +436,8 @@ config CALGARY_IOMMU If unsure, say Y. config CALGARY_IOMMU_ENABLED_BY_DEFAULT - bool "Should Calgary be enabled by default?" - default y + def_bool y + prompt "Should Calgary be enabled by default?" depends on CALGARY_IOMMU help Should Calgary be enabled by default? if you choose 'y', Calgary @@ -496,9 +485,9 @@ config SCHED_SMT N here. config SCHED_MC - bool "Multi-core scheduler support" + def_bool y + prompt "Multi-core scheduler support" depends on (X86_64 && SMP) || (X86_32 && X86_HT) - default y help Multi-core scheduler support improves the CPU scheduler's decision making when dealing with multi-core CPU chips at a cost of slightly @@ -532,19 +521,16 @@ config X86_UP_IOAPIC an IO-APIC, then the kernel will still run with no slowdown at all. config X86_LOCAL_APIC - bool + def_bool y depends on X86_64 || (X86_32 && (X86_UP_APIC || ((X86_VISWS || SMP) && !X86_VOYAGER) || X86_GENERICARCH)) - default y config X86_IO_APIC - bool + def_bool y depends on X86_64 || (X86_32 && (X86_UP_IOAPIC || (SMP && !(X86_VISWS || X86_VOYAGER)) || X86_GENERICARCH)) - default y config X86_VISWS_APIC - bool + def_bool y depends on X86_32 && X86_VISWS - default y config X86_MCE bool "Machine Check Exception" @@ -564,17 +550,17 @@ config X86_MCE the 386 and 486, so nearly everyone can say Y here. config X86_MCE_INTEL - bool "Intel MCE features" + def_bool y + prompt "Intel MCE features" depends on X86_64 && X86_MCE && X86_LOCAL_APIC - default y help Additional support for intel specific MCE features such as the thermal monitor. config X86_MCE_AMD - bool "AMD MCE features" + def_bool y + prompt "AMD MCE features" depends on X86_64 && X86_MCE && X86_LOCAL_APIC - default y help Additional support for AMD specific MCE features such as the DRAM Error Threshold. @@ -647,9 +633,9 @@ config I8K Say N otherwise. config X86_REBOOTFIXUPS - bool "Enable X86 board specific fixups for reboot" + def_bool n + prompt "Enable X86 board specific fixups for reboot" depends on X86_32 && X86 - default n ---help--- This enables chipset and/or board specific fixups to be done in order to get reboot to work correctly. This is only needed on @@ -658,7 +644,7 @@ config X86_REBOOTFIXUPS system. Currently, the only fixup is for the Geode machines using - CS5530A and CS5536 chipsets. + CS5530A and CS5536 chipsets and the RDC R-321x SoC. Say Y if you want to enable the fixup. 
Currently, it's safe to enable this option even if you don't need it. @@ -682,9 +668,8 @@ config MICROCODE module will be called microcode. config MICROCODE_OLD_INTERFACE - bool + def_bool y depends on MICROCODE - default y config X86_MSR tristate "/dev/cpu/*/msr - Model-specific register support" @@ -808,13 +793,12 @@ config PAGE_OFFSET depends on X86_32 config HIGHMEM - bool + def_bool y depends on X86_32 && (HIGHMEM64G || HIGHMEM4G) - default y config X86_PAE - bool "PAE (Physical Address Extension) Support" - default n + def_bool n + prompt "PAE (Physical Address Extension) Support" depends on X86_32 && !HIGHMEM4G select RESOURCES_64BIT help @@ -846,10 +830,10 @@ comment "NUMA (Summit) requires SMP, 64G depends on X86_32 && X86_SUMMIT && (!HIGHMEM64G || !ACPI) config K8_NUMA - bool "Old style AMD Opteron NUMA detection" - depends on X86_64 && NUMA && PCI - default y - help + def_bool y + prompt "Old style AMD Opteron NUMA detection" + depends on X86_64 && NUMA && PCI + help Enable K8 NUMA node topology detection. You should say Y here if you have a multi processor AMD K8 system. This uses an old method to read the NUMA configuration directly from the builtin @@ -857,13 +841,21 @@ config K8_NUMA instead, which also takes priority if both are compiled in. config X86_64_ACPI_NUMA - bool "ACPI NUMA detection" + def_bool y + prompt "ACPI NUMA detection" depends on X86_64 && NUMA && ACPI && PCI select ACPI_NUMA - default y help Enable ACPI SRAT based node topology detection. +config THREAD_ORDER + int "Kernel stack size (in page order)" + range 1 3 + default "1" + depends on X86_64 && NUMA + help + Page order for the thread stack. + config NUMA_EMU bool "NUMA emulation" depends on X86_64 && NUMA @@ -880,46 +872,46 @@ config NODES_SHIFT depends on NEED_MULTIPLE_NODES config HAVE_ARCH_BOOTMEM_NODE - bool + def_bool y depends on X86_32 && NUMA - default y config ARCH_HAVE_MEMORY_PRESENT - bool + def_bool y depends on X86_32 && DISCONTIGMEM - default y config NEED_NODE_MEMMAP_SIZE - bool + def_bool y depends on X86_32 && (DISCONTIGMEM || SPARSEMEM) - default y config HAVE_ARCH_ALLOC_REMAP - bool + def_bool y depends on X86_32 && NUMA - default y config ARCH_FLATMEM_ENABLE def_bool y - depends on (X86_32 && ARCH_SELECT_MEMORY_MODEL && X86_PC) || (X86_64 && !NUMA) + depends on X86_32 && ARCH_SELECT_MEMORY_MODEL && X86_PC config ARCH_DISCONTIGMEM_ENABLE def_bool y - depends on NUMA + depends on NUMA && X86_32 config ARCH_DISCONTIGMEM_DEFAULT def_bool y - depends on NUMA + depends on NUMA && X86_32 + +config ARCH_SPARSEMEM_DEFAULT + def_bool y + depends on X86_64 config ARCH_SPARSEMEM_ENABLE def_bool y - depends on NUMA || (EXPERIMENTAL && (X86_PC || X86_64)) + depends on X86_64 || NUMA || (EXPERIMENTAL && X86_PC) select SPARSEMEM_STATIC if X86_32 select SPARSEMEM_VMEMMAP_ENABLE if X86_64 config ARCH_SELECT_MEMORY_MODEL def_bool y - depends on X86_32 && ARCH_SPARSEMEM_ENABLE + depends on ARCH_SPARSEMEM_ENABLE config ARCH_MEMORY_PROBE def_bool X86_64 @@ -997,9 +989,9 @@ config MTRR See for more information. config EFI - bool "Boot from EFI support" + def_bool n + prompt "Boot from EFI support" depends on X86_32 && ACPI - default n ---help--- This enables the kernel to boot on EFI platforms using system configuration information passed to it from the firmware. @@ -1015,9 +1007,9 @@ config EFI kernel should continue to boot on existing non-EFI platforms. 
config IRQBALANCE - bool "Enable kernel irq balancing" + def_bool y + prompt "Enable kernel irq balancing" depends on X86_32 && SMP && X86_IO_APIC - default y help The default yes will allow the kernel to do irq load balancing. Saying no will keep the kernel from doing irq load balancing. @@ -1025,14 +1017,13 @@ config IRQBALANCE # turning this on wastes a bunch of space. # Summit needs it only when NUMA is on config BOOT_IOREMAP - bool + def_bool y depends on X86_32 && (((X86_SUMMIT || X86_GENERICARCH) && NUMA) || (X86 && EFI)) - default y config SECCOMP - bool "Enable seccomp to safely compute untrusted bytecode" + def_bool y + prompt "Enable seccomp to safely compute untrusted bytecode" depends on PROC_FS - default y help This kernel feature is useful for number crunching applications that may need to compute untrusted bytecode during their @@ -1199,11 +1190,11 @@ config HOTPLUG_CPU suspend. config COMPAT_VDSO - bool "Compat VDSO support" - default y - depends on X86_32 + def_bool y + prompt "Compat VDSO support" + depends on X86_32 || IA32_EMULATION help - Map the VDSO to the predictable old-style address too. + Map the 32-bit VDSO to the predictable old-style address too. ---help--- Say N here if you are running a sufficiently recent glibc version (2.3.3 or later), to remove the high-mapped @@ -1217,25 +1208,16 @@ config ARCH_ENABLE_MEMORY_HOTPLUG def_bool y depends on X86_64 || (X86_32 && HIGHMEM) -config MEMORY_HOTPLUG_RESERVE - def_bool X86_64 - depends on (MEMORY_HOTPLUG && DISCONTIGMEM) - config HAVE_ARCH_EARLY_PFN_TO_NID def_bool X86_64 depends on NUMA -config OUT_OF_LINE_PFN_TO_PAGE - def_bool X86_64 - depends on DISCONTIGMEM - menu "Power management options" depends on !X86_VOYAGER config ARCH_HIBERNATION_HEADER - bool + def_bool y depends on X86_64 && HIBERNATION - default y source "kernel/power/Kconfig" @@ -1428,25 +1410,21 @@ config PCI_GOANY endchoice config PCI_BIOS - bool + def_bool y depends on X86_32 && !X86_VISWS && PCI && (PCI_GOBIOS || PCI_GOANY) - default y # x86-64 doesn't support PCI BIOS access from long mode so always go direct. config PCI_DIRECT - bool + def_bool y depends on PCI && (X86_64 || (PCI_GODIRECT || PCI_GOANY) || X86_VISWS) - default y config PCI_MMCONFIG - bool + def_bool y depends on X86_32 && PCI && ACPI && (PCI_GOMMCONFIG || PCI_GOANY) - default y config PCI_DOMAINS - bool + def_bool y depends on PCI - default y config PCI_MMCONFIG bool "Support mmconfig PCI config space access" @@ -1463,9 +1441,9 @@ config DMAR remapping devices. config DMAR_GFX_WA - bool "Support for Graphics workaround" + def_bool y + prompt "Support for Graphics workaround" depends on DMAR - default y help Current Graphics drivers tend to use physical address for DMA and avoid using DMA APIs. Setting this config @@ -1474,9 +1452,8 @@ config DMAR_GFX_WA to use physical addresses for DMA. config DMAR_FLOPPY_WA - bool + def_bool y depends on DMAR - default y help Floppy disk drivers are know to bypass DMA API calls thereby failing to work when IOMMU is enabled. This @@ -1489,8 +1466,7 @@ source "drivers/pci/Kconfig" # x86_64 have no ISA slots, but do have ISA-style DMA. config ISA_DMA_API - bool - default y + def_bool y if X86_32 @@ -1556,9 +1532,9 @@ config SCx200HR_TIMER other workaround is idle=poll boot option. 
config GEODE_MFGPT_TIMER - bool "Geode Multi-Function General Purpose Timer (MFGPT) events" + def_bool y + prompt "Geode Multi-Function General Purpose Timer (MFGPT) events" depends on MGEODE_LX && GENERIC_TIME && GENERIC_CLOCKEVENTS - default y help This driver provides a clock event source based on the MFGPT timer(s) in the CS5535 and CS5536 companion chip for the geode. @@ -1597,18 +1573,16 @@ config IA32_AOUT Support old a.out binaries in the 32bit emulation. config COMPAT - bool + def_bool y depends on IA32_EMULATION - default y config COMPAT_FOR_U64_ALIGNMENT def_bool COMPAT depends on X86_64 config SYSVIPC_COMPAT - bool + def_bool y depends on X86_64 && COMPAT && SYSVIPC - default y endmenu diff -puN arch/x86/Kconfig.cpu~git-x86 arch/x86/Kconfig.cpu --- a/arch/x86/Kconfig.cpu~git-x86 +++ a/arch/x86/Kconfig.cpu @@ -219,10 +219,10 @@ config MGEODEGX1 Select this for a Geode GX1 (Cyrix MediaGX) chip. config MGEODE_LX - bool "Geode GX/LX" + bool "Geode GX/LX" depends on X86_32 - help - Select this for AMD Geode GX and LX processors. + help + Select this for AMD Geode GX and LX processors. config MCYRIXIII bool "CyrixIII/VIA-C3" @@ -258,7 +258,7 @@ config MPSC Optimize for Intel Pentium 4, Pentium D and older Nocona/Dempsey Xeon CPUs with Intel 64bit which is compatible with x86-64. Note that the latest Xeons (Xeon 51xx and 53xx) are not based on the - Netburst core and shouldn't use this option. You can distinguish them + Netburst core and shouldn't use this option. You can distinguish them using the cpu family field in /proc/cpuinfo. Family 15 is an older Xeon, Family 6 a newer one. @@ -317,81 +317,66 @@ config X86_L1_CACHE_SHIFT default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MVIAC7 config X86_XADD - bool + def_bool y depends on X86_32 && !M386 - default y config X86_PPRO_FENCE - bool + def_bool y depends on M686 || M586MMX || M586TSC || M586 || M486 || M386 || MGEODEGX1 - default y config X86_F00F_BUG - bool + def_bool y depends on M586MMX || M586TSC || M586 || M486 || M386 - default y config X86_WP_WORKS_OK - bool + def_bool y depends on X86_32 && !M386 - default y config X86_INVLPG - bool + def_bool y depends on X86_32 && !M386 - default y config X86_BSWAP - bool + def_bool y depends on X86_32 && !M386 - default y config X86_POPAD_OK - bool + def_bool y depends on X86_32 && !M386 - default y config X86_ALIGNMENT_16 - bool + def_bool y depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1 - default y config X86_GOOD_APIC - bool + def_bool y depends on MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || MK8 || MEFFICEON || MCORE2 || MVIAC7 || X86_64 - default y config X86_INTEL_USERCOPY - bool + def_bool y depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2 - default y config X86_USE_PPRO_CHECKSUM - bool + def_bool y depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MEFFICEON || MGEODE_LX || MCORE2 - default y config X86_USE_3DNOW - bool + def_bool y depends on (MCYRIXIII || MK7 || MGEODE_LX) && !UML - default y config X86_OOSTORE - bool + def_bool y depends on (MWINCHIP3D || MWINCHIP2 || MWINCHIPC6) && MTRR - default y config X86_TSC - bool + def_bool y depends on ((MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII 
|| MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2) && !X86_NUMAQ) || X86_64 - default y # this should be set for all -march=.. options where the compiler # generates cmov. config X86_CMOV - bool + def_bool y depends on (MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7) - default y config X86_MINIMUM_CPU_FAMILY int @@ -399,3 +384,6 @@ config X86_MINIMUM_CPU_FAMILY default "4" if X86_32 && (X86_XADD || X86_CMPXCHG || X86_BSWAP || X86_WP_WORKS_OK) default "3" +config X86_DEBUGCTLMSR + def_bool y + depends on !(M586MMX || M586TSC || M586 || M486 || M386) diff -puN arch/x86/Makefile_32~git-x86 arch/x86/Makefile_32 --- a/arch/x86/Makefile_32~git-x86 +++ a/arch/x86/Makefile_32 @@ -48,10 +48,6 @@ include $(srctree)/arch/x86/Makefile_32. # temporary until string.h is fixed cflags-y += -ffreestanding -# this works around some issues with generating unwind tables in older gccs -# newer gccs do it by default -cflags-y += -maccumulate-outgoing-args - # Disable unit-at-a-time mode on pre-gcc-4.0 compilers, it makes gcc use # a lot more stack due to the lack of sharing of stacklots: KBUILD_CFLAGS += $(shell if [ $(call cc-version) -lt 0400 ] ; then echo $(call cc-option,-fno-unit-at-a-time); fi ;) @@ -85,6 +81,11 @@ mcore-$(CONFIG_X86_NUMAQ) := arch/x86/ma mflags-$(CONFIG_X86_BIGSMP) := -Iinclude/asm-x86/mach-bigsmp mcore-$(CONFIG_X86_BIGSMP) := arch/x86/mach-default +# RDC R-321x subarch support +mflags-$(CONFIG_X86_RDC321X) := -Iinclude/asm-x86/mach-rdc321x +mcore-$(CONFIG_X86_RDC321X) := arch/x86/mach-default +core-$(CONFIG_X86_RDC321X) += arch/x86/mach-rdc321x/ + #Summit subarch support mflags-$(CONFIG_X86_SUMMIT) := -Iinclude/asm-x86/mach-summit mcore-$(CONFIG_X86_SUMMIT) := arch/x86/mach-default @@ -114,7 +115,8 @@ libs-y += arch/x86/lib/ core-y += arch/x86/kernel/ \ arch/x86/mm/ \ $(mcore-y)/ \ - arch/x86/crypto/ + arch/x86/crypto/ \ + arch/x86/vdso/ drivers-$(CONFIG_MATH_EMULATION) += arch/x86/math-emu/ drivers-$(CONFIG_PCI) += arch/x86/pci/ # must be linked after kernel/ @@ -152,9 +154,13 @@ zdisk bzdisk: vmlinux fdimage fdimage144 fdimage288 isoimage: vmlinux $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) $@ -install: +install: vdso_install $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) install +PHONY += vdso_install +vdso_install: + $(Q)$(MAKE) $(build)=arch/x86/vdso $@ + archclean: $(Q)rm -rf $(objtree)/arch/i386/boot $(Q)$(MAKE) $(clean)=arch/x86/boot diff -puN arch/x86/Makefile_64~git-x86 arch/x86/Makefile_64 --- a/arch/x86/Makefile_64~git-x86 +++ a/arch/x86/Makefile_64 @@ -117,9 +117,6 @@ install: vdso_install $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(BOOTIMAGE) $@ vdso_install: -ifeq ($(CONFIG_IA32_EMULATION),y) - $(Q)$(MAKE) $(build)=arch/x86/ia32 $@ -endif $(Q)$(MAKE) $(build)=arch/x86/vdso $@ archclean: diff -puN arch/x86/boot/compressed/relocs.c~git-x86 arch/x86/boot/compressed/relocs.c --- a/arch/x86/boot/compressed/relocs.c~git-x86 +++ a/arch/x86/boot/compressed/relocs.c @@ -27,11 +27,6 @@ static unsigned long *relocs; * absolute relocations present w.r.t these symbols. 
*/ static const char* safe_abs_relocs[] = { - "__kernel_vsyscall", - "__kernel_rt_sigreturn", - "__kernel_sigreturn", - "SYSENTER_RETURN", - "VDSO_NOTE_MASK", "xen_irq_disable_direct_reloc", "xen_save_fl_direct_reloc", }; @@ -45,6 +40,8 @@ static int is_safe_abs_reloc(const char* /* Match found */ return 1; } + if (strncmp(sym_name, "VDSO", 4) == 0) + return 1; if (strncmp(sym_name, "__crc_", 6) == 0) return 1; return 0; diff -puN arch/x86/configs/x86_64_defconfig~git-x86 arch/x86/configs/x86_64_defconfig --- a/arch/x86/configs/x86_64_defconfig~git-x86 +++ a/arch/x86/configs/x86_64_defconfig @@ -145,15 +145,6 @@ CONFIG_K8_NUMA=y CONFIG_NODES_SHIFT=6 CONFIG_X86_64_ACPI_NUMA=y CONFIG_NUMA_EMU=y -CONFIG_ARCH_DISCONTIGMEM_ENABLE=y -CONFIG_ARCH_DISCONTIGMEM_DEFAULT=y -CONFIG_ARCH_SPARSEMEM_ENABLE=y -CONFIG_SELECT_MEMORY_MODEL=y -# CONFIG_FLATMEM_MANUAL is not set -CONFIG_DISCONTIGMEM_MANUAL=y -# CONFIG_SPARSEMEM_MANUAL is not set -CONFIG_DISCONTIGMEM=y -CONFIG_FLAT_NODE_MEM_MAP=y CONFIG_NEED_MULTIPLE_NODES=y # CONFIG_SPARSEMEM_STATIC is not set CONFIG_SPLIT_PTLOCK_CPUS=4 diff -puN arch/x86/crypto/Makefile~git-x86 arch/x86/crypto/Makefile --- a/arch/x86/crypto/Makefile~git-x86 +++ a/arch/x86/crypto/Makefile @@ -9,7 +9,7 @@ obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o aes-i586-y := aes-i586-asm_32.o aes_32.o -twofish-i586-y := twofish-i586-asm_32.o twofish_32.o +twofish-i586-y := twofish-i586-asm_32.o twofish.o aes-x86_64-y := aes-x86_64-asm_64.o aes_64.o -twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_64.o +twofish-x86_64-y := twofish-x86_64-asm_64.o twofish.o diff -puN /dev/null arch/x86/crypto/twofish.c --- /dev/null +++ a/arch/x86/crypto/twofish.c @@ -0,0 +1,101 @@ +/* + * Glue Code for optimized x86 assembler version of TWOFISH + * + * Originally Twofish for GPG + * By Matthew Skala , July 26, 1998 + * 256-bit key length added March 20, 1999 + * Some modifications to reduce the text size by Werner Koch, April, 1998 + * Ported to the kerneli patch by Marc Mutz + * Ported to CryptoAPI by Colin Slater + * + * The original author has disclaimed all copyright interest in this + * code and thus put it in the public domain. The subsequent authors + * have put this under the GNU General Public License. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + * This code is a "clean room" implementation, written from the paper + * _Twofish: A 128-Bit Block Cipher_ by Bruce Schneier, John Kelsey, + * Doug Whiting, David Wagner, Chris Hall, and Niels Ferguson, available + * through http://www.counterpane.com/twofish.html + * + * For background information on multiplication in finite fields, used for + * the matrix operations in the key schedule, see the book _Contemporary + * Abstract Algebra_ by Joseph A. Gallian, especially chapter 22 in the + * Third Edition. 
+ */ + +#include +#include +#include +#include +#include + + +asmlinkage void twofish_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); +asmlinkage void twofish_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); + +static void twofish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + twofish_enc_blk(tfm, dst, src); +} + +static void twofish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + twofish_dec_blk(tfm, dst, src); +} + +static struct crypto_alg alg = { + .cra_name = "twofish", +#ifdef CONFIG_X86_32 + .cra_driver_name = "twofish-i586", +#else + .cra_driver_name = "twofish-x86_64", +#endif + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = TF_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct twofish_ctx), + .cra_alignmask = 3, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(alg.cra_list), + .cra_u = { + .cipher = { + .cia_min_keysize = TF_MIN_KEY_SIZE, + .cia_max_keysize = TF_MAX_KEY_SIZE, + .cia_setkey = twofish_setkey, + .cia_encrypt = twofish_encrypt, + .cia_decrypt = twofish_decrypt + } + } +}; + +static int __init init(void) +{ + return crypto_register_alg(&alg); +} + +static void __exit fini(void) +{ + crypto_unregister_alg(&alg); +} + +module_init(init); +module_exit(fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Twofish Cipher Algorithm, x86 asm optimized"); +MODULE_ALIAS("twofish"); diff -puN arch/x86/crypto/twofish_32.c~git-x86 /dev/null --- a/arch/x86/crypto/twofish_32.c +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Glue Code for optimized 586 assembler version of TWOFISH - * - * Originally Twofish for GPG - * By Matthew Skala , July 26, 1998 - * 256-bit key length added March 20, 1999 - * Some modifications to reduce the text size by Werner Koch, April, 1998 - * Ported to the kerneli patch by Marc Mutz - * Ported to CryptoAPI by Colin Slater - * - * The original author has disclaimed all copyright interest in this - * code and thus put it in the public domain. The subsequent authors - * have put this under the GNU General Public License. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 - * USA - * - * This code is a "clean room" implementation, written from the paper - * _Twofish: A 128-Bit Block Cipher_ by Bruce Schneier, John Kelsey, - * Doug Whiting, David Wagner, Chris Hall, and Niels Ferguson, available - * through http://www.counterpane.com/twofish.html - * - * For background information on multiplication in finite fields, used for - * the matrix operations in the key schedule, see the book _Contemporary - * Abstract Algebra_ by Joseph A. Gallian, especially chapter 22 in the - * Third Edition. 
- */ - -#include -#include -#include -#include -#include - - -asmlinkage void twofish_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); -asmlinkage void twofish_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); - -static void twofish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) -{ - twofish_enc_blk(tfm, dst, src); -} - -static void twofish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) -{ - twofish_dec_blk(tfm, dst, src); -} - -static struct crypto_alg alg = { - .cra_name = "twofish", - .cra_driver_name = "twofish-i586", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_TYPE_CIPHER, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct twofish_ctx), - .cra_alignmask = 3, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(alg.cra_list), - .cra_u = { - .cipher = { - .cia_min_keysize = TF_MIN_KEY_SIZE, - .cia_max_keysize = TF_MAX_KEY_SIZE, - .cia_setkey = twofish_setkey, - .cia_encrypt = twofish_encrypt, - .cia_decrypt = twofish_decrypt - } - } -}; - -static int __init init(void) -{ - return crypto_register_alg(&alg); -} - -static void __exit fini(void) -{ - crypto_unregister_alg(&alg); -} - -module_init(init); -module_exit(fini); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION ("Twofish Cipher Algorithm, i586 asm optimized"); -MODULE_ALIAS("twofish"); diff -puN arch/x86/crypto/twofish_64.c~git-x86 /dev/null --- a/arch/x86/crypto/twofish_64.c +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Glue Code for optimized x86_64 assembler version of TWOFISH - * - * Originally Twofish for GPG - * By Matthew Skala , July 26, 1998 - * 256-bit key length added March 20, 1999 - * Some modifications to reduce the text size by Werner Koch, April, 1998 - * Ported to the kerneli patch by Marc Mutz - * Ported to CryptoAPI by Colin Slater - * - * The original author has disclaimed all copyright interest in this - * code and thus put it in the public domain. The subsequent authors - * have put this under the GNU General Public License. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 - * USA - * - * This code is a "clean room" implementation, written from the paper - * _Twofish: A 128-Bit Block Cipher_ by Bruce Schneier, John Kelsey, - * Doug Whiting, David Wagner, Chris Hall, and Niels Ferguson, available - * through http://www.counterpane.com/twofish.html - * - * For background information on multiplication in finite fields, used for - * the matrix operations in the key schedule, see the book _Contemporary - * Abstract Algebra_ by Joseph A. Gallian, especially chapter 22 in the - * Third Edition. 
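/*
 * twofish_32.c and twofish_64.c essentially differ only in the driver
 * name string (and the module description); the new
 * arch/x86/crypto/twofish.c keeps a single copy of the glue code and
 * selects the name at compile time. The essential pattern, in
 * isolation (TWOFISH_DRIVER_NAME is an illustrative macro; the patch
 * open-codes the #ifdef inside the crypto_alg initializer):
 */
#ifdef CONFIG_X86_32
# define TWOFISH_DRIVER_NAME	"twofish-i586"
#else
# define TWOFISH_DRIVER_NAME	"twofish-x86_64"
#endif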
- */ - -#include -#include -#include -#include -#include -#include - -asmlinkage void twofish_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); -asmlinkage void twofish_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); - -static void twofish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) -{ - twofish_enc_blk(tfm, dst, src); -} - -static void twofish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) -{ - twofish_dec_blk(tfm, dst, src); -} - -static struct crypto_alg alg = { - .cra_name = "twofish", - .cra_driver_name = "twofish-x86_64", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_TYPE_CIPHER, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct twofish_ctx), - .cra_alignmask = 3, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(alg.cra_list), - .cra_u = { - .cipher = { - .cia_min_keysize = TF_MIN_KEY_SIZE, - .cia_max_keysize = TF_MAX_KEY_SIZE, - .cia_setkey = twofish_setkey, - .cia_encrypt = twofish_encrypt, - .cia_decrypt = twofish_decrypt - } - } -}; - -static int __init init(void) -{ - return crypto_register_alg(&alg); -} - -static void __exit fini(void) -{ - crypto_unregister_alg(&alg); -} - -module_init(init); -module_exit(fini); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION ("Twofish Cipher Algorithm, x86_64 asm optimized"); -MODULE_ALIAS("twofish"); diff -puN arch/x86/ia32/Makefile~git-x86 arch/x86/ia32/Makefile --- a/arch/x86/ia32/Makefile~git-x86 +++ a/arch/x86/ia32/Makefile @@ -2,9 +2,8 @@ # Makefile for the ia32 kernel emulation subsystem. # -obj-$(CONFIG_IA32_EMULATION) := ia32entry.o sys_ia32.o ia32_signal.o tls32.o \ - ia32_binfmt.o fpu32.o ptrace32.o syscall32.o syscall32_syscall.o \ - mmap32.o +obj-$(CONFIG_IA32_EMULATION) := ia32entry.o sys_ia32.o ia32_signal.o \ + ia32_binfmt.o fpu32.o sysv-$(CONFIG_SYSVIPC) := ipc32.o obj-$(CONFIG_IA32_EMULATION) += $(sysv-y) @@ -13,40 +12,3 @@ obj-$(CONFIG_IA32_AOUT) += ia32_aout.o audit-class-$(CONFIG_AUDIT) := audit.o obj-$(CONFIG_IA32_EMULATION) += $(audit-class-y) - -$(obj)/syscall32_syscall.o: \ - $(foreach F,sysenter syscall,$(obj)/vsyscall-$F.so) - -# Teach kbuild about targets -targets := $(foreach F,$(addprefix vsyscall-,sysenter syscall),\ - $F.o $F.so $F.so.dbg) - -# The DSO images are built using a special linker script -quiet_cmd_syscall = SYSCALL $@ - cmd_syscall = $(CC) -m32 -nostdlib -shared \ - $(call ld-option, -Wl$(comma)--hash-style=sysv) \ - -Wl,-soname=linux-gate.so.1 -o $@ \ - -Wl,-T,$(filter-out FORCE,$^) - -$(obj)/%.so: OBJCOPYFLAGS := -S -$(obj)/%.so: $(obj)/%.so.dbg FORCE - $(call if_changed,objcopy) - -$(obj)/vsyscall-sysenter.so.dbg $(obj)/vsyscall-syscall.so.dbg: \ -$(obj)/vsyscall-%.so.dbg: $(src)/vsyscall.lds $(obj)/vsyscall-%.o FORCE - $(call if_changed,syscall) - -AFLAGS_vsyscall-sysenter.o = -m32 -Wa,-32 -AFLAGS_vsyscall-syscall.o = -m32 -Wa,-32 - -vdsos := vdso32-sysenter.so vdso32-syscall.so - -quiet_cmd_vdso_install = INSTALL $@ - cmd_vdso_install = cp $(@:vdso32-%.so=$(obj)/vsyscall-%.so.dbg) \ - $(MODLIB)/vdso/$@ - -$(vdsos): - @mkdir -p $(MODLIB)/vdso - $(call cmd,vdso_install) - -vdso_install: $(vdsos) diff -puN arch/x86/ia32/fpu32.c~git-x86 arch/x86/ia32/fpu32.c --- a/arch/x86/ia32/fpu32.c~git-x86 +++ a/arch/x86/ia32/fpu32.c @@ -1,8 +1,8 @@ -/* +/* * Copyright 2002 Andi Kleen, SuSE Labs. * FXSAVE<->i387 conversion support. Based on code by Gareth Hughes. * This is used for ptrace, signals and coredumps in 32bit emulation. 
- */ + */ #include #include @@ -13,96 +13,97 @@ static inline unsigned short twd_i387_to_fxsr(unsigned short twd) { unsigned int tmp; /* to avoid 16 bit prefixes in the code */ - + /* Transform each pair of bits into 01 (valid) or 00 (empty) */ - tmp = ~twd; - tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */ - /* and move the valid bits to the lower byte. */ - tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */ - tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */ - tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */ - return tmp; + tmp = ~twd; + tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */ + /* and move the valid bits to the lower byte. */ + tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */ + tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */ + tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */ + return tmp; } +#define FPREG_ADDR(f, n) ((void *)&(f)->st_space + (n) * 16); +#define FP_EXP_TAG_VALID 0 +#define FP_EXP_TAG_ZERO 1 +#define FP_EXP_TAG_SPECIAL 2 +#define FP_EXP_TAG_EMPTY 3 + static inline unsigned long twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave) { - struct _fpxreg *st = NULL; + struct _fpxreg *st; unsigned long tos = (fxsave->swd >> 11) & 7; unsigned long twd = (unsigned long) fxsave->twd; unsigned long tag; unsigned long ret = 0xffff0000; int i; -#define FPREG_ADDR(f, n) ((void *)&(f)->st_space + (n) * 16); - - for (i = 0 ; i < 8 ; i++) { + for (i = 0; i < 8; i++, twd >>= 1) { if (twd & 0x1) { - st = FPREG_ADDR( fxsave, (i - tos) & 7 ); + st = FPREG_ADDR(fxsave, (i - tos) & 7); switch (st->exponent & 0x7fff) { case 0x7fff: - tag = 2; /* Special */ + tag = FP_EXP_TAG_SPECIAL; break; case 0x0000: - if ( !st->significand[0] && - !st->significand[1] && - !st->significand[2] && - !st->significand[3] ) { - tag = 1; /* Zero */ - } else { - tag = 2; /* Special */ - } + if (!st->significand[0] && + !st->significand[1] && + !st->significand[2] && + !st->significand[3]) + tag = FP_EXP_TAG_ZERO; + else + tag = FP_EXP_TAG_SPECIAL; break; default: - if (st->significand[3] & 0x8000) { - tag = 0; /* Valid */ - } else { - tag = 2; /* Special */ - } + if (st->significand[3] & 0x8000) + tag = FP_EXP_TAG_VALID; + else + tag = FP_EXP_TAG_SPECIAL; break; } } else { - tag = 3; /* Empty */ + tag = FP_EXP_TAG_EMPTY; } - ret |= (tag << (2 * i)); - twd = twd >> 1; + ret |= tag << (2 * i); } return ret; } +#define G(num, val) err |= __get_user(val, num + (u32 __user *)buf) static inline int convert_fxsr_from_user(struct i387_fxsave_struct *fxsave, struct _fpstate_ia32 __user *buf) { struct _fpxreg *to; struct _fpreg __user *from; - int i; + int i, err = 0; u32 v; - int err = 0; -#define G(num,val) err |= __get_user(val, num + (u32 __user *)buf) G(0, fxsave->cwd); G(1, fxsave->swd); G(2, fxsave->twd); fxsave->twd = twd_i387_to_fxsr(fxsave->twd); G(3, fxsave->rip); G(4, v); - fxsave->fop = v>>16; /* cs ignored */ + /* cs ignored */ + fxsave->fop = v>>16; G(5, fxsave->rdp); /* 6: ds ignored */ -#undef G - if (err) - return -1; + if (err) + return -1; to = (struct _fpxreg *)&fxsave->st_space[0]; from = &buf->_st[0]; - for (i = 0 ; i < 8 ; i++, to++, from++) { + for (i = 0; i < 8; i++, to++, from++) { if (__copy_from_user(to, from, sizeof(*from))) return -1; } return 0; } +#define P(num, val) err |= __put_user(val, num + (u32 __user *)buf) static inline int convert_fxsr_to_user(struct _fpstate_ia32 __user *buf, struct i387_fxsave_struct *fxsave, @@ -111,60 +112,59 @@ static inline int convert_fxsr_to_user(s { struct _fpreg __user *to; struct 
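/*
 * twd_i387_to_fxsr() above compacts the i387 tag word (2 bits per
 * register: 00 valid, 01 zero, 10 special, 11 empty) into the FXSR
 * form (1 bit per register: 1 = in use). ~twd maps "empty" pairs to 00
 * and everything else to a nonzero pair; the or-shift-mask cascade
 * then folds each pair to a single bit and packs the 8 bits into the
 * low byte. A userspace sketch with a worked example (only st(0) in
 * use):
 */
#include <stdio.h>

static unsigned short twd_i387_to_fxsr(unsigned short twd)
{
	unsigned int tmp = ~twd;

	tmp = (tmp | (tmp >> 1)) & 0x5555;	/* 0V0V0V0V0V0V0V0V */
	tmp = (tmp | (tmp >> 1)) & 0x3333;	/* 00VV00VV00VV00VV */
	tmp = (tmp | (tmp >> 2)) & 0x0f0f;	/* 0000VVVV0000VVVV */
	tmp = (tmp | (tmp >> 4)) & 0x00ff;	/* 00000000VVVVVVVV */
	return tmp;
}

int main(void)
{
	/* st(0) tagged valid (00), st(1)..st(7) empty (11): */
	printf("%#x\n", twd_i387_to_fxsr(0xfffc));	/* prints 0x1 */
	return 0;
}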
_fpxreg *from; - int i; - u16 cs,ds; - int err = 0; + int i, err = 0; + u16 cs, ds; if (tsk == current) { - /* should be actually ds/cs at fpu exception time, - but that information is not available in 64bit mode. */ - asm("movw %%ds,%0 " : "=r" (ds)); - asm("movw %%cs,%0 " : "=r" (cs)); - } else { /* ptrace. task has stopped. */ + /* + * should be actually ds/cs at fpu exception time, but + * that information is not available in 64bit mode. + */ + asm("movw %%ds,%0 " : "=r" (ds)); + asm("movw %%cs,%0 " : "=r" (cs)); + } else { + /* ptrace. task has stopped. */ ds = tsk->thread.ds; cs = regs->cs; - } + } -#define P(num,val) err |= __put_user(val, num + (u32 __user *)buf) P(0, (u32)fxsave->cwd | 0xffff0000); P(1, (u32)fxsave->swd | 0xffff0000); P(2, twd_fxsr_to_i387(fxsave)); P(3, (u32)fxsave->rip); - P(4, cs | ((u32)fxsave->fop) << 16); + P(4, cs | ((u32)fxsave->fop) << 16); P(5, fxsave->rdp); P(6, 0xffff0000 | ds); -#undef P - if (err) - return -1; + if (err) + return -1; to = &buf->_st[0]; from = (struct _fpxreg *) &fxsave->st_space[0]; - for ( i = 0 ; i < 8 ; i++, to++, from++ ) { + for (i = 0; i < 8; i++, to++, from++) { if (__copy_to_user(to, from, sizeof(*to))) return -1; } return 0; } -int restore_i387_ia32(struct task_struct *tsk, struct _fpstate_ia32 __user *buf, int fsave) -{ +int restore_i387_ia32(struct task_struct *tsk, + struct _fpstate_ia32 __user *buf, int fsave) +{ clear_fpu(tsk); - if (!fsave) { - if (__copy_from_user(&tsk->thread.i387.fxsave, + if (!fsave) { + if (__copy_from_user(&tsk->thread.i387.fxsave, &buf->_fxsr_env[0], sizeof(struct i387_fxsave_struct))) return -1; tsk->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask; set_stopped_child_used_math(tsk); - } + } return convert_fxsr_from_user(&tsk->thread.i387.fxsave, buf); -} +} -int save_i387_ia32(struct task_struct *tsk, - struct _fpstate_ia32 __user *buf, - struct pt_regs *regs, - int fsave) +int save_i387_ia32(struct task_struct *tsk, struct _fpstate_ia32 __user *buf, + struct pt_regs *regs, int fsave) { int err = 0; @@ -174,8 +174,6 @@ int save_i387_ia32(struct task_struct *t if (fsave) return 0; err |= __put_user(tsk->thread.i387.fxsave.swd, &buf->status); - if (fsave) - return err ? -1 : 1; err |= __put_user(X86_FXSR_MAGIC, &buf->magic); err |= __copy_to_user(&buf->_fxsr_env[0], &tsk->thread.i387.fxsave, sizeof(struct i387_fxsave_struct)); diff -puN arch/x86/ia32/ia32_aout.c~git-x86 arch/x86/ia32/ia32_aout.c --- a/arch/x86/ia32/ia32_aout.c~git-x86 +++ a/arch/x86/ia32/ia32_aout.c @@ -36,61 +36,67 @@ #undef WARN_OLD #undef CORE_DUMP /* probably broken */ -static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs); -static int load_aout_library(struct file*); +static int load_aout_binary(struct linux_binprm *, struct pt_regs *regs); +static int load_aout_library(struct file *); #ifdef CORE_DUMP -static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit); +static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, + unsigned long limit); /* * fill in the user structure for a core dump.. */ -static void dump_thread32(struct pt_regs * regs, struct user32 * dump) +static void dump_thread32(struct pt_regs *regs, struct user32 *dump) { - u32 fs,gs; + u32 fs, gs; /* changed the size calculations - should hopefully work better. 
lbt */ dump->magic = CMAGIC; dump->start_code = 0; - dump->start_stack = regs->rsp & ~(PAGE_SIZE - 1); + dump->start_stack = regs->sp & ~(PAGE_SIZE - 1); dump->u_tsize = ((unsigned long) current->mm->end_code) >> PAGE_SHIFT; - dump->u_dsize = ((unsigned long) (current->mm->brk + (PAGE_SIZE-1))) >> PAGE_SHIFT; + dump->u_dsize = ((unsigned long) + (current->mm->brk + (PAGE_SIZE-1))) >> PAGE_SHIFT; dump->u_dsize -= dump->u_tsize; dump->u_ssize = 0; - dump->u_debugreg[0] = current->thread.debugreg0; - dump->u_debugreg[1] = current->thread.debugreg1; - dump->u_debugreg[2] = current->thread.debugreg2; - dump->u_debugreg[3] = current->thread.debugreg3; - dump->u_debugreg[4] = 0; - dump->u_debugreg[5] = 0; - dump->u_debugreg[6] = current->thread.debugreg6; - dump->u_debugreg[7] = current->thread.debugreg7; - - if (dump->start_stack < 0xc0000000) - dump->u_ssize = ((unsigned long) (0xc0000000 - dump->start_stack)) >> PAGE_SHIFT; - - dump->regs.ebx = regs->rbx; - dump->regs.ecx = regs->rcx; - dump->regs.edx = regs->rdx; - dump->regs.esi = regs->rsi; - dump->regs.edi = regs->rdi; - dump->regs.ebp = regs->rbp; - dump->regs.eax = regs->rax; + dump->u_debugreg[0] = current->thread.debugreg0; + dump->u_debugreg[1] = current->thread.debugreg1; + dump->u_debugreg[2] = current->thread.debugreg2; + dump->u_debugreg[3] = current->thread.debugreg3; + dump->u_debugreg[4] = 0; + dump->u_debugreg[5] = 0; + dump->u_debugreg[6] = current->thread.debugreg6; + dump->u_debugreg[7] = current->thread.debugreg7; + + if (dump->start_stack < 0xc0000000) { + unsigned long tmp; + + tmp = (unsigned long) (0xc0000000 - dump->start_stack); + dump->u_ssize = tmp >> PAGE_SHIFT; + } + + dump->regs.bx = regs->bx; + dump->regs.cx = regs->cx; + dump->regs.dx = regs->dx; + dump->regs.si = regs->si; + dump->regs.di = regs->di; + dump->regs.bp = regs->bp; + dump->regs.ax = regs->ax; dump->regs.ds = current->thread.ds; dump->regs.es = current->thread.es; asm("movl %%fs,%0" : "=r" (fs)); dump->regs.fs = fs; - asm("movl %%gs,%0" : "=r" (gs)); dump->regs.gs = gs; - dump->regs.orig_eax = regs->orig_rax; - dump->regs.eip = regs->rip; + asm("movl %%gs,%0" : "=r" (gs)); dump->regs.gs = gs; + dump->regs.orig_ax = regs->orig_ax; + dump->regs.ip = regs->ip; dump->regs.cs = regs->cs; - dump->regs.eflags = regs->eflags; - dump->regs.esp = regs->rsp; + dump->regs.flags = regs->flags; + dump->regs.sp = regs->sp; dump->regs.ss = regs->ss; #if 1 /* FIXME */ dump->u_fpvalid = 0; #else - dump->u_fpvalid = dump_fpu (regs, &dump->i387); + dump->u_fpvalid = dump_fpu(regs, &dump->i387); #endif } @@ -128,15 +134,19 @@ static int dump_write(struct file *file, return file->f_op->write(file, addr, nr, &file->f_pos) == nr; } -#define DUMP_WRITE(addr, nr) \ +#define DUMP_WRITE(addr, nr) \ if (!dump_write(file, (void *)(addr), (nr))) \ goto end_coredump; -#define DUMP_SEEK(offset) \ -if (file->f_op->llseek) { \ - if (file->f_op->llseek(file,(offset),0) != (offset)) \ - goto end_coredump; \ -} else file->f_pos = (offset) +#define DUMP_SEEK(offset) \ + if (file->f_op->llseek) { \ + if (file->f_op->llseek(file, (offset), 0) != (offset)) \ + goto end_coredump; \ + } else \ + file->f_pos = (offset) + +#define START_DATA() (u.u_tsize << PAGE_SHIFT) +#define START_STACK(u) (u.start_stack) /* * Routine writes a core dump image in the current directory. @@ -148,62 +158,70 @@ if (file->f_op->llseek) { \ * dumping of the process results in another error.. 
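/*
 * In dump_thread32() above, the stack size recorded in the a.out core
 * header is simply the distance from the page-rounded user stack
 * pointer up to the 3 GB boundary, in pages. For example, with 4 KB
 * pages and start_stack = 0xbf800000:
 *
 *	tmp     = 0xc0000000 - 0xbf800000 = 0x00800000   (8 MB)
 *	u_ssize = 0x00800000 >> 12        = 0x800        (2048 pages)
 */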
*/ -static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit) +static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, + unsigned long limit) { mm_segment_t fs; int has_dumped = 0; unsigned long dump_start, dump_size; struct user32 dump; -# define START_DATA(u) (u.u_tsize << PAGE_SHIFT) -# define START_STACK(u) (u.start_stack) fs = get_fs(); set_fs(KERNEL_DS); has_dumped = 1; current->flags |= PF_DUMPCORE; - strncpy(dump.u_comm, current->comm, sizeof(current->comm)); - dump.u_ar0 = (u32)(((unsigned long)(&dump.regs)) - ((unsigned long)(&dump))); + strncpy(dump.u_comm, current->comm, sizeof(current->comm)); + dump.u_ar0 = (u32)(((unsigned long)(&dump.regs)) - + ((unsigned long)(&dump))); dump.signal = signr; dump_thread32(regs, &dump); -/* If the size of the dump file exceeds the rlimit, then see what would happen - if we wrote the stack, but not the data area. */ + /* + * If the size of the dump file exceeds the rlimit, then see + * what would happen if we wrote the stack, but not the data + * area. + */ if ((dump.u_dsize + dump.u_ssize + 1) * PAGE_SIZE > limit) dump.u_dsize = 0; -/* Make sure we have enough room to write the stack and data areas. */ + /* Make sure we have enough room to write the stack and data areas. */ if ((dump.u_ssize + 1) * PAGE_SIZE > limit) dump.u_ssize = 0; -/* make sure we actually have a data and stack area to dump */ + /* make sure we actually have a data and stack area to dump */ set_fs(USER_DS); - if (!access_ok(VERIFY_READ, (void *) (unsigned long)START_DATA(dump), dump.u_dsize << PAGE_SHIFT)) + if (!access_ok(VERIFY_READ, (void *) (unsigned long)START_DATA(dump), + dump.u_dsize << PAGE_SHIFT)) dump.u_dsize = 0; - if (!access_ok(VERIFY_READ, (void *) (unsigned long)START_STACK(dump), dump.u_ssize << PAGE_SHIFT)) + if (!access_ok(VERIFY_READ, (void *) (unsigned long)START_STACK(dump), + dump.u_ssize << PAGE_SHIFT)) dump.u_ssize = 0; set_fs(KERNEL_DS); -/* struct user */ - DUMP_WRITE(&dump,sizeof(dump)); -/* Now dump all of the user data. Include malloced stuff as well */ + /* struct user */ + DUMP_WRITE(&dump, sizeof(dump)); + /* Now dump all of the user data. Include malloced stuff as well */ DUMP_SEEK(PAGE_SIZE); -/* now we start writing out the user space info */ + /* now we start writing out the user space info */ set_fs(USER_DS); -/* Dump the data area */ + /* Dump the data area */ if (dump.u_dsize != 0) { dump_start = START_DATA(dump); dump_size = dump.u_dsize << PAGE_SHIFT; - DUMP_WRITE(dump_start,dump_size); + DUMP_WRITE(dump_start, dump_size); } -/* Now prepare to dump the stack area */ + /* Now prepare to dump the stack area */ if (dump.u_ssize != 0) { dump_start = START_STACK(dump); dump_size = dump.u_ssize << PAGE_SHIFT; - DUMP_WRITE(dump_start,dump_size); + DUMP_WRITE(dump_start, dump_size); } -/* Finally dump the task struct. Not be used by gdb, but could be useful */ + /* + * Finally dump the task struct. 
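/*
 * The two rlimit checks in aout_core_dump() above degrade the core
 * file gracefully rather than failing outright: first the data area is
 * dropped if header + data + stack would exceed the limit, then the
 * stack too if header + stack alone still exceed it. E.g. with a 1 MB
 * limit (256 pages of 4 KB), u_dsize = 300 and u_ssize = 100:
 *
 *	300 + 100 + 1 > 256   ->  u_dsize = 0 (data area dropped)
 *	100 + 1      <= 256   ->  stack area is still dumped
 */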
Not be used by gdb, but + * could be useful + */ set_fs(KERNEL_DS); - DUMP_WRITE(current,sizeof(*current)); + DUMP_WRITE(current, sizeof(*current)); end_coredump: set_fs(fs); return has_dumped; @@ -217,35 +235,34 @@ end_coredump: */ static u32 __user *create_aout_tables(char __user *p, struct linux_binprm *bprm) { - u32 __user *argv; - u32 __user *envp; - u32 __user *sp; - int argc = bprm->argc; - int envc = bprm->envc; + u32 __user *argv, *envp, *sp; + int argc = bprm->argc, envc = bprm->envc; sp = (u32 __user *) ((-(unsigned long)sizeof(u32)) & (unsigned long) p); sp -= envc+1; envp = sp; sp -= argc+1; argv = sp; - put_user((unsigned long) envp,--sp); - put_user((unsigned long) argv,--sp); - put_user(argc,--sp); + put_user((unsigned long) envp, --sp); + put_user((unsigned long) argv, --sp); + put_user(argc, --sp); current->mm->arg_start = (unsigned long) p; - while (argc-->0) { + while (argc-- > 0) { char c; - put_user((u32)(unsigned long)p,argv++); + + put_user((u32)(unsigned long)p, argv++); do { - get_user(c,p++); + get_user(c, p++); } while (c); } put_user(0, argv); current->mm->arg_end = current->mm->env_start = (unsigned long) p; - while (envc-->0) { + while (envc-- > 0) { char c; - put_user((u32)(unsigned long)p,envp++); + + put_user((u32)(unsigned long)p, envp++); do { - get_user(c,p++); + get_user(c, p++); } while (c); } put_user(0, envp); @@ -257,20 +274,18 @@ static u32 __user *create_aout_tables(ch * These are the functions used to load a.out style executables and shared * libraries. There is no binary dependent code anywhere else. */ - -static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs) +static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs) { + unsigned long error, fd_offset, rlim; struct exec ex; - unsigned long error; - unsigned long fd_offset; - unsigned long rlim; int retval; ex = *((struct exec *) bprm->buf); /* exec-header */ if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != OMAGIC && N_MAGIC(ex) != QMAGIC && N_MAGIC(ex) != NMAGIC) || N_TRSIZE(ex) || N_DRSIZE(ex) || - i_size_read(bprm->file->f_path.dentry->d_inode) < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) { + i_size_read(bprm->file->f_path.dentry->d_inode) < + ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) { return -ENOEXEC; } @@ -291,13 +306,13 @@ static int load_aout_binary(struct linux if (retval) return retval; - regs->cs = __USER32_CS; + regs->cs = __USER32_CS; regs->r8 = regs->r9 = regs->r10 = regs->r11 = regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0; /* OK, This is the point of no return */ set_personality(PER_LINUX); - set_thread_flag(TIF_IA32); + set_thread_flag(TIF_IA32); clear_thread_flag(TIF_ABI_PENDING); current->mm->end_code = ex.a_text + @@ -311,7 +326,7 @@ static int load_aout_binary(struct linux current->mm->mmap = NULL; compute_creds(bprm); - current->flags &= ~PF_FORKNOEXEC; + current->flags &= ~PF_FORKNOEXEC; if (N_MAGIC(ex) == OMAGIC) { unsigned long text_addr, map_size; @@ -338,30 +353,30 @@ static int load_aout_binary(struct linux send_sig(SIGKILL, current, 0); return error; } - + flush_icache_range(text_addr, text_addr+ex.a_text+ex.a_data); } else { #ifdef WARN_OLD static unsigned long error_time, error_time2; if ((ex.a_text & 0xfff || ex.a_data & 0xfff) && - (N_MAGIC(ex) != NMAGIC) && (jiffies-error_time2) > 5*HZ) - { + (N_MAGIC(ex) != NMAGIC) && (jiffies-error_time2) > 5*HZ) { printk(KERN_NOTICE "executable not page aligned\n"); error_time2 = jiffies; } if ((fd_offset & ~PAGE_MASK) != 0 && - (jiffies-error_time) > 5*HZ) - { - 
printk(KERN_WARNING - "fd_offset is not page aligned. Please convert program: %s\n", + (jiffies - error_time) > 5*HZ) { + printk(KERN_WARNING + "fd_offset is not page aligned. Please convert " + "program: %s\n", bprm->file->f_path.dentry->d_name.name); error_time = jiffies; } #endif - if (!bprm->file->f_op->mmap||((fd_offset & ~PAGE_MASK) != 0)) { + if (!bprm->file->f_op->mmap || (fd_offset & ~PAGE_MASK) != 0) { loff_t pos = fd_offset; + down_write(¤t->mm->mmap_sem); do_brk(N_TXTADDR(ex), ex.a_text+ex.a_data); up_write(¤t->mm->mmap_sem); @@ -376,9 +391,10 @@ static int load_aout_binary(struct linux down_write(¤t->mm->mmap_sem); error = do_mmap(bprm->file, N_TXTADDR(ex), ex.a_text, - PROT_READ | PROT_EXEC, - MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE | MAP_32BIT, - fd_offset); + PROT_READ | PROT_EXEC, + MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | + MAP_EXECUTABLE | MAP_32BIT, + fd_offset); up_write(¤t->mm->mmap_sem); if (error != N_TXTADDR(ex)) { @@ -387,9 +403,10 @@ static int load_aout_binary(struct linux } down_write(¤t->mm->mmap_sem); - error = do_mmap(bprm->file, N_DATADDR(ex), ex.a_data, + error = do_mmap(bprm->file, N_DATADDR(ex), ex.a_data, PROT_READ | PROT_WRITE | PROT_EXEC, - MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE | MAP_32BIT, + MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | + MAP_EXECUTABLE | MAP_32BIT, fd_offset + ex.a_text); up_write(¤t->mm->mmap_sem); if (error != N_DATADDR(ex)) { @@ -403,9 +420,9 @@ beyond_if: set_brk(current->mm->start_brk, current->mm->brk); retval = setup_arg_pages(bprm, IA32_STACK_TOP, EXSTACK_DEFAULT); - if (retval < 0) { - /* Someone check-me: is this error path enough? */ - send_sig(SIGKILL, current, 0); + if (retval < 0) { + /* Someone check-me: is this error path enough? */ + send_sig(SIGKILL, current, 0); return retval; } @@ -414,10 +431,10 @@ beyond_if: /* start thread */ asm volatile("movl %0,%%fs" :: "r" (0)); \ asm volatile("movl %0,%%es; movl %0,%%ds": :"r" (__USER32_DS)); - load_gs_index(0); - (regs)->rip = ex.a_entry; - (regs)->rsp = current->mm->start_stack; - (regs)->eflags = 0x200; + load_gs_index(0); + (regs)->ip = ex.a_entry; + (regs)->sp = current->mm->start_stack; + (regs)->flags = 0x200; (regs)->cs = __USER32_CS; (regs)->ss = __USER32_DS; regs->r8 = regs->r9 = regs->r10 = regs->r11 = @@ -425,7 +442,7 @@ beyond_if: set_fs(USER_DS); if (unlikely(current->ptrace & PT_PTRACED)) { if (current->ptrace & PT_TRACE_EXEC) - ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP); + ptrace_notify((PTRACE_EVENT_EXEC << 8) | SIGTRAP); else send_sig(SIGTRAP, current, 0); } @@ -434,9 +451,8 @@ beyond_if: static int load_aout_library(struct file *file) { - struct inode * inode; - unsigned long bss, start_addr, len; - unsigned long error; + struct inode *inode; + unsigned long bss, start_addr, len, error; int retval; struct exec ex; @@ -450,7 +466,8 @@ static int load_aout_library(struct file /* We come in here for the regular a.out style of shared libraries */ if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != QMAGIC) || N_TRSIZE(ex) || N_DRSIZE(ex) || ((ex.a_entry & 0xfff) && N_MAGIC(ex) == ZMAGIC) || - i_size_read(inode) < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) { + i_size_read(inode) < + ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) { goto out; } @@ -467,10 +484,10 @@ static int load_aout_library(struct file #ifdef WARN_OLD static unsigned long error_time; - if ((jiffies-error_time) > 5*HZ) - { - printk(KERN_WARNING - "N_TXTOFF is not page aligned. 
Please convert library: %s\n", + if ((jiffies-error_time) > 5*HZ) { + printk(KERN_WARNING + "N_TXTOFF is not page aligned. Please convert " + "library: %s\n", file->f_path.dentry->d_name.name); error_time = jiffies; } @@ -478,11 +495,12 @@ static int load_aout_library(struct file down_write(¤t->mm->mmap_sem); do_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss); up_write(¤t->mm->mmap_sem); - + file->f_op->read(file, (char __user *)start_addr, ex.a_text + ex.a_data, &pos); flush_icache_range((unsigned long) start_addr, - (unsigned long) start_addr + ex.a_text + ex.a_data); + (unsigned long) start_addr + ex.a_text + + ex.a_data); retval = 0; goto out; diff -puN arch/x86/ia32/ia32_binfmt.c~git-x86 arch/x86/ia32/ia32_binfmt.c --- a/arch/x86/ia32/ia32_binfmt.c~git-x86 +++ a/arch/x86/ia32/ia32_binfmt.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #undef ELF_ARCH #undef ELF_CLASS @@ -47,14 +47,13 @@ #define AT_SYSINFO 32 #define AT_SYSINFO_EHDR 33 -int sysctl_vsyscall32 = 1; +extern int sysctl_vsyscall32; #undef ARCH_DLINFO #define ARCH_DLINFO do { \ if (sysctl_vsyscall32) { \ - current->mm->context.vdso = (void *)VSYSCALL32_BASE; \ - NEW_AUX_ENT(AT_SYSINFO, (u32)(u64)VSYSCALL32_VSYSCALL); \ - NEW_AUX_ENT(AT_SYSINFO_EHDR, VSYSCALL32_BASE); \ + NEW_AUX_ENT(AT_SYSINFO, (u32)VDSO_ENTRY); \ + NEW_AUX_ENT(AT_SYSINFO_EHDR, (u32)VDSO_CURRENT_BASE); \ } \ } while(0) @@ -74,22 +73,22 @@ struct file; /* Assumes current==process to be dumped */ #undef ELF_CORE_COPY_REGS #define ELF_CORE_COPY_REGS(pr_reg, regs) \ - pr_reg[0] = regs->rbx; \ - pr_reg[1] = regs->rcx; \ - pr_reg[2] = regs->rdx; \ - pr_reg[3] = regs->rsi; \ - pr_reg[4] = regs->rdi; \ - pr_reg[5] = regs->rbp; \ - pr_reg[6] = regs->rax; \ + pr_reg[0] = regs->bx; \ + pr_reg[1] = regs->cx; \ + pr_reg[2] = regs->dx; \ + pr_reg[3] = regs->si; \ + pr_reg[4] = regs->di; \ + pr_reg[5] = regs->bp; \ + pr_reg[6] = regs->ax; \ pr_reg[7] = _GET_SEG(ds); \ pr_reg[8] = _GET_SEG(es); \ pr_reg[9] = _GET_SEG(fs); \ pr_reg[10] = _GET_SEG(gs); \ - pr_reg[11] = regs->orig_rax; \ - pr_reg[12] = regs->rip; \ + pr_reg[11] = regs->orig_ax; \ + pr_reg[12] = regs->ip; \ pr_reg[13] = regs->cs; \ - pr_reg[14] = regs->eflags; \ - pr_reg[15] = regs->rsp; \ + pr_reg[14] = regs->flags; \ + pr_reg[15] = regs->sp; \ pr_reg[16] = regs->ss; @@ -206,9 +205,9 @@ do { \ asm volatile("movl %0,%%fs" :: "r" (0)); \ asm volatile("movl %0,%%es; movl %0,%%ds": :"r" (__USER32_DS)); \ load_gs_index(0); \ - (regs)->rip = (new_rip); \ - (regs)->rsp = (new_rsp); \ - (regs)->eflags = 0x200; \ + (regs)->ip = (new_rip); \ + (regs)->sp = (new_rsp); \ + (regs)->flags = X86_EFLAGS_IF; \ (regs)->cs = __USER32_CS; \ (regs)->ss = __USER32_DS; \ set_fs(USER_DS); \ @@ -234,13 +233,13 @@ extern int syscall32_setup_pages(struct static void elf32_init(struct pt_regs *regs) { struct task_struct *me = current; - regs->rdi = 0; - regs->rsi = 0; - regs->rdx = 0; - regs->rcx = 0; - regs->rax = 0; - regs->rbx = 0; - regs->rbp = 0; + regs->di = 0; + regs->si = 0; + regs->dx = 0; + regs->cx = 0; + regs->ax = 0; + regs->bx = 0; + regs->bp = 0; regs->r8 = regs->r9 = regs->r10 = regs->r11 = regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0; me->thread.fs = 0; diff -puN arch/x86/ia32/ia32_signal.c~git-x86 arch/x86/ia32/ia32_signal.c --- a/arch/x86/ia32/ia32_signal.c~git-x86 +++ a/arch/x86/ia32/ia32_signal.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #define DEBUG_SIG 0 @@ -43,7 +43,8 @@ void signal_fault(struct pt_regs *regs, int copy_siginfo_to_user32(compat_siginfo_t __user *to, 
siginfo_t *from) { int err; - if (!access_ok (VERIFY_WRITE, to, sizeof(compat_siginfo_t))) + + if (!access_ok(VERIFY_WRITE, to, sizeof(compat_siginfo_t))) return -EFAULT; /* If you change siginfo_t structure, please make sure that @@ -53,16 +54,19 @@ int copy_siginfo_to_user32(compat_siginf 3 ints plus the relevant union member. */ err = __put_user(from->si_signo, &to->si_signo); err |= __put_user(from->si_errno, &to->si_errno); - err |= __put_user((short)from->si_code, &to->si_code); + err |= __put_user((short)from->si_code, &to->si_code); if (from->si_code < 0) { err |= __put_user(from->si_pid, &to->si_pid); - err |= __put_user(from->si_uid, &to->si_uid); - err |= __put_user(ptr_to_compat(from->si_ptr), &to->si_ptr); + err |= __put_user(from->si_uid, &to->si_uid); + err |= __put_user(ptr_to_compat(from->si_ptr), &to->si_ptr); } else { - /* First 32bits of unions are always present: - * si_pid === si_band === si_tid === si_addr(LS half) */ - err |= __put_user(from->_sifields._pad[0], &to->_sifields._pad[0]); + /* + * First 32bits of unions are always present: + * si_pid === si_band === si_tid === si_addr(LS half) + */ + err |= __put_user(from->_sifields._pad[0], + &to->_sifields._pad[0]); switch (from->si_code >> 16) { case __SI_FAULT >> 16: break; @@ -76,14 +80,15 @@ int copy_siginfo_to_user32(compat_siginf err |= __put_user(from->si_uid, &to->si_uid); break; case __SI_POLL >> 16: - err |= __put_user(from->si_fd, &to->si_fd); + err |= __put_user(from->si_fd, &to->si_fd); break; case __SI_TIMER >> 16: - err |= __put_user(from->si_overrun, &to->si_overrun); + err |= __put_user(from->si_overrun, &to->si_overrun); err |= __put_user(ptr_to_compat(from->si_ptr), - &to->si_ptr); + &to->si_ptr); break; - case __SI_RT >> 16: /* This is not generated by the kernel as of now. */ + /* This is not generated by the kernel as of now. */ + case __SI_RT >> 16: case __SI_MESGQ >> 16: err |= __put_user(from->si_uid, &to->si_uid); err |= __put_user(from->si_int, &to->si_int); @@ -97,7 +102,8 @@ int copy_siginfo_from_user32(siginfo_t * { int err; u32 ptr32; - if (!access_ok (VERIFY_READ, from, sizeof(compat_siginfo_t))) + + if (!access_ok(VERIFY_READ, from, sizeof(compat_siginfo_t))) return -EFAULT; err = __get_user(to->si_signo, &from->si_signo); @@ -112,8 +118,7 @@ int copy_siginfo_from_user32(siginfo_t * return err; } -asmlinkage long -sys32_sigsuspend(int history0, int history1, old_sigset_t mask) +asmlinkage long sys32_sigsuspend(int history0, int history1, old_sigset_t mask) { mask &= _BLOCKABLE; spin_lock_irq(¤t->sighand->siglock); @@ -128,36 +133,37 @@ sys32_sigsuspend(int history0, int histo return -ERESTARTNOHAND; } -asmlinkage long -sys32_sigaltstack(const stack_ia32_t __user *uss_ptr, - stack_ia32_t __user *uoss_ptr, - struct pt_regs *regs) +asmlinkage long sys32_sigaltstack(const stack_ia32_t __user *uss_ptr, + stack_ia32_t __user *uoss_ptr, + struct pt_regs *regs) { - stack_t uss,uoss; + stack_t uss, uoss; int ret; - mm_segment_t seg; - if (uss_ptr) { + mm_segment_t seg; + + if (uss_ptr) { u32 ptr; - memset(&uss,0,sizeof(stack_t)); - if (!access_ok(VERIFY_READ,uss_ptr,sizeof(stack_ia32_t)) || + + memset(&uss, 0, sizeof(stack_t)); + if (!access_ok(VERIFY_READ, uss_ptr, sizeof(stack_ia32_t)) || __get_user(ptr, &uss_ptr->ss_sp) || __get_user(uss.ss_flags, &uss_ptr->ss_flags) || __get_user(uss.ss_size, &uss_ptr->ss_size)) return -EFAULT; uss.ss_sp = compat_ptr(ptr); } - seg = get_fs(); - set_fs(KERNEL_DS); - ret = do_sigaltstack(uss_ptr ? 
&uss : NULL, &uoss, regs->rsp); - set_fs(seg); + seg = get_fs(); + set_fs(KERNEL_DS); + ret = do_sigaltstack(uss_ptr ? &uss : NULL, &uoss, regs->sp); + set_fs(seg); if (ret >= 0 && uoss_ptr) { - if (!access_ok(VERIFY_WRITE,uoss_ptr,sizeof(stack_ia32_t)) || + if (!access_ok(VERIFY_WRITE, uoss_ptr, sizeof(stack_ia32_t)) || __put_user(ptr_to_compat(uoss.ss_sp), &uoss_ptr->ss_sp) || __put_user(uoss.ss_flags, &uoss_ptr->ss_flags) || __put_user(uoss.ss_size, &uoss_ptr->ss_size)) ret = -EFAULT; - } - return ret; + } + return ret; } /* @@ -186,87 +192,85 @@ struct rt_sigframe char retcode[8]; }; -static int -ia32_restore_sigcontext(struct pt_regs *regs, struct sigcontext_ia32 __user *sc, unsigned int *peax) -{ - unsigned int err = 0; - +#define COPY(x) { \ + unsigned int reg; \ + err |= __get_user(reg, &sc->x); \ + regs->x = reg; \ +} + +#define RELOAD_SEG(seg,mask) \ + { unsigned int cur; \ + unsigned short pre; \ + err |= __get_user(pre, &sc->seg); \ + asm volatile("movl %%" #seg ",%0" : "=r" (cur)); \ + pre |= mask; \ + if (pre != cur) loadsegment(seg, pre); } + +static int ia32_restore_sigcontext(struct pt_regs *regs, + struct sigcontext_ia32 __user *sc, + unsigned int *peax) +{ + unsigned int tmpflags, gs, oldgs, err = 0; + struct _fpstate_ia32 __user *buf; + u32 tmp; + /* Always make any pending restarted system calls return -EINTR */ current_thread_info()->restart_block.fn = do_no_restart_syscall; #if DEBUG_SIG - printk("SIG restore_sigcontext: sc=%p err(%x) eip(%x) cs(%x) flg(%x)\n", - sc, sc->err, sc->eip, sc->cs, sc->eflags); + printk(KERN_DEBUG "SIG restore_sigcontext: " + "sc=%p err(%x) eip(%x) cs(%x) flg(%x)\n", + sc, sc->err, sc->ip, sc->cs, sc->flags); #endif -#define COPY(x) { \ - unsigned int reg; \ - err |= __get_user(reg, &sc->e ##x); \ - regs->r ## x = reg; \ -} -#define RELOAD_SEG(seg,mask) \ - { unsigned int cur; \ - unsigned short pre; \ - err |= __get_user(pre, &sc->seg); \ - asm volatile("movl %%" #seg ",%0" : "=r" (cur)); \ - pre |= mask; \ - if (pre != cur) loadsegment(seg,pre); } - - /* Reload fs and gs if they have changed in the signal handler. - This does not handle long fs/gs base changes in the handler, but - does not clobber them at least in the normal case. */ - - { - unsigned gs, oldgs; - err |= __get_user(gs, &sc->gs); - gs |= 3; - asm("movl %%gs,%0" : "=r" (oldgs)); - if (gs != oldgs) - load_gs_index(gs); - } - RELOAD_SEG(fs,3); - RELOAD_SEG(ds,3); - RELOAD_SEG(es,3); + /* + * Reload fs and gs if they have changed in the signal + * handler. This does not handle long fs/gs base changes in + * the handler, but does not clobber them at least in the + * normal case. 
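/*
 * The hoisted COPY() macro above no longer needs token pasting: after
 * the struct sigcontext_ia32 / struct pt_regs field renames (eip -> ip,
 * rip -> ip, edi/rdi -> di, and so on) both sides share one field
 * name, so
 *
 *	COPY(di);
 *
 * expands (roughly) to:
 *
 *	{
 *		unsigned int reg;
 *		err |= __get_user(reg, &sc->di);
 *		regs->di = reg;
 *	}
 *
 * whereas the old macro had to paste the "e" and "r" prefixes onto the
 * two sides separately.
 */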
+ */ + err |= __get_user(gs, &sc->gs); + gs |= 3; + asm("movl %%gs,%0" : "=r" (oldgs)); + if (gs != oldgs) + load_gs_index(gs); + + RELOAD_SEG(fs, 3); + RELOAD_SEG(ds, 3); + RELOAD_SEG(es, 3); COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); COPY(dx); COPY(cx); COPY(ip); - /* Don't touch extended registers */ - - err |= __get_user(regs->cs, &sc->cs); - regs->cs |= 3; - err |= __get_user(regs->ss, &sc->ss); - regs->ss |= 3; - - { - unsigned int tmpflags; - err |= __get_user(tmpflags, &sc->eflags); - regs->eflags = (regs->eflags & ~0x40DD5) | (tmpflags & 0x40DD5); - regs->orig_rax = -1; /* disable syscall checks */ - } + /* Don't touch extended registers */ + + err |= __get_user(regs->cs, &sc->cs); + regs->cs |= 3; + err |= __get_user(regs->ss, &sc->ss); + regs->ss |= 3; + + err |= __get_user(tmpflags, &sc->flags); + regs->flags = (regs->flags & ~0x40DD5) | (tmpflags & 0x40DD5); + /* disable syscall checks */ + regs->orig_ax = -1; + + err |= __get_user(tmp, &sc->fpstate); + buf = compat_ptr(tmp); + if (buf) { + if (!access_ok(VERIFY_READ, buf, sizeof(*buf))) + goto badframe; + err |= restore_i387_ia32(current, buf, 0); + } else { + struct task_struct *me = current; - { - u32 tmp; - struct _fpstate_ia32 __user * buf; - err |= __get_user(tmp, &sc->fpstate); - buf = compat_ptr(tmp); - if (buf) { - if (!access_ok(VERIFY_READ, buf, sizeof(*buf))) - goto badframe; - err |= restore_i387_ia32(current, buf, 0); - } else { - struct task_struct *me = current; - if (used_math()) { - clear_fpu(me); - clear_used_math(); - } + if (used_math()) { + clear_fpu(me); + clear_used_math(); } } - { - u32 tmp; - err |= __get_user(tmp, &sc->eax); - *peax = tmp; - } + err |= __get_user(tmp, &sc->ax); + *peax = tmp; + return err; badframe: @@ -275,15 +279,16 @@ badframe: asmlinkage long sys32_sigreturn(struct pt_regs *regs) { - struct sigframe __user *frame = (struct sigframe __user *)(regs->rsp-8); + struct sigframe __user *frame = (struct sigframe __user *)(regs->sp-8); sigset_t set; - unsigned int eax; + unsigned int ax; if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) goto badframe; if (__get_user(set.sig[0], &frame->sc.oldmask) || (_COMPAT_NSIG_WORDS > 1 - && __copy_from_user((((char *) &set.sig) + 4), &frame->extramask, + && __copy_from_user((((char *) &set.sig) + 4), + &frame->extramask, sizeof(frame->extramask)))) goto badframe; @@ -292,24 +297,24 @@ asmlinkage long sys32_sigreturn(struct p current->blocked = set; recalc_sigpending(); spin_unlock_irq(¤t->sighand->siglock); - - if (ia32_restore_sigcontext(regs, &frame->sc, &eax)) + + if (ia32_restore_sigcontext(regs, &frame->sc, &ax)) goto badframe; - return eax; + return ax; badframe: signal_fault(regs, frame, "32bit sigreturn"); return 0; -} +} asmlinkage long sys32_rt_sigreturn(struct pt_regs *regs) { struct rt_sigframe __user *frame; sigset_t set; - unsigned int eax; + unsigned int ax; struct pt_regs tregs; - frame = (struct rt_sigframe __user *)(regs->rsp - 4); + frame = (struct rt_sigframe __user *)(regs->sp - 4); if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) goto badframe; @@ -321,28 +326,28 @@ asmlinkage long sys32_rt_sigreturn(struc current->blocked = set; recalc_sigpending(); spin_unlock_irq(¤t->sighand->siglock); - - if (ia32_restore_sigcontext(regs, &frame->uc.uc_mcontext, &eax)) + + if (ia32_restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) goto badframe; tregs = *regs; if (sys32_sigaltstack(&frame->uc.uc_stack, NULL, &tregs) == -EFAULT) goto badframe; - return eax; + return ax; badframe: - signal_fault(regs,frame,"32bit rt 
sigreturn"); + signal_fault(regs, frame, "32bit rt sigreturn"); return 0; -} +} /* * Set up a signal frame. */ -static int -ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc, struct _fpstate_ia32 __user *fpstate, - struct pt_regs *regs, unsigned int mask) +static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc, + struct _fpstate_ia32 __user *fpstate, + struct pt_regs *regs, unsigned int mask) { int tmp, err = 0; @@ -356,26 +361,26 @@ ia32_setup_sigcontext(struct sigcontext_ __asm__("movl %%es,%0" : "=r"(tmp): "0"(tmp)); err |= __put_user(tmp, (unsigned int __user *)&sc->es); - err |= __put_user((u32)regs->rdi, &sc->edi); - err |= __put_user((u32)regs->rsi, &sc->esi); - err |= __put_user((u32)regs->rbp, &sc->ebp); - err |= __put_user((u32)regs->rsp, &sc->esp); - err |= __put_user((u32)regs->rbx, &sc->ebx); - err |= __put_user((u32)regs->rdx, &sc->edx); - err |= __put_user((u32)regs->rcx, &sc->ecx); - err |= __put_user((u32)regs->rax, &sc->eax); + err |= __put_user((u32)regs->di, &sc->di); + err |= __put_user((u32)regs->si, &sc->si); + err |= __put_user((u32)regs->bp, &sc->bp); + err |= __put_user((u32)regs->sp, &sc->sp); + err |= __put_user((u32)regs->bx, &sc->bx); + err |= __put_user((u32)regs->dx, &sc->dx); + err |= __put_user((u32)regs->cx, &sc->cx); + err |= __put_user((u32)regs->ax, &sc->ax); err |= __put_user((u32)regs->cs, &sc->cs); err |= __put_user((u32)regs->ss, &sc->ss); err |= __put_user(current->thread.trap_no, &sc->trapno); err |= __put_user(current->thread.error_code, &sc->err); - err |= __put_user((u32)regs->rip, &sc->eip); - err |= __put_user((u32)regs->eflags, &sc->eflags); - err |= __put_user((u32)regs->rsp, &sc->esp_at_signal); + err |= __put_user((u32)regs->ip, &sc->ip); + err |= __put_user((u32)regs->flags, &sc->flags); + err |= __put_user((u32)regs->sp, &sc->sp_at_signal); tmp = save_i387_ia32(current, fpstate, regs, 0); if (tmp < 0) err = -EFAULT; - else { + else { clear_used_math(); stts(); err |= __put_user(ptr_to_compat(tmp ? fpstate : NULL), @@ -392,40 +397,53 @@ ia32_setup_sigcontext(struct sigcontext_ /* * Determine which stack to use.. */ -static void __user * -get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size) +static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, + size_t frame_size) { - unsigned long rsp; + unsigned long sp; /* Default to using normal stack */ - rsp = regs->rsp; + sp = regs->sp; /* This is the X/Open sanctioned signal stack switching. */ if (ka->sa.sa_flags & SA_ONSTACK) { - if (sas_ss_flags(rsp) == 0) - rsp = current->sas_ss_sp + current->sas_ss_size; + if (sas_ss_flags(sp) == 0) + sp = current->sas_ss_sp + current->sas_ss_size; } /* This is the legacy signal stack switching. */ else if ((regs->ss & 0xffff) != __USER_DS && !(ka->sa.sa_flags & SA_RESTORER) && - ka->sa.sa_restorer) { - rsp = (unsigned long) ka->sa.sa_restorer; - } + ka->sa.sa_restorer) + sp = (unsigned long) ka->sa.sa_restorer; - rsp -= frame_size; + sp -= frame_size; /* Align the stack pointer according to the i386 ABI, * i.e. so that on function entry ((sp + 4) & 15) == 0. 
*/ - rsp = ((rsp + 4) & -16ul) - 4; - return (void __user *) rsp; + sp = ((sp + 4) & -16ul) - 4; + return (void __user *) sp; } int ia32_setup_frame(int sig, struct k_sigaction *ka, - compat_sigset_t *set, struct pt_regs * regs) + compat_sigset_t *set, struct pt_regs *regs) { struct sigframe __user *frame; + void __user *restorer; int err = 0; + /* copy_to_user optimizes that into a single 8 byte store */ + static const struct { + u16 poplmovl; + u32 val; + u16 int80; + u16 pad; + } __attribute__((packed)) code = { + 0xb858, /* popl %eax ; movl $...,%eax */ + __NR_ia32_sigreturn, + 0x80cd, /* int $0x80 */ + 0, + }; + frame = get_sigframe(ka, regs, sizeof(*frame)); if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) @@ -443,64 +461,53 @@ int ia32_setup_frame(int sig, struct k_s if (_COMPAT_NSIG_WORDS > 1) { err |= __copy_to_user(frame->extramask, &set->sig[1], sizeof(frame->extramask)); + if (err) + goto give_sigsegv; } - if (err) - goto give_sigsegv; - /* Return stub is in 32bit vsyscall page */ - { - void __user *restorer; + if (ka->sa.sa_flags & SA_RESTORER) { + restorer = ka->sa.sa_restorer; + } else { + /* Return stub is in 32bit vsyscall page */ if (current->binfmt->hasvdso) - restorer = VSYSCALL32_SIGRETURN; + restorer = VDSO32_SYMBOL(current->mm->context.vdso, + sigreturn); else restorer = (void *)&frame->retcode; - if (ka->sa.sa_flags & SA_RESTORER) - restorer = ka->sa.sa_restorer; - err |= __put_user(ptr_to_compat(restorer), &frame->pretcode); - } - /* These are actually not used anymore, but left because some - gdb versions depend on them as a marker. */ - { - /* copy_to_user optimizes that into a single 8 byte store */ - static const struct { - u16 poplmovl; - u32 val; - u16 int80; - u16 pad; - } __attribute__((packed)) code = { - 0xb858, /* popl %eax ; movl $...,%eax */ - __NR_ia32_sigreturn, - 0x80cd, /* int $0x80 */ - 0, - }; - err |= __copy_to_user(frame->retcode, &code, 8); } + err |= __put_user(ptr_to_compat(restorer), &frame->pretcode); + + /* + * These are actually not used anymore, but left because some + * gdb versions depend on them as a marker. 
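/*
 * get_sigframe() above aligns the frame per the i386 ABI: on handler
 * entry, after the return address has been pushed, (sp + 4) must be a
 * multiple of 16. Worked example with sp = 0xffffd123:
 *
 *	(0xffffd123 + 4) & -16ul = 0xffffd120
 *	0xffffd120 - 4           = 0xffffd11c
 *
 * and indeed (0xffffd11c + 4) & 15 == 0 at function entry.
 */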
+ */ + err |= __copy_to_user(frame->retcode, &code, 8); if (err) goto give_sigsegv; /* Set up registers for signal handler */ - regs->rsp = (unsigned long) frame; - regs->rip = (unsigned long) ka->sa.sa_handler; + regs->sp = (unsigned long) frame; + regs->ip = (unsigned long) ka->sa.sa_handler; /* Make -mregparm=3 work */ - regs->rax = sig; - regs->rdx = 0; - regs->rcx = 0; + regs->ax = sig; + regs->dx = 0; + regs->cx = 0; - asm volatile("movl %0,%%ds" :: "r" (__USER32_DS)); - asm volatile("movl %0,%%es" :: "r" (__USER32_DS)); + asm volatile("movl %0,%%ds" :: "r" (__USER32_DS)); + asm volatile("movl %0,%%es" :: "r" (__USER32_DS)); - regs->cs = __USER32_CS; - regs->ss = __USER32_DS; + regs->cs = __USER32_CS; + regs->ss = __USER32_DS; set_fs(USER_DS); - regs->eflags &= ~TF_MASK; + regs->flags &= ~TF_MASK; if (test_thread_flag(TIF_SINGLESTEP)) ptrace_notify(SIGTRAP); #if DEBUG_SIG - printk("SIG deliver (%s:%d): sp=%p pc=%lx ra=%u\n", - current->comm, current->pid, frame, regs->rip, frame->pretcode); + printk(KERN_DEBUG "SIG deliver (%s:%d): sp=%p pc=%lx ra=%u\n", + current->comm, current->pid, frame, regs->ip, frame->pretcode); #endif return 0; @@ -511,25 +518,34 @@ give_sigsegv: } int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, - compat_sigset_t *set, struct pt_regs * regs) + compat_sigset_t *set, struct pt_regs *regs) { struct rt_sigframe __user *frame; + struct exec_domain *ed = current_thread_info()->exec_domain; + void __user *restorer; int err = 0; + /* __copy_to_user optimizes that into a single 8 byte store */ + static const struct { + u8 movl; + u32 val; + u16 int80; + u16 pad; + u8 pad2; + } __attribute__((packed)) code = { + 0xb8, + __NR_ia32_rt_sigreturn, + 0x80cd, + 0, + }; + frame = get_sigframe(ka, regs, sizeof(*frame)); if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) goto give_sigsegv; - { - struct exec_domain *ed = current_thread_info()->exec_domain; - err |= __put_user((ed - && ed->signal_invmap - && sig < 32 - ? ed->signal_invmap[sig] - : sig), - &frame->sig); - } + err |= __put_user((ed && ed->signal_invmap && sig < 32 + ? ed->signal_invmap[sig] : sig), &frame->sig); err |= __put_user(ptr_to_compat(&frame->info), &frame->pinfo); err |= __put_user(ptr_to_compat(&frame->uc), &frame->puc); err |= copy_siginfo_to_user32(&frame->info, info); @@ -540,73 +556,58 @@ int ia32_setup_rt_frame(int sig, struct err |= __put_user(0, &frame->uc.uc_flags); err |= __put_user(0, &frame->uc.uc_link); err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); - err |= __put_user(sas_ss_flags(regs->rsp), + err |= __put_user(sas_ss_flags(regs->sp), &frame->uc.uc_stack.ss_flags); err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); err |= ia32_setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate, - regs, set->sig[0]); + regs, set->sig[0]); err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); if (err) goto give_sigsegv; - - { - void __user *restorer = VSYSCALL32_RTSIGRETURN; - if (ka->sa.sa_flags & SA_RESTORER) - restorer = ka->sa.sa_restorer; - err |= __put_user(ptr_to_compat(restorer), &frame->pretcode); - } - - /* This is movl $,%eax ; int $0x80 */ - /* Not actually used anymore, but left because some gdb versions - need it. 
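/*
 * The packed "code" structs above are the classic little-endian trick
 * for emitting the sigreturn trampoline with a single 8-byte store.
 * For the non-rt frame, poplmovl = 0xb858 stores the bytes 58 b8, so
 * the buffer decodes as:
 *
 *	58                  popl  %eax
 *	b8 77 00 00 00      movl  $119, %eax    (__NR_ia32_sigreturn)
 *	cd 80               int   $0x80
 *
 * The rt variant starts with a single 0xb8 byte (no popl), encoding
 * movl $173,%eax; int $0x80 (__NR_ia32_rt_sigreturn). As the comments
 * note, both stubs are only kept as a marker for old gdb; the actual
 * return path is the vdso sigreturn symbol installed in ->pretcode.
 */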
*/ - { - /* __copy_to_user optimizes that into a single 8 byte store */ - static const struct { - u8 movl; - u32 val; - u16 int80; - u16 pad; - u8 pad2; - } __attribute__((packed)) code = { - 0xb8, - __NR_ia32_rt_sigreturn, - 0x80cd, - 0, - }; - err |= __copy_to_user(frame->retcode, &code, 8); - } + if (ka->sa.sa_flags & SA_RESTORER) + restorer = ka->sa.sa_restorer; + else + restorer = VDSO32_SYMBOL(current->mm->context.vdso, + rt_sigreturn); + err |= __put_user(ptr_to_compat(restorer), &frame->pretcode); + + /* + * Not actually used anymore, but left because some gdb + * versions need it. + */ + err |= __copy_to_user(frame->retcode, &code, 8); if (err) goto give_sigsegv; /* Set up registers for signal handler */ - regs->rsp = (unsigned long) frame; - regs->rip = (unsigned long) ka->sa.sa_handler; + regs->sp = (unsigned long) frame; + regs->ip = (unsigned long) ka->sa.sa_handler; /* Make -mregparm=3 work */ - regs->rax = sig; - regs->rdx = (unsigned long) &frame->info; - regs->rcx = (unsigned long) &frame->uc; + regs->ax = sig; + regs->dx = (unsigned long) &frame->info; + regs->cx = (unsigned long) &frame->uc; /* Make -mregparm=3 work */ - regs->rax = sig; - regs->rdx = (unsigned long) &frame->info; - regs->rcx = (unsigned long) &frame->uc; - - asm volatile("movl %0,%%ds" :: "r" (__USER32_DS)); - asm volatile("movl %0,%%es" :: "r" (__USER32_DS)); - - regs->cs = __USER32_CS; - regs->ss = __USER32_DS; + regs->ax = sig; + regs->dx = (unsigned long) &frame->info; + regs->cx = (unsigned long) &frame->uc; + + asm volatile("movl %0,%%ds" :: "r" (__USER32_DS)); + asm volatile("movl %0,%%es" :: "r" (__USER32_DS)); + + regs->cs = __USER32_CS; + regs->ss = __USER32_DS; set_fs(USER_DS); - regs->eflags &= ~TF_MASK; + regs->flags &= ~TF_MASK; if (test_thread_flag(TIF_SINGLESTEP)) ptrace_notify(SIGTRAP); #if DEBUG_SIG - printk("SIG deliver (%s:%d): sp=%p pc=%lx ra=%u\n", - current->comm, current->pid, frame, regs->rip, frame->pretcode); + printk(KERN_DEBUG "SIG deliver (%s:%d): sp=%p pc=%lx ra=%u\n", + current->comm, current->pid, frame, regs->ip, frame->pretcode); #endif return 0; diff -puN arch/x86/ia32/ia32entry.S~git-x86 arch/x86/ia32/ia32entry.S --- a/arch/x86/ia32/ia32entry.S~git-x86 +++ a/arch/x86/ia32/ia32entry.S @@ -12,7 +12,6 @@ #include #include #include -#include #include #include @@ -104,7 +103,7 @@ ENTRY(ia32_sysenter_target) pushfq CFI_ADJUST_CFA_OFFSET 8 /*CFI_REL_OFFSET rflags,0*/ - movl $VSYSCALL32_SYSEXIT, %r10d + movl 8*3-THREAD_SIZE+threadinfo_sysenter_return(%rsp), %r10d CFI_REGISTER rip,r10 pushq $__USER32_CS CFI_ADJUST_CFA_OFFSET 8 @@ -142,6 +141,8 @@ sysenter_do_call: andl $~TS_COMPAT,threadinfo_status(%r10) /* clear IF, that popfq doesn't enable interrupts early */ andl $~0x200,EFLAGS-R11(%rsp) + movl RIP-R11(%rsp),%edx /* User %eip */ + CFI_REGISTER rip,rdx RESTORE_ARGS 1,24,1,1,1,1 popfq CFI_ADJUST_CFA_OFFSET -8 @@ -149,8 +150,6 @@ sysenter_do_call: popq %rcx /* User %esp */ CFI_ADJUST_CFA_OFFSET -8 CFI_REGISTER rsp,rcx - movl $VSYSCALL32_SYSEXIT,%edx /* User %eip */ - CFI_REGISTER rip,rdx TRACE_IRQS_ON swapgs sti /* sti only takes effect after the next instruction */ @@ -644,8 +643,8 @@ ia32_sys_call_table: .quad compat_sys_futex /* 240 */ .quad compat_sys_sched_setaffinity .quad compat_sys_sched_getaffinity - .quad sys32_set_thread_area - .quad sys32_get_thread_area + .quad sys_set_thread_area + .quad sys_get_thread_area .quad compat_sys_io_setup /* 245 */ .quad sys_io_destroy .quad compat_sys_io_getevents diff -puN arch/x86/ia32/ipc32.c~git-x86 arch/x86/ia32/ipc32.c --- 
a/arch/x86/ia32/ipc32.c~git-x86 +++ a/arch/x86/ia32/ipc32.c @@ -9,9 +9,8 @@ #include #include -asmlinkage long -sys32_ipc(u32 call, int first, int second, int third, - compat_uptr_t ptr, u32 fifth) +asmlinkage long sys32_ipc(u32 call, int first, int second, int third, + compat_uptr_t ptr, u32 fifth) { int version; @@ -19,36 +18,35 @@ sys32_ipc(u32 call, int first, int secon call &= 0xffff; switch (call) { - case SEMOP: + case SEMOP: /* struct sembuf is the same on 32 and 64bit :)) */ return sys_semtimedop(first, compat_ptr(ptr), second, NULL); - case SEMTIMEDOP: + case SEMTIMEDOP: return compat_sys_semtimedop(first, compat_ptr(ptr), second, compat_ptr(fifth)); - case SEMGET: + case SEMGET: return sys_semget(first, second, third); - case SEMCTL: + case SEMCTL: return compat_sys_semctl(first, second, third, compat_ptr(ptr)); - case MSGSND: + case MSGSND: return compat_sys_msgsnd(first, second, third, compat_ptr(ptr)); - case MSGRCV: + case MSGRCV: return compat_sys_msgrcv(first, second, fifth, third, version, compat_ptr(ptr)); - case MSGGET: + case MSGGET: return sys_msgget((key_t) first, second); - case MSGCTL: + case MSGCTL: return compat_sys_msgctl(first, second, compat_ptr(ptr)); - case SHMAT: + case SHMAT: return compat_sys_shmat(first, second, third, version, compat_ptr(ptr)); - break; - case SHMDT: + case SHMDT: return sys_shmdt(compat_ptr(ptr)); - case SHMGET: + case SHMGET: return sys_shmget(first, (unsigned)second, third); - case SHMCTL: + case SHMCTL: return compat_sys_shmctl(first, second, compat_ptr(ptr)); } return -ENOSYS; diff -puN arch/x86/ia32/mmap32.c~git-x86 /dev/null --- a/arch/x86/ia32/mmap32.c +++ /dev/null @@ -1,79 +0,0 @@ -/* - * linux/arch/x86_64/ia32/mm/mmap.c - * - * flexible mmap layout support - * - * Based on the i386 version which was - * - * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * - * Started by Ingo Molnar - */ - -#include -#include -#include -#include - -/* - * Top of mmap area (just below the process stack). - * - * Leave an at least ~128 MB hole. 
- */ -#define MIN_GAP (128*1024*1024) -#define MAX_GAP (TASK_SIZE/6*5) - -static inline unsigned long mmap_base(struct mm_struct *mm) -{ - unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur; - unsigned long random_factor = 0; - - if (current->flags & PF_RANDOMIZE) - random_factor = get_random_int() % (1024*1024); - - if (gap < MIN_GAP) - gap = MIN_GAP; - else if (gap > MAX_GAP) - gap = MAX_GAP; - - return PAGE_ALIGN(TASK_SIZE - gap - random_factor); -} - -/* - * This function, called very early during the creation of a new - * process VM image, sets up which VM layout function to use: - */ -void ia32_pick_mmap_layout(struct mm_struct *mm) -{ - /* - * Fall back to the standard layout if the personality - * bit is set, or if the expected stack growth is unlimited: - */ - if (sysctl_legacy_va_layout || - (current->personality & ADDR_COMPAT_LAYOUT) || - current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY) { - mm->mmap_base = TASK_UNMAPPED_BASE; - mm->get_unmapped_area = arch_get_unmapped_area; - mm->unmap_area = arch_unmap_area; - } else { - mm->mmap_base = mmap_base(mm); - mm->get_unmapped_area = arch_get_unmapped_area_topdown; - mm->unmap_area = arch_unmap_area_topdown; - } -} diff -puN arch/x86/ia32/ptrace32.c~git-x86 /dev/null --- a/arch/x86/ia32/ptrace32.c +++ /dev/null @@ -1,404 +0,0 @@ -/* - * 32bit ptrace for x86-64. - * - * Copyright 2001,2002 Andi Kleen, SuSE Labs. - * Some parts copied from arch/i386/kernel/ptrace.c. See that file for earlier - * copyright. - * - * This allows to access 64bit processes too; but there is no way to see the extended - * register contents. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * Determines which flags the user has access to [1 = access, 0 = no access]. - * Prohibits changing ID(21), VIP(20), VIF(19), VM(17), IOPL(12-13), IF(9). - * Also masks reserved bits (31-22, 15, 5, 3, 1). 
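The 0x54dd5 value of FLAG_MASK below follows directly from that list: setting a bit for each flag a 32-bit debugger may legitimately write (CF, PF, AF, ZF, SF, TF, DF, OF, NT, RF, AC) reproduces the constant. A standalone sketch that recomputes it — plain userspace C for illustration, not kernel code:

	#include <stdio.h>

	int main(void)
	{
		/* bit positions of the user-writable EFLAGS:
		 * CF PF AF ZF SF TF DF OF NT RF AC */
		static const int writable[] = {
			0, 2, 4, 6, 7, 8, 10, 11, 14, 16, 18
		};
		unsigned long mask = 0;
		unsigned int i;

		for (i = 0; i < sizeof(writable) / sizeof(writable[0]); i++)
			mask |= 1UL << writable[i];

		printf("FLAG_MASK = %#lx\n", mask);	/* prints 0x54dd5 */
		return 0;
	}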
- */ -#define FLAG_MASK 0x54dd5UL - -#define R32(l,q) \ - case offsetof(struct user32, regs.l): stack[offsetof(struct pt_regs, q)/8] = val; break - -static int putreg32(struct task_struct *child, unsigned regno, u32 val) -{ - int i; - __u64 *stack = (__u64 *)task_pt_regs(child); - - switch (regno) { - case offsetof(struct user32, regs.fs): - if (val && (val & 3) != 3) return -EIO; - child->thread.fsindex = val & 0xffff; - break; - case offsetof(struct user32, regs.gs): - if (val && (val & 3) != 3) return -EIO; - child->thread.gsindex = val & 0xffff; - break; - case offsetof(struct user32, regs.ds): - if (val && (val & 3) != 3) return -EIO; - child->thread.ds = val & 0xffff; - break; - case offsetof(struct user32, regs.es): - child->thread.es = val & 0xffff; - break; - case offsetof(struct user32, regs.ss): - if ((val & 3) != 3) return -EIO; - stack[offsetof(struct pt_regs, ss)/8] = val & 0xffff; - break; - case offsetof(struct user32, regs.cs): - if ((val & 3) != 3) return -EIO; - stack[offsetof(struct pt_regs, cs)/8] = val & 0xffff; - break; - - R32(ebx, rbx); - R32(ecx, rcx); - R32(edx, rdx); - R32(edi, rdi); - R32(esi, rsi); - R32(ebp, rbp); - R32(eax, rax); - R32(orig_eax, orig_rax); - R32(eip, rip); - R32(esp, rsp); - - case offsetof(struct user32, regs.eflags): { - __u64 *flags = &stack[offsetof(struct pt_regs, eflags)/8]; - val &= FLAG_MASK; - *flags = val | (*flags & ~FLAG_MASK); - break; - } - - case offsetof(struct user32, u_debugreg[4]): - case offsetof(struct user32, u_debugreg[5]): - return -EIO; - - case offsetof(struct user32, u_debugreg[0]): - child->thread.debugreg0 = val; - break; - - case offsetof(struct user32, u_debugreg[1]): - child->thread.debugreg1 = val; - break; - - case offsetof(struct user32, u_debugreg[2]): - child->thread.debugreg2 = val; - break; - - case offsetof(struct user32, u_debugreg[3]): - child->thread.debugreg3 = val; - break; - - case offsetof(struct user32, u_debugreg[6]): - child->thread.debugreg6 = val; - break; - - case offsetof(struct user32, u_debugreg[7]): - val &= ~DR_CONTROL_RESERVED; - /* See arch/i386/kernel/ptrace.c for an explanation of - * this awkward check.*/ - for(i=0; i<4; i++) - if ((0x5454 >> ((val >> (16 + 4*i)) & 0xf)) & 1) - return -EIO; - child->thread.debugreg7 = val; - if (val) - set_tsk_thread_flag(child, TIF_DEBUG); - else - clear_tsk_thread_flag(child, TIF_DEBUG); - break; - - default: - if (regno > sizeof(struct user32) || (regno & 3)) - return -EIO; - - /* Other dummy fields in the virtual user structure are ignored */ - break; - } - return 0; -} - -#undef R32 - -#define R32(l,q) \ - case offsetof(struct user32, regs.l): *val = stack[offsetof(struct pt_regs, q)/8]; break - -static int getreg32(struct task_struct *child, unsigned regno, u32 *val) -{ - __u64 *stack = (__u64 *)task_pt_regs(child); - - switch (regno) { - case offsetof(struct user32, regs.fs): - *val = child->thread.fsindex; - break; - case offsetof(struct user32, regs.gs): - *val = child->thread.gsindex; - break; - case offsetof(struct user32, regs.ds): - *val = child->thread.ds; - break; - case offsetof(struct user32, regs.es): - *val = child->thread.es; - break; - - R32(cs, cs); - R32(ss, ss); - R32(ebx, rbx); - R32(ecx, rcx); - R32(edx, rdx); - R32(edi, rdi); - R32(esi, rsi); - R32(ebp, rbp); - R32(eax, rax); - R32(orig_eax, orig_rax); - R32(eip, rip); - R32(eflags, eflags); - R32(esp, rsp); - - case offsetof(struct user32, u_debugreg[0]): - *val = child->thread.debugreg0; - break; - case offsetof(struct user32, u_debugreg[1]): - *val = 
child->thread.debugreg1; - break; - case offsetof(struct user32, u_debugreg[2]): - *val = child->thread.debugreg2; - break; - case offsetof(struct user32, u_debugreg[3]): - *val = child->thread.debugreg3; - break; - case offsetof(struct user32, u_debugreg[6]): - *val = child->thread.debugreg6; - break; - case offsetof(struct user32, u_debugreg[7]): - *val = child->thread.debugreg7; - break; - - default: - if (regno > sizeof(struct user32) || (regno & 3)) - return -EIO; - - /* Other dummy fields in the virtual user structure are ignored */ - *val = 0; - break; - } - return 0; -} - -#undef R32 - -static long ptrace32_siginfo(unsigned request, u32 pid, u32 addr, u32 data) -{ - int ret; - compat_siginfo_t __user *si32 = compat_ptr(data); - siginfo_t ssi; - siginfo_t __user *si = compat_alloc_user_space(sizeof(siginfo_t)); - if (request == PTRACE_SETSIGINFO) { - memset(&ssi, 0, sizeof(siginfo_t)); - ret = copy_siginfo_from_user32(&ssi, si32); - if (ret) - return ret; - if (copy_to_user(si, &ssi, sizeof(siginfo_t))) - return -EFAULT; - } - ret = sys_ptrace(request, pid, addr, (unsigned long)si); - if (ret) - return ret; - if (request == PTRACE_GETSIGINFO) { - if (copy_from_user(&ssi, si, sizeof(siginfo_t))) - return -EFAULT; - ret = copy_siginfo_to_user32(si32, &ssi); - } - return ret; -} - -asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data) -{ - struct task_struct *child; - struct pt_regs *childregs; - void __user *datap = compat_ptr(data); - int ret; - __u32 val; - - switch (request) { - case PTRACE_TRACEME: - case PTRACE_ATTACH: - case PTRACE_KILL: - case PTRACE_CONT: - case PTRACE_SINGLESTEP: - case PTRACE_DETACH: - case PTRACE_SYSCALL: - case PTRACE_OLDSETOPTIONS: - case PTRACE_SETOPTIONS: - case PTRACE_SET_THREAD_AREA: - case PTRACE_GET_THREAD_AREA: - return sys_ptrace(request, pid, addr, data); - - default: - return -EINVAL; - - case PTRACE_PEEKTEXT: - case PTRACE_PEEKDATA: - case PTRACE_POKEDATA: - case PTRACE_POKETEXT: - case PTRACE_POKEUSR: - case PTRACE_PEEKUSR: - case PTRACE_GETREGS: - case PTRACE_SETREGS: - case PTRACE_SETFPREGS: - case PTRACE_GETFPREGS: - case PTRACE_SETFPXREGS: - case PTRACE_GETFPXREGS: - case PTRACE_GETEVENTMSG: - break; - - case PTRACE_SETSIGINFO: - case PTRACE_GETSIGINFO: - return ptrace32_siginfo(request, pid, addr, data); - } - - child = ptrace_get_task_struct(pid); - if (IS_ERR(child)) - return PTR_ERR(child); - - ret = ptrace_check_attach(child, request == PTRACE_KILL); - if (ret < 0) - goto out; - - childregs = task_pt_regs(child); - - switch (request) { - case PTRACE_PEEKDATA: - case PTRACE_PEEKTEXT: - ret = 0; - if (access_process_vm(child, addr, &val, sizeof(u32), 0)!=sizeof(u32)) - ret = -EIO; - else - ret = put_user(val, (unsigned int __user *)datap); - break; - - case PTRACE_POKEDATA: - case PTRACE_POKETEXT: - ret = 0; - if (access_process_vm(child, addr, &data, sizeof(u32), 1)!=sizeof(u32)) - ret = -EIO; - break; - - case PTRACE_PEEKUSR: - ret = getreg32(child, addr, &val); - if (ret == 0) - ret = put_user(val, (__u32 __user *)datap); - break; - - case PTRACE_POKEUSR: - ret = putreg32(child, addr, data); - break; - - case PTRACE_GETREGS: { /* Get all gp regs from the child. */ - int i; - if (!access_ok(VERIFY_WRITE, datap, 16*4)) { - ret = -EIO; - break; - } - ret = 0; - for ( i = 0; i <= 16*4 ; i += sizeof(__u32) ) { - getreg32(child, i, &val); - ret |= __put_user(val,(u32 __user *)datap); - datap += sizeof(u32); - } - break; - } - - case PTRACE_SETREGS: { /* Set all gp regs in the child. 
*/ - unsigned long tmp; - int i; - if (!access_ok(VERIFY_READ, datap, 16*4)) { - ret = -EIO; - break; - } - ret = 0; - for ( i = 0; i <= 16*4; i += sizeof(u32) ) { - ret |= __get_user(tmp, (u32 __user *)datap); - putreg32(child, i, tmp); - datap += sizeof(u32); - } - break; - } - - case PTRACE_GETFPREGS: - ret = -EIO; - if (!access_ok(VERIFY_READ, compat_ptr(data), - sizeof(struct user_i387_struct))) - break; - save_i387_ia32(child, datap, childregs, 1); - ret = 0; - break; - - case PTRACE_SETFPREGS: - ret = -EIO; - if (!access_ok(VERIFY_WRITE, datap, - sizeof(struct user_i387_struct))) - break; - ret = 0; - /* don't check EFAULT to be bug-to-bug compatible to i386 */ - restore_i387_ia32(child, datap, 1); - break; - - case PTRACE_GETFPXREGS: { - struct user32_fxsr_struct __user *u = datap; - init_fpu(child); - ret = -EIO; - if (!access_ok(VERIFY_WRITE, u, sizeof(*u))) - break; - ret = -EFAULT; - if (__copy_to_user(u, &child->thread.i387.fxsave, sizeof(*u))) - break; - ret = __put_user(childregs->cs, &u->fcs); - ret |= __put_user(child->thread.ds, &u->fos); - break; - } - case PTRACE_SETFPXREGS: { - struct user32_fxsr_struct __user *u = datap; - unlazy_fpu(child); - ret = -EIO; - if (!access_ok(VERIFY_READ, u, sizeof(*u))) - break; - /* no checking to be bug-to-bug compatible with i386. */ - /* but silence warning */ - if (__copy_from_user(&child->thread.i387.fxsave, u, sizeof(*u))) - ; - set_stopped_child_used_math(child); - child->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask; - ret = 0; - break; - } - - case PTRACE_GETEVENTMSG: - ret = put_user(child->ptrace_message,(unsigned int __user *)compat_ptr(data)); - break; - - default: - BUG(); - } - - out: - put_task_struct(child); - return ret; -} - diff -puN arch/x86/ia32/sys_ia32.c~git-x86 arch/x86/ia32/sys_ia32.c --- a/arch/x86/ia32/sys_ia32.c~git-x86 +++ a/arch/x86/ia32/sys_ia32.c @@ -1,29 +1,29 @@ /* * sys_ia32.c: Conversion between 32bit and 64bit native syscalls. Based on - * sys_sparc32 + * sys_sparc32 * * Copyright (C) 2000 VA Linux Co * Copyright (C) 2000 Don Dugger - * Copyright (C) 1999 Arun Sharma - * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz) - * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) + * Copyright (C) 1999 Arun Sharma + * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz) + * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) * Copyright (C) 2000 Hewlett-Packard Co. * Copyright (C) 2000 David Mosberger-Tang - * Copyright (C) 2000,2001,2002 Andi Kleen, SuSE Labs (x86-64 port) + * Copyright (C) 2000,2001,2002 Andi Kleen, SuSE Labs (x86-64 port) * * These routines maintain argument size conversion between 32bit and 64bit - * environment. In 2.5 most of this should be moved to a generic directory. + * environment. In 2.5 most of this should be moved to a generic directory. * * This file assumes that there is a hole at the end of user address space. - * - * Some of the functions are LE specific currently. These are hopefully all marked. - * This should be fixed. + * + * Some of the functions are LE specific currently. These are + * hopefully all marked. This should be fixed. 
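One pattern worth noting before the bulk of this file: nearly every 64-bit quantity here arrives from 32-bit userspace split into a low and a high word, and is reassembled with a shift-or, as sys32_truncate64() and friends do below. A minimal sketch of the idiom (join_offset is an illustrative name, not a helper from this file):

	/* rebuild a 64-bit file offset from its 32-bit syscall halves */
	static inline loff_t join_offset(unsigned long hi, unsigned long lo)
	{
		return ((loff_t)hi << 32) | lo;
	}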
*/ #include #include -#include -#include +#include +#include #include #include #include @@ -90,43 +90,44 @@ int cp_compat_stat(struct kstat *kbuf, s if (sizeof(ino) < sizeof(kbuf->ino) && ino != kbuf->ino) return -EOVERFLOW; if (!access_ok(VERIFY_WRITE, ubuf, sizeof(struct compat_stat)) || - __put_user (old_encode_dev(kbuf->dev), &ubuf->st_dev) || - __put_user (ino, &ubuf->st_ino) || - __put_user (kbuf->mode, &ubuf->st_mode) || - __put_user (kbuf->nlink, &ubuf->st_nlink) || - __put_user (uid, &ubuf->st_uid) || - __put_user (gid, &ubuf->st_gid) || - __put_user (old_encode_dev(kbuf->rdev), &ubuf->st_rdev) || - __put_user (kbuf->size, &ubuf->st_size) || - __put_user (kbuf->atime.tv_sec, &ubuf->st_atime) || - __put_user (kbuf->atime.tv_nsec, &ubuf->st_atime_nsec) || - __put_user (kbuf->mtime.tv_sec, &ubuf->st_mtime) || - __put_user (kbuf->mtime.tv_nsec, &ubuf->st_mtime_nsec) || - __put_user (kbuf->ctime.tv_sec, &ubuf->st_ctime) || - __put_user (kbuf->ctime.tv_nsec, &ubuf->st_ctime_nsec) || - __put_user (kbuf->blksize, &ubuf->st_blksize) || - __put_user (kbuf->blocks, &ubuf->st_blocks)) + __put_user(old_encode_dev(kbuf->dev), &ubuf->st_dev) || + __put_user(ino, &ubuf->st_ino) || + __put_user(kbuf->mode, &ubuf->st_mode) || + __put_user(kbuf->nlink, &ubuf->st_nlink) || + __put_user(uid, &ubuf->st_uid) || + __put_user(gid, &ubuf->st_gid) || + __put_user(old_encode_dev(kbuf->rdev), &ubuf->st_rdev) || + __put_user(kbuf->size, &ubuf->st_size) || + __put_user(kbuf->atime.tv_sec, &ubuf->st_atime) || + __put_user(kbuf->atime.tv_nsec, &ubuf->st_atime_nsec) || + __put_user(kbuf->mtime.tv_sec, &ubuf->st_mtime) || + __put_user(kbuf->mtime.tv_nsec, &ubuf->st_mtime_nsec) || + __put_user(kbuf->ctime.tv_sec, &ubuf->st_ctime) || + __put_user(kbuf->ctime.tv_nsec, &ubuf->st_ctime_nsec) || + __put_user(kbuf->blksize, &ubuf->st_blksize) || + __put_user(kbuf->blocks, &ubuf->st_blocks)) return -EFAULT; return 0; } -asmlinkage long -sys32_truncate64(char __user * filename, unsigned long offset_low, unsigned long offset_high) +asmlinkage long sys32_truncate64(char __user *filename, + unsigned long offset_low, + unsigned long offset_high) { return sys_truncate(filename, ((loff_t) offset_high << 32) | offset_low); } -asmlinkage long -sys32_ftruncate64(unsigned int fd, unsigned long offset_low, unsigned long offset_high) +asmlinkage long sys32_ftruncate64(unsigned int fd, unsigned long offset_low, + unsigned long offset_high) { return sys_ftruncate(fd, ((loff_t) offset_high << 32) | offset_low); } -/* Another set for IA32/LFS -- x86_64 struct stat is different due to - support for 64bit inode numbers. */ - -static int -cp_stat64(struct stat64 __user *ubuf, struct kstat *stat) +/* + * Another set for IA32/LFS -- x86_64 struct stat is different due to + * support for 64bit inode numbers. 
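The difference matters mostly for the inode field: cp_compat_stat() above has to detect a 64-bit inode number that no longer fits, while cp_stat64() can copy it through unchanged because stat64 carries a full-width st_ino. The truncation test, restated from cp_compat_stat() above:

	u32 ino = kbuf->ino;	/* narrowed to the compat field's width */
	if (sizeof(ino) < sizeof(kbuf->ino) && ino != kbuf->ino)
		return -EOVERFLOW;	/* high bits were lost */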
+ */ +static int cp_stat64(struct stat64 __user *ubuf, struct kstat *stat) { typeof(ubuf->st_uid) uid = 0; typeof(ubuf->st_gid) gid = 0; @@ -134,38 +135,39 @@ cp_stat64(struct stat64 __user *ubuf, st SET_GID(gid, stat->gid); if (!access_ok(VERIFY_WRITE, ubuf, sizeof(struct stat64)) || __put_user(huge_encode_dev(stat->dev), &ubuf->st_dev) || - __put_user (stat->ino, &ubuf->__st_ino) || - __put_user (stat->ino, &ubuf->st_ino) || - __put_user (stat->mode, &ubuf->st_mode) || - __put_user (stat->nlink, &ubuf->st_nlink) || - __put_user (uid, &ubuf->st_uid) || - __put_user (gid, &ubuf->st_gid) || - __put_user (huge_encode_dev(stat->rdev), &ubuf->st_rdev) || - __put_user (stat->size, &ubuf->st_size) || - __put_user (stat->atime.tv_sec, &ubuf->st_atime) || - __put_user (stat->atime.tv_nsec, &ubuf->st_atime_nsec) || - __put_user (stat->mtime.tv_sec, &ubuf->st_mtime) || - __put_user (stat->mtime.tv_nsec, &ubuf->st_mtime_nsec) || - __put_user (stat->ctime.tv_sec, &ubuf->st_ctime) || - __put_user (stat->ctime.tv_nsec, &ubuf->st_ctime_nsec) || - __put_user (stat->blksize, &ubuf->st_blksize) || - __put_user (stat->blocks, &ubuf->st_blocks)) + __put_user(stat->ino, &ubuf->__st_ino) || + __put_user(stat->ino, &ubuf->st_ino) || + __put_user(stat->mode, &ubuf->st_mode) || + __put_user(stat->nlink, &ubuf->st_nlink) || + __put_user(uid, &ubuf->st_uid) || + __put_user(gid, &ubuf->st_gid) || + __put_user(huge_encode_dev(stat->rdev), &ubuf->st_rdev) || + __put_user(stat->size, &ubuf->st_size) || + __put_user(stat->atime.tv_sec, &ubuf->st_atime) || + __put_user(stat->atime.tv_nsec, &ubuf->st_atime_nsec) || + __put_user(stat->mtime.tv_sec, &ubuf->st_mtime) || + __put_user(stat->mtime.tv_nsec, &ubuf->st_mtime_nsec) || + __put_user(stat->ctime.tv_sec, &ubuf->st_ctime) || + __put_user(stat->ctime.tv_nsec, &ubuf->st_ctime_nsec) || + __put_user(stat->blksize, &ubuf->st_blksize) || + __put_user(stat->blocks, &ubuf->st_blocks)) return -EFAULT; return 0; } -asmlinkage long -sys32_stat64(char __user * filename, struct stat64 __user *statbuf) +asmlinkage long sys32_stat64(char __user *filename, + struct stat64 __user *statbuf) { struct kstat stat; int ret = vfs_stat(filename, &stat); + if (!ret) ret = cp_stat64(statbuf, &stat); return ret; } -asmlinkage long -sys32_lstat64(char __user * filename, struct stat64 __user *statbuf) +asmlinkage long sys32_lstat64(char __user *filename, + struct stat64 __user *statbuf) { struct kstat stat; int ret = vfs_lstat(filename, &stat); @@ -174,8 +176,7 @@ sys32_lstat64(char __user * filename, st return ret; } -asmlinkage long -sys32_fstat64(unsigned int fd, struct stat64 __user *statbuf) +asmlinkage long sys32_fstat64(unsigned int fd, struct stat64 __user *statbuf) { struct kstat stat; int ret = vfs_fstat(fd, &stat); @@ -184,9 +185,8 @@ sys32_fstat64(unsigned int fd, struct st return ret; } -asmlinkage long -sys32_fstatat(unsigned int dfd, char __user *filename, - struct stat64 __user* statbuf, int flag) +asmlinkage long sys32_fstatat(unsigned int dfd, char __user *filename, + struct stat64 __user *statbuf, int flag) { struct kstat stat; int error = -EINVAL; @@ -221,8 +221,7 @@ struct mmap_arg_struct { unsigned int offset; }; -asmlinkage long -sys32_mmap(struct mmap_arg_struct __user *arg) +asmlinkage long sys32_mmap(struct mmap_arg_struct __user *arg) { struct mmap_arg_struct a; struct file *file = NULL; @@ -233,33 +232,33 @@ sys32_mmap(struct mmap_arg_struct __user return -EFAULT; if (a.offset & ~PAGE_MASK) - return -EINVAL; + return -EINVAL; if (!(a.flags & MAP_ANONYMOUS)) { file = 
fget(a.fd); if (!file) return -EBADF; } - - mm = current->mm; - down_write(&mm->mmap_sem); - retval = do_mmap_pgoff(file, a.addr, a.len, a.prot, a.flags, a.offset>>PAGE_SHIFT); + + mm = current->mm; + down_write(&mm->mmap_sem); + retval = do_mmap_pgoff(file, a.addr, a.len, a.prot, a.flags, + a.offset>>PAGE_SHIFT); if (file) fput(file); - up_write(&mm->mmap_sem); + up_write(&mm->mmap_sem); return retval; } -asmlinkage long -sys32_mprotect(unsigned long start, size_t len, unsigned long prot) +asmlinkage long sys32_mprotect(unsigned long start, size_t len, + unsigned long prot) { - return sys_mprotect(start,len,prot); + return sys_mprotect(start, len, prot); } -asmlinkage long -sys32_pipe(int __user *fd) +asmlinkage long sys32_pipe(int __user *fd) { int retval; int fds[2]; @@ -269,13 +268,13 @@ sys32_pipe(int __user *fd) goto out; if (copy_to_user(fd, fds, sizeof(fds))) retval = -EFAULT; - out: +out: return retval; } -asmlinkage long -sys32_rt_sigaction(int sig, struct sigaction32 __user *act, - struct sigaction32 __user *oact, unsigned int sigsetsize) +asmlinkage long sys32_rt_sigaction(int sig, struct sigaction32 __user *act, + struct sigaction32 __user *oact, + unsigned int sigsetsize) { struct k_sigaction new_ka, old_ka; int ret; @@ -291,12 +290,17 @@ sys32_rt_sigaction(int sig, struct sigac if (!access_ok(VERIFY_READ, act, sizeof(*act)) || __get_user(handler, &act->sa_handler) || __get_user(new_ka.sa.sa_flags, &act->sa_flags) || - __get_user(restorer, &act->sa_restorer)|| - __copy_from_user(&set32, &act->sa_mask, sizeof(compat_sigset_t))) + __get_user(restorer, &act->sa_restorer) || + __copy_from_user(&set32, &act->sa_mask, + sizeof(compat_sigset_t))) return -EFAULT; new_ka.sa.sa_handler = compat_ptr(handler); new_ka.sa.sa_restorer = compat_ptr(restorer); - /* FIXME: here we rely on _COMPAT_NSIG_WORS to be >= than _NSIG_WORDS << 1 */ + + /* + * FIXME: here we rely on _COMPAT_NSIG_WORS to be >= + * than _NSIG_WORDS << 1 + */ switch (_NSIG_WORDS) { case 4: new_ka.sa.sa_mask.sig[3] = set32.sig[6] | (((long)set32.sig[7]) << 32); @@ -312,7 +316,10 @@ sys32_rt_sigaction(int sig, struct sigac ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? 
&old_ka : NULL); if (!ret && oact) { - /* FIXME: here we rely on _COMPAT_NSIG_WORS to be >= than _NSIG_WORDS << 1 */ + /* + * FIXME: here we rely on _COMPAT_NSIG_WORS to be >= + * than _NSIG_WORDS << 1 + */ switch (_NSIG_WORDS) { case 4: set32.sig[7] = (old_ka.sa.sa_mask.sig[3] >> 32); @@ -328,23 +335,26 @@ sys32_rt_sigaction(int sig, struct sigac set32.sig[0] = old_ka.sa.sa_mask.sig[0]; } if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) || - __put_user(ptr_to_compat(old_ka.sa.sa_handler), &oact->sa_handler) || - __put_user(ptr_to_compat(old_ka.sa.sa_restorer), &oact->sa_restorer) || + __put_user(ptr_to_compat(old_ka.sa.sa_handler), + &oact->sa_handler) || + __put_user(ptr_to_compat(old_ka.sa.sa_restorer), + &oact->sa_restorer) || __put_user(old_ka.sa.sa_flags, &oact->sa_flags) || - __copy_to_user(&oact->sa_mask, &set32, sizeof(compat_sigset_t))) + __copy_to_user(&oact->sa_mask, &set32, + sizeof(compat_sigset_t))) return -EFAULT; } return ret; } -asmlinkage long -sys32_sigaction (int sig, struct old_sigaction32 __user *act, struct old_sigaction32 __user *oact) +asmlinkage long sys32_sigaction(int sig, struct old_sigaction32 __user *act, + struct old_sigaction32 __user *oact) { - struct k_sigaction new_ka, old_ka; - int ret; + struct k_sigaction new_ka, old_ka; + int ret; - if (act) { + if (act) { compat_old_sigset_t mask; compat_uptr_t handler, restorer; @@ -359,33 +369,35 @@ sys32_sigaction (int sig, struct old_sig new_ka.sa.sa_restorer = compat_ptr(restorer); siginitset(&new_ka.sa.sa_mask, mask); - } + } - ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); + ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); if (!ret && oact) { if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) || - __put_user(ptr_to_compat(old_ka.sa.sa_handler), &oact->sa_handler) || - __put_user(ptr_to_compat(old_ka.sa.sa_restorer), &oact->sa_restorer) || + __put_user(ptr_to_compat(old_ka.sa.sa_handler), + &oact->sa_handler) || + __put_user(ptr_to_compat(old_ka.sa.sa_restorer), + &oact->sa_restorer) || __put_user(old_ka.sa.sa_flags, &oact->sa_flags) || __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask)) return -EFAULT; - } + } return ret; } -asmlinkage long -sys32_rt_sigprocmask(int how, compat_sigset_t __user *set, - compat_sigset_t __user *oset, unsigned int sigsetsize) +asmlinkage long sys32_rt_sigprocmask(int how, compat_sigset_t __user *set, + compat_sigset_t __user *oset, + unsigned int sigsetsize) { sigset_t s; compat_sigset_t s32; int ret; mm_segment_t old_fs = get_fs(); - + if (set) { - if (copy_from_user (&s32, set, sizeof(compat_sigset_t))) + if (copy_from_user(&s32, set, sizeof(compat_sigset_t))) return -EFAULT; switch (_NSIG_WORDS) { case 4: s.sig[3] = s32.sig[6] | (((long)s32.sig[7]) << 32); @@ -394,13 +406,14 @@ sys32_rt_sigprocmask(int how, compat_sig case 1: s.sig[0] = s32.sig[0] | (((long)s32.sig[1]) << 32); } } - set_fs (KERNEL_DS); + set_fs(KERNEL_DS); ret = sys_rt_sigprocmask(how, set ? (sigset_t __user *)&s : NULL, oset ? 
(sigset_t __user *)&s : NULL, - sigsetsize); - set_fs (old_fs); - if (ret) return ret; + sigsetsize); + set_fs(old_fs); + if (ret) + return ret; if (oset) { switch (_NSIG_WORDS) { case 4: s32.sig[7] = (s.sig[3] >> 32); s32.sig[6] = s.sig[3]; @@ -408,52 +421,49 @@ sys32_rt_sigprocmask(int how, compat_sig case 2: s32.sig[3] = (s.sig[1] >> 32); s32.sig[2] = s.sig[1]; case 1: s32.sig[1] = (s.sig[0] >> 32); s32.sig[0] = s.sig[0]; } - if (copy_to_user (oset, &s32, sizeof(compat_sigset_t))) + if (copy_to_user(oset, &s32, sizeof(compat_sigset_t))) return -EFAULT; } return 0; } -static inline long -get_tv32(struct timeval *o, struct compat_timeval __user *i) +static inline long get_tv32(struct timeval *o, struct compat_timeval __user *i) { - int err = -EFAULT; - if (access_ok(VERIFY_READ, i, sizeof(*i))) { + int err = -EFAULT; + + if (access_ok(VERIFY_READ, i, sizeof(*i))) { err = __get_user(o->tv_sec, &i->tv_sec); err |= __get_user(o->tv_usec, &i->tv_usec); } - return err; + return err; } -static inline long -put_tv32(struct compat_timeval __user *o, struct timeval *i) +static inline long put_tv32(struct compat_timeval __user *o, struct timeval *i) { int err = -EFAULT; - if (access_ok(VERIFY_WRITE, o, sizeof(*o))) { + + if (access_ok(VERIFY_WRITE, o, sizeof(*o))) { err = __put_user(i->tv_sec, &o->tv_sec); err |= __put_user(i->tv_usec, &o->tv_usec); - } - return err; + } + return err; } -extern unsigned int alarm_setitimer(unsigned int seconds); - -asmlinkage long -sys32_alarm(unsigned int seconds) +asmlinkage long sys32_alarm(unsigned int seconds) { return alarm_setitimer(seconds); } -/* Translations due to time_t size differences. Which affects all - sorts of things, like timeval and itimerval. */ - -extern struct timezone sys_tz; - -asmlinkage long -sys32_gettimeofday(struct compat_timeval __user *tv, struct timezone __user *tz) +/* + * Translations due to time_t size differences. Which affects all + * sorts of things, like timeval and itimerval. + */ +asmlinkage long sys32_gettimeofday(struct compat_timeval __user *tv, + struct timezone __user *tz) { if (tv) { struct timeval ktv; + do_gettimeofday(&ktv); if (put_tv32(tv, &ktv)) return -EFAULT; @@ -465,14 +475,14 @@ sys32_gettimeofday(struct compat_timeval return 0; } -asmlinkage long -sys32_settimeofday(struct compat_timeval __user *tv, struct timezone __user *tz) +asmlinkage long sys32_settimeofday(struct compat_timeval __user *tv, + struct timezone __user *tz) { struct timeval ktv; struct timespec kts; struct timezone ktz; - if (tv) { + if (tv) { if (get_tv32(&ktv, tv)) return -EFAULT; kts.tv_sec = ktv.tv_sec; @@ -494,8 +504,7 @@ struct sel_arg_struct { unsigned int tvp; }; -asmlinkage long -sys32_old_select(struct sel_arg_struct __user *arg) +asmlinkage long sys32_old_select(struct sel_arg_struct __user *arg) { struct sel_arg_struct a; @@ -505,50 +514,45 @@ sys32_old_select(struct sel_arg_struct _ compat_ptr(a.exp), compat_ptr(a.tvp)); } -extern asmlinkage long -compat_sys_wait4(compat_pid_t pid, compat_uint_t * stat_addr, int options, - struct compat_rusage *ru); - -asmlinkage long -sys32_waitpid(compat_pid_t pid, unsigned int *stat_addr, int options) +asmlinkage long sys32_waitpid(compat_pid_t pid, unsigned int *stat_addr, + int options) { return compat_sys_wait4(pid, stat_addr, options, NULL); } /* 32-bit timeval and related flotsam. 
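get_tv32()/put_tv32() above exist because a 32-bit time_t (and suseconds_t) is half the native width, so struct timeval cannot be copied as raw bytes. The compat layout being converted, roughly as the compat headers define it:

	struct compat_timeval {
		compat_time_t	tv_sec;		/* s32 in the x86-64 compat ABI */
		s32		tv_usec;
	};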
*/ -asmlinkage long -sys32_sysfs(int option, u32 arg1, u32 arg2) +asmlinkage long sys32_sysfs(int option, u32 arg1, u32 arg2) { return sys_sysfs(option, arg1, arg2); } -asmlinkage long -sys32_sched_rr_get_interval(compat_pid_t pid, struct compat_timespec __user *interval) +asmlinkage long sys32_sched_rr_get_interval(compat_pid_t pid, + struct compat_timespec __user *interval) { struct timespec t; int ret; - mm_segment_t old_fs = get_fs (); - - set_fs (KERNEL_DS); + mm_segment_t old_fs = get_fs(); + + set_fs(KERNEL_DS); ret = sys_sched_rr_get_interval(pid, (struct timespec __user *)&t); - set_fs (old_fs); + set_fs(old_fs); if (put_compat_timespec(&t, interval)) return -EFAULT; return ret; } -asmlinkage long -sys32_rt_sigpending(compat_sigset_t __user *set, compat_size_t sigsetsize) +asmlinkage long sys32_rt_sigpending(compat_sigset_t __user *set, + compat_size_t sigsetsize) { sigset_t s; compat_sigset_t s32; int ret; mm_segment_t old_fs = get_fs(); - - set_fs (KERNEL_DS); + + set_fs(KERNEL_DS); ret = sys_rt_sigpending((sigset_t __user *)&s, sigsetsize); - set_fs (old_fs); + set_fs(old_fs); if (!ret) { switch (_NSIG_WORDS) { case 4: s32.sig[7] = (s.sig[3] >> 32); s32.sig[6] = s.sig[3]; @@ -556,30 +560,29 @@ sys32_rt_sigpending(compat_sigset_t __us case 2: s32.sig[3] = (s.sig[1] >> 32); s32.sig[2] = s.sig[1]; case 1: s32.sig[1] = (s.sig[0] >> 32); s32.sig[0] = s.sig[0]; } - if (copy_to_user (set, &s32, sizeof(compat_sigset_t))) + if (copy_to_user(set, &s32, sizeof(compat_sigset_t))) return -EFAULT; } return ret; } -asmlinkage long -sys32_rt_sigqueueinfo(int pid, int sig, compat_siginfo_t __user *uinfo) +asmlinkage long sys32_rt_sigqueueinfo(int pid, int sig, + compat_siginfo_t __user *uinfo) { siginfo_t info; int ret; mm_segment_t old_fs = get_fs(); - + if (copy_siginfo_from_user32(&info, uinfo)) return -EFAULT; - set_fs (KERNEL_DS); + set_fs(KERNEL_DS); ret = sys_rt_sigqueueinfo(pid, sig, (siginfo_t __user *)&info); - set_fs (old_fs); + set_fs(old_fs); return ret; } /* These are here just in case some old ia32 binary calls it. */ -asmlinkage long -sys32_pause(void) +asmlinkage long sys32_pause(void) { current->state = TASK_INTERRUPTIBLE; schedule(); @@ -599,25 +602,25 @@ struct sysctl_ia32 { }; -asmlinkage long -sys32_sysctl(struct sysctl_ia32 __user *args32) +asmlinkage long sys32_sysctl(struct sysctl_ia32 __user *args32) { struct sysctl_ia32 a32; - mm_segment_t old_fs = get_fs (); + mm_segment_t old_fs = get_fs(); void __user *oldvalp, *newvalp; size_t oldlen; int __user *namep; long ret; - if (copy_from_user(&a32, args32, sizeof (a32))) + if (copy_from_user(&a32, args32, sizeof(a32))) return -EFAULT; /* - * We need to pre-validate these because we have to disable address checking - * before calling do_sysctl() because of OLDLEN but we can't run the risk of the - * user specifying bad addresses here. Well, since we're dealing with 32 bit - * addresses, we KNOW that access_ok() will always succeed, so this is an - * expensive NOP, but so what... + * We need to pre-validate these because we have to disable + * address checking before calling do_sysctl() because of + * OLDLEN but we can't run the risk of the user specifying bad + * addresses here. Well, since we're dealing with 32 bit + * addresses, we KNOW that access_ok() will always succeed, so + * this is an expensive NOP, but so what... 
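The "access_ok() will always succeed" claim holds because a compat pointer is only 32 bits wide to begin with: it is zero-extended into the 64-bit address space and can never reach past the user range. compat_ptr() is essentially just that zero-extension:

	static inline void __user *compat_ptr(compat_uptr_t uptr)
	{
		return (void __user *)(unsigned long)uptr;
	}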
*/ namep = compat_ptr(a32.name); oldvalp = compat_ptr(a32.oldval); @@ -636,34 +639,34 @@ sys32_sysctl(struct sysctl_ia32 __user * unlock_kernel(); set_fs(old_fs); - if (oldvalp && put_user (oldlen, (int __user *)compat_ptr(a32.oldlenp))) + if (oldvalp && put_user(oldlen, (int __user *)compat_ptr(a32.oldlenp))) return -EFAULT; return ret; } #endif -/* warning: next two assume little endian */ -asmlinkage long -sys32_pread(unsigned int fd, char __user *ubuf, u32 count, u32 poslo, u32 poshi) +/* warning: next two assume little endian */ +asmlinkage long sys32_pread(unsigned int fd, char __user *ubuf, u32 count, + u32 poslo, u32 poshi) { return sys_pread64(fd, ubuf, count, ((loff_t)AA(poshi) << 32) | AA(poslo)); } -asmlinkage long -sys32_pwrite(unsigned int fd, char __user *ubuf, u32 count, u32 poslo, u32 poshi) +asmlinkage long sys32_pwrite(unsigned int fd, char __user *ubuf, u32 count, + u32 poslo, u32 poshi) { return sys_pwrite64(fd, ubuf, count, ((loff_t)AA(poshi) << 32) | AA(poslo)); } -asmlinkage long -sys32_personality(unsigned long personality) +asmlinkage long sys32_personality(unsigned long personality) { int ret; - if (personality(current->personality) == PER_LINUX32 && + + if (personality(current->personality) == PER_LINUX32 && personality == PER_LINUX) personality = PER_LINUX32; ret = sys_personality(personality); @@ -672,34 +675,33 @@ sys32_personality(unsigned long personal return ret; } -asmlinkage long -sys32_sendfile(int out_fd, int in_fd, compat_off_t __user *offset, s32 count) +asmlinkage long sys32_sendfile(int out_fd, int in_fd, + compat_off_t __user *offset, s32 count) { mm_segment_t old_fs = get_fs(); int ret; off_t of; - + if (offset && get_user(of, offset)) return -EFAULT; - + set_fs(KERNEL_DS); ret = sys_sendfile(out_fd, in_fd, offset ? 
(off_t __user *)&of : NULL, count); set_fs(old_fs); - + if (offset && put_user(of, offset)) return -EFAULT; - return ret; } asmlinkage long sys32_mmap2(unsigned long addr, unsigned long len, - unsigned long prot, unsigned long flags, - unsigned long fd, unsigned long pgoff) + unsigned long prot, unsigned long flags, + unsigned long fd, unsigned long pgoff) { struct mm_struct *mm = current->mm; unsigned long error; - struct file * file = NULL; + struct file *file = NULL; flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); if (!(flags & MAP_ANONYMOUS)) { @@ -717,36 +719,35 @@ asmlinkage long sys32_mmap2(unsigned lon return error; } -asmlinkage long sys32_olduname(struct oldold_utsname __user * name) +asmlinkage long sys32_olduname(struct oldold_utsname __user *name) { + char *arch = "x86_64"; int err; if (!name) return -EFAULT; if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname))) return -EFAULT; - - down_read(&uts_sem); - err = __copy_to_user(&name->sysname,&utsname()->sysname, - __OLD_UTS_LEN); - err |= __put_user(0,name->sysname+__OLD_UTS_LEN); - err |= __copy_to_user(&name->nodename,&utsname()->nodename, - __OLD_UTS_LEN); - err |= __put_user(0,name->nodename+__OLD_UTS_LEN); - err |= __copy_to_user(&name->release,&utsname()->release, - __OLD_UTS_LEN); - err |= __put_user(0,name->release+__OLD_UTS_LEN); - err |= __copy_to_user(&name->version,&utsname()->version, - __OLD_UTS_LEN); - err |= __put_user(0,name->version+__OLD_UTS_LEN); - { - char *arch = "x86_64"; - if (personality(current->personality) == PER_LINUX32) - arch = "i686"; - - err |= __copy_to_user(&name->machine, arch, strlen(arch)+1); - } + down_read(&uts_sem); + + err = __copy_to_user(&name->sysname, &utsname()->sysname, + __OLD_UTS_LEN); + err |= __put_user(0, name->sysname+__OLD_UTS_LEN); + err |= __copy_to_user(&name->nodename, &utsname()->nodename, + __OLD_UTS_LEN); + err |= __put_user(0, name->nodename+__OLD_UTS_LEN); + err |= __copy_to_user(&name->release, &utsname()->release, + __OLD_UTS_LEN); + err |= __put_user(0, name->release+__OLD_UTS_LEN); + err |= __copy_to_user(&name->version, &utsname()->version, + __OLD_UTS_LEN); + err |= __put_user(0, name->version+__OLD_UTS_LEN); + + if (personality(current->personality) == PER_LINUX32) + arch = "i686"; + + err |= __copy_to_user(&name->machine, arch, strlen(arch) + 1); up_read(&uts_sem); @@ -755,17 +756,19 @@ asmlinkage long sys32_olduname(struct ol return err; } -long sys32_uname(struct old_utsname __user * name) +long sys32_uname(struct old_utsname __user *name) { int err; + if (!name) return -EFAULT; down_read(&uts_sem); - err = copy_to_user(name, utsname(), sizeof (*name)); + err = copy_to_user(name, utsname(), sizeof(*name)); up_read(&uts_sem); - if (personality(current->personality) == PER_LINUX32) + if (personality(current->personality) == PER_LINUX32) err |= copy_to_user(&name->machine, "i686", 5); - return err?-EFAULT:0; + + return err ? 
-EFAULT : 0; } long sys32_ustat(unsigned dev, struct ustat32 __user *u32p) @@ -773,27 +776,28 @@ long sys32_ustat(unsigned dev, struct us struct ustat u; mm_segment_t seg; int ret; - - seg = get_fs(); - set_fs(KERNEL_DS); + + seg = get_fs(); + set_fs(KERNEL_DS); ret = sys_ustat(dev, (struct ustat __user *)&u); set_fs(seg); - if (ret >= 0) { - if (!access_ok(VERIFY_WRITE,u32p,sizeof(struct ustat32)) || - __put_user((__u32) u.f_tfree, &u32p->f_tfree) || - __put_user((__u32) u.f_tinode, &u32p->f_tfree) || - __copy_to_user(&u32p->f_fname, u.f_fname, sizeof(u.f_fname)) || - __copy_to_user(&u32p->f_fpack, u.f_fpack, sizeof(u.f_fpack))) - ret = -EFAULT; - } + if (ret < 0) + return ret; + + if (!access_ok(VERIFY_WRITE, u32p, sizeof(struct ustat32)) || + __put_user((__u32) u.f_tfree, &u32p->f_tfree) || + __put_user((__u32) u.f_tinode, &u32p->f_tfree) || + __copy_to_user(&u32p->f_fname, u.f_fname, sizeof(u.f_fname)) || + __copy_to_user(&u32p->f_fpack, u.f_fpack, sizeof(u.f_fpack))) + ret = -EFAULT; return ret; -} +} asmlinkage long sys32_execve(char __user *name, compat_uptr_t __user *argv, compat_uptr_t __user *envp, struct pt_regs *regs) { long error; - char * filename; + char *filename; filename = getname(name); error = PTR_ERR(filename); @@ -812,18 +816,19 @@ asmlinkage long sys32_execve(char __user asmlinkage long sys32_clone(unsigned int clone_flags, unsigned int newsp, struct pt_regs *regs) { - void __user *parent_tid = (void __user *)regs->rdx; - void __user *child_tid = (void __user *)regs->rdi; + void __user *parent_tid = (void __user *)regs->dx; + void __user *child_tid = (void __user *)regs->di; + if (!newsp) - newsp = regs->rsp; - return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid); + newsp = regs->sp; + return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid); } /* - * Some system calls that need sign extended arguments. This could be done by a generic wrapper. - */ - -long sys32_lseek (unsigned int fd, int offset, unsigned int whence) + * Some system calls that need sign extended arguments. This could be + * done by a generic wrapper. 
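The problem these wrappers solve sits at the ABI boundary: a negative 32-bit argument arrives zero-extended in a 64-bit register, and only the declared parameter type ("int offset" in sys32_lseek() below) makes the compiler sign-extend before the value reaches the native syscall. A sketch of the distinction:

	unsigned int raw = 0xfffffffcu;	/* what the 64-bit register holds */
	long wrong = raw;		/* 4294967292 -- zero-extended */
	long right = (int)raw;		/* -4 -- sign-extended via the int type */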
+ */ +long sys32_lseek(unsigned int fd, int offset, unsigned int whence) { return sys_lseek(fd, offset, whence); } @@ -832,49 +837,52 @@ long sys32_kill(int pid, int sig) { return sys_kill(pid, sig); } - -long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high, + +long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high, __u32 len_low, __u32 len_high, int advice) -{ +{ return sys_fadvise64_64(fd, (((u64)offset_high)<<32) | offset_low, (((u64)len_high)<<32) | len_low, - advice); -} + advice); +} long sys32_vm86_warning(void) -{ +{ struct task_struct *me = current; static char lastcomm[sizeof(me->comm)]; + if (strncmp(lastcomm, me->comm, sizeof(lastcomm))) { - compat_printk(KERN_INFO "%s: vm86 mode not supported on 64 bit kernel\n", - me->comm); + compat_printk(KERN_INFO + "%s: vm86 mode not supported on 64 bit kernel\n", + me->comm); strncpy(lastcomm, me->comm, sizeof(lastcomm)); - } + } return -ENOSYS; -} +} long sys32_lookup_dcookie(u32 addr_low, u32 addr_high, - char __user * buf, size_t len) + char __user *buf, size_t len) { return sys_lookup_dcookie(((u64)addr_high << 32) | addr_low, buf, len); } -asmlinkage ssize_t sys32_readahead(int fd, unsigned off_lo, unsigned off_hi, size_t count) +asmlinkage ssize_t sys32_readahead(int fd, unsigned off_lo, unsigned off_hi, + size_t count) { return sys_readahead(fd, ((u64)off_hi << 32) | off_lo, count); } asmlinkage long sys32_sync_file_range(int fd, unsigned off_low, unsigned off_hi, - unsigned n_low, unsigned n_hi, int flags) + unsigned n_low, unsigned n_hi, int flags) { return sys_sync_file_range(fd, ((u64)off_hi << 32) | off_low, ((u64)n_hi << 32) | n_low, flags); } -asmlinkage long sys32_fadvise64(int fd, unsigned offset_lo, unsigned offset_hi, size_t len, - int advice) +asmlinkage long sys32_fadvise64(int fd, unsigned offset_lo, unsigned offset_hi, + size_t len, int advice) { return sys_fadvise64_64(fd, ((u64)offset_hi << 32) | offset_lo, len, advice); diff -puN arch/x86/ia32/syscall32.c~git-x86 /dev/null --- a/arch/x86/ia32/syscall32.c +++ /dev/null @@ -1,83 +0,0 @@ -/* Copyright 2002,2003 Andi Kleen, SuSE Labs */ - -/* vsyscall handling for 32bit processes. Map a stub page into it - on demand because 32bit cannot reach the kernel's fixmaps */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -extern unsigned char syscall32_syscall[], syscall32_syscall_end[]; -extern unsigned char syscall32_sysenter[], syscall32_sysenter_end[]; -extern int sysctl_vsyscall32; - -static struct page *syscall32_pages[1]; -static int use_sysenter = -1; - -struct linux_binprm; - -/* Setup a VMA at program startup for the vsyscall page */ -int syscall32_setup_pages(struct linux_binprm *bprm, int exstack) -{ - struct mm_struct *mm = current->mm; - int ret; - - down_write(&mm->mmap_sem); - /* - * MAYWRITE to allow gdb to COW and set breakpoints - * - * Make sure the vDSO gets into every core dump. - * Dumping its contents makes post-mortem fully interpretable later - * without matching up the same kernel and hardware config to see - * what PC values meant. 
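From the 32-bit process's side, the page installed here is located through the auxiliary vector rather than a hard-coded address. A rough userspace illustration — getauxval() is a later glibc convenience, used here only to show the mechanism; build with -m32:

	#include <stdio.h>
	#include <sys/auxv.h>

	int main(void)
	{
		/* AT_SYSINFO carries the address of __kernel_vsyscall */
		unsigned long entry = getauxval(AT_SYSINFO);

		printf("syscall trampoline at %#lx\n", entry);
		return 0;
	}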
- */ - /* Could randomize here */ - ret = install_special_mapping(mm, VSYSCALL32_BASE, PAGE_SIZE, - VM_READ|VM_EXEC| - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| - VM_ALWAYSDUMP, - syscall32_pages); - up_write(&mm->mmap_sem); - return ret; -} - -static int __init init_syscall32(void) -{ - char *syscall32_page = (void *)get_zeroed_page(GFP_KERNEL); - if (!syscall32_page) - panic("Cannot allocate syscall32 page"); - syscall32_pages[0] = virt_to_page(syscall32_page); - if (use_sysenter > 0) { - memcpy(syscall32_page, syscall32_sysenter, - syscall32_sysenter_end - syscall32_sysenter); - } else { - memcpy(syscall32_page, syscall32_syscall, - syscall32_syscall_end - syscall32_syscall); - } - return 0; -} - -__initcall(init_syscall32); - -/* May not be __init: called during resume */ -void syscall32_cpu_init(void) -{ - if (use_sysenter < 0) - use_sysenter = (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL); - - /* Load these always in case some future AMD CPU supports - SYSENTER from compat mode too. */ - checking_wrmsrl(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS); - checking_wrmsrl(MSR_IA32_SYSENTER_ESP, 0ULL); - checking_wrmsrl(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target); - - wrmsrl(MSR_CSTAR, ia32_cstar_target); -} diff -puN arch/x86/ia32/syscall32_syscall.S~git-x86 /dev/null --- a/arch/x86/ia32/syscall32_syscall.S +++ /dev/null @@ -1,17 +0,0 @@ -/* 32bit VDSOs mapped into user space. */ - - .section ".init.data","aw" - - .globl syscall32_syscall - .globl syscall32_syscall_end - -syscall32_syscall: - .incbin "arch/x86/ia32/vsyscall-syscall.so" -syscall32_syscall_end: - - .globl syscall32_sysenter - .globl syscall32_sysenter_end - -syscall32_sysenter: - .incbin "arch/x86/ia32/vsyscall-sysenter.so" -syscall32_sysenter_end: diff -puN arch/x86/ia32/tls32.c~git-x86 /dev/null --- a/arch/x86/ia32/tls32.c +++ /dev/null @@ -1,163 +0,0 @@ -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -/* - * sys_alloc_thread_area: get a yet unused TLS descriptor index. - */ -static int get_free_idx(void) -{ - struct thread_struct *t = ¤t->thread; - int idx; - - for (idx = 0; idx < GDT_ENTRY_TLS_ENTRIES; idx++) - if (desc_empty((struct n_desc_struct *)(t->tls_array) + idx)) - return idx + GDT_ENTRY_TLS_MIN; - return -ESRCH; -} - -/* - * Set a given TLS descriptor: - * When you want addresses > 32bit use arch_prctl() - */ -int do_set_thread_area(struct thread_struct *t, struct user_desc __user *u_info) -{ - struct user_desc info; - struct n_desc_struct *desc; - int cpu, idx; - - if (copy_from_user(&info, u_info, sizeof(info))) - return -EFAULT; - - idx = info.entry_number; - - /* - * index -1 means the kernel should try to find and - * allocate an empty descriptor: - */ - if (idx == -1) { - idx = get_free_idx(); - if (idx < 0) - return idx; - if (put_user(idx, &u_info->entry_number)) - return -EFAULT; - } - - if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) - return -EINVAL; - - desc = ((struct n_desc_struct *)t->tls_array) + idx - GDT_ENTRY_TLS_MIN; - - /* - * We must not get preempted while modifying the TLS. 
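The get_cpu()/put_cpu() pair below is what enforces that: it disables preemption and names the current CPU, so the per-CPU GDT's TLS slots cannot be reloaded out from under the update. The shape of the critical section:

	int cpu = get_cpu();	/* disable preemption, note this CPU */

	/* ... rewrite the TLS descriptors in t->tls_array ... */
	if (t == &current->thread)
		load_TLS(t, cpu);	/* push them into this CPU's GDT */

	put_cpu();		/* re-enable preemption */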
- */ - cpu = get_cpu(); - - if (LDT_empty(&info)) { - desc->a = 0; - desc->b = 0; - } else { - desc->a = LDT_entry_a(&info); - desc->b = LDT_entry_b(&info); - } - if (t == ¤t->thread) - load_TLS(t, cpu); - - put_cpu(); - return 0; -} - -asmlinkage long sys32_set_thread_area(struct user_desc __user *u_info) -{ - return do_set_thread_area(¤t->thread, u_info); -} - - -/* - * Get the current Thread-Local Storage area: - */ - -#define GET_BASE(desc) ( \ - (((desc)->a >> 16) & 0x0000ffff) | \ - (((desc)->b << 16) & 0x00ff0000) | \ - ( (desc)->b & 0xff000000) ) - -#define GET_LIMIT(desc) ( \ - ((desc)->a & 0x0ffff) | \ - ((desc)->b & 0xf0000) ) - -#define GET_32BIT(desc) (((desc)->b >> 22) & 1) -#define GET_CONTENTS(desc) (((desc)->b >> 10) & 3) -#define GET_WRITABLE(desc) (((desc)->b >> 9) & 1) -#define GET_LIMIT_PAGES(desc) (((desc)->b >> 23) & 1) -#define GET_PRESENT(desc) (((desc)->b >> 15) & 1) -#define GET_USEABLE(desc) (((desc)->b >> 20) & 1) -#define GET_LONGMODE(desc) (((desc)->b >> 21) & 1) - -int do_get_thread_area(struct thread_struct *t, struct user_desc __user *u_info) -{ - struct user_desc info; - struct n_desc_struct *desc; - int idx; - - if (get_user(idx, &u_info->entry_number)) - return -EFAULT; - if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) - return -EINVAL; - - desc = ((struct n_desc_struct *)t->tls_array) + idx - GDT_ENTRY_TLS_MIN; - - memset(&info, 0, sizeof(struct user_desc)); - info.entry_number = idx; - info.base_addr = GET_BASE(desc); - info.limit = GET_LIMIT(desc); - info.seg_32bit = GET_32BIT(desc); - info.contents = GET_CONTENTS(desc); - info.read_exec_only = !GET_WRITABLE(desc); - info.limit_in_pages = GET_LIMIT_PAGES(desc); - info.seg_not_present = !GET_PRESENT(desc); - info.useable = GET_USEABLE(desc); - info.lm = GET_LONGMODE(desc); - - if (copy_to_user(u_info, &info, sizeof(info))) - return -EFAULT; - return 0; -} - -asmlinkage long sys32_get_thread_area(struct user_desc __user *u_info) -{ - return do_get_thread_area(¤t->thread, u_info); -} - - -int ia32_child_tls(struct task_struct *p, struct pt_regs *childregs) -{ - struct n_desc_struct *desc; - struct user_desc info; - struct user_desc __user *cp; - int idx; - - cp = (void __user *)childregs->rsi; - if (copy_from_user(&info, cp, sizeof(info))) - return -EFAULT; - if (LDT_empty(&info)) - return -EINVAL; - - idx = info.entry_number; - if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) - return -EINVAL; - - desc = (struct n_desc_struct *)(p->thread.tls_array) + idx - GDT_ENTRY_TLS_MIN; - desc->a = LDT_entry_a(&info); - desc->b = LDT_entry_b(&info); - - return 0; -} diff -puN arch/x86/ia32/vsyscall-sigreturn.S~git-x86 /dev/null --- a/arch/x86/ia32/vsyscall-sigreturn.S +++ /dev/null @@ -1,143 +0,0 @@ -/* - * Common code for the sigreturn entry points on the vsyscall page. - * This code uses SYSCALL_ENTER_KERNEL (either syscall or int $0x80) - * to enter the kernel. - * This file is #include'd by vsyscall-*.S to define them after the - * vsyscall entry point. The addresses we get for these entry points - * by doing ".balign 32" must match in both versions of the page. 
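The rt_sigreturn entry defined below is tiny: one immediate load plus the trap. Its byte image — which is what the removed ia32 signal-delivery code used to copy into frame->retcode as a packed struct — decodes roughly as follows:

	/* 0xb8 = movl $imm32, %eax; 0xcd 0x80 = int $0x80
	 * (stored as the little-endian u16 0x80cd) */
	static const struct {
		u8  movl;	/* 0xb8 */
		u32 val;	/* __NR_ia32_rt_sigreturn */
		u16 int80;	/* 0x80cd */
	} __attribute__((packed)) stub = {
		0xb8, __NR_ia32_rt_sigreturn, 0x80cd,
	};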
- */ - - .code32 - .section .text.sigreturn,"ax" - .balign 32 - .globl __kernel_sigreturn - .type __kernel_sigreturn,@function -__kernel_sigreturn: -.LSTART_sigreturn: - popl %eax - movl $__NR_ia32_sigreturn, %eax - SYSCALL_ENTER_KERNEL -.LEND_sigreturn: - .size __kernel_sigreturn,.-.LSTART_sigreturn - - .section .text.rtsigreturn,"ax" - .balign 32 - .globl __kernel_rt_sigreturn - .type __kernel_rt_sigreturn,@function -__kernel_rt_sigreturn: -.LSTART_rt_sigreturn: - movl $__NR_ia32_rt_sigreturn, %eax - SYSCALL_ENTER_KERNEL -.LEND_rt_sigreturn: - .size __kernel_rt_sigreturn,.-.LSTART_rt_sigreturn - - .section .eh_frame,"a",@progbits -.LSTARTFRAMES: - .long .LENDCIES-.LSTARTCIES -.LSTARTCIES: - .long 0 /* CIE ID */ - .byte 1 /* Version number */ - .string "zRS" /* NUL-terminated augmentation string */ - .uleb128 1 /* Code alignment factor */ - .sleb128 -4 /* Data alignment factor */ - .byte 8 /* Return address register column */ - .uleb128 1 /* Augmentation value length */ - .byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */ - .byte 0x0c /* DW_CFA_def_cfa */ - .uleb128 4 - .uleb128 4 - .byte 0x88 /* DW_CFA_offset, column 0x8 */ - .uleb128 1 - .align 4 -.LENDCIES: - - .long .LENDFDE2-.LSTARTFDE2 /* Length FDE */ -.LSTARTFDE2: - .long .LSTARTFDE2-.LSTARTFRAMES /* CIE pointer */ - /* HACK: The dwarf2 unwind routines will subtract 1 from the - return address to get an address in the middle of the - presumed call instruction. Since we didn't get here via - a call, we need to include the nop before the real start - to make up for it. */ - .long .LSTART_sigreturn-1-. /* PC-relative start address */ - .long .LEND_sigreturn-.LSTART_sigreturn+1 - .uleb128 0 /* Augmentation length */ - /* What follows are the instructions for the table generation. - We record the locations of each register saved. This is - complicated by the fact that the "CFA" is always assumed to - be the value of the stack pointer in the caller. This means - that we must define the CFA of this body of code to be the - saved value of the stack pointer in the sigcontext. Which - also means that there is no fixed relation to the other - saved registers, which means that we must use DW_CFA_expression - to compute their addresses. It also means that when we - adjust the stack with the popl, we have to do it all over again. */ - -#define do_cfa_expr(offset) \ - .byte 0x0f; /* DW_CFA_def_cfa_expression */ \ - .uleb128 1f-0f; /* length */ \ -0: .byte 0x74; /* DW_OP_breg4 */ \ - .sleb128 offset; /* offset */ \ - .byte 0x06; /* DW_OP_deref */ \ -1: - -#define do_expr(regno, offset) \ - .byte 0x10; /* DW_CFA_expression */ \ - .uleb128 regno; /* regno */ \ - .uleb128 1f-0f; /* length */ \ -0: .byte 0x74; /* DW_OP_breg4 */ \ - .sleb128 offset; /* offset */ \ -1: - - do_cfa_expr(IA32_SIGCONTEXT_esp+4) - do_expr(0, IA32_SIGCONTEXT_eax+4) - do_expr(1, IA32_SIGCONTEXT_ecx+4) - do_expr(2, IA32_SIGCONTEXT_edx+4) - do_expr(3, IA32_SIGCONTEXT_ebx+4) - do_expr(5, IA32_SIGCONTEXT_ebp+4) - do_expr(6, IA32_SIGCONTEXT_esi+4) - do_expr(7, IA32_SIGCONTEXT_edi+4) - do_expr(8, IA32_SIGCONTEXT_eip+4) - - .byte 0x42 /* DW_CFA_advance_loc 2 -- nop; popl eax. 
*/ - - do_cfa_expr(IA32_SIGCONTEXT_esp) - do_expr(0, IA32_SIGCONTEXT_eax) - do_expr(1, IA32_SIGCONTEXT_ecx) - do_expr(2, IA32_SIGCONTEXT_edx) - do_expr(3, IA32_SIGCONTEXT_ebx) - do_expr(5, IA32_SIGCONTEXT_ebp) - do_expr(6, IA32_SIGCONTEXT_esi) - do_expr(7, IA32_SIGCONTEXT_edi) - do_expr(8, IA32_SIGCONTEXT_eip) - - .align 4 -.LENDFDE2: - - .long .LENDFDE3-.LSTARTFDE3 /* Length FDE */ -.LSTARTFDE3: - .long .LSTARTFDE3-.LSTARTFRAMES /* CIE pointer */ - /* HACK: See above wrt unwind library assumptions. */ - .long .LSTART_rt_sigreturn-1-. /* PC-relative start address */ - .long .LEND_rt_sigreturn-.LSTART_rt_sigreturn+1 - .uleb128 0 /* Augmentation */ - /* What follows are the instructions for the table generation. - We record the locations of each register saved. This is - slightly less complicated than the above, since we don't - modify the stack pointer in the process. */ - - do_cfa_expr(IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_esp) - do_expr(0, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_eax) - do_expr(1, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_ecx) - do_expr(2, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_edx) - do_expr(3, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_ebx) - do_expr(5, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_ebp) - do_expr(6, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_esi) - do_expr(7, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_edi) - do_expr(8, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_eip) - - .align 4 -.LENDFDE3: - -#include "../../x86/kernel/vsyscall-note_32.S" - diff -puN arch/x86/ia32/vsyscall-syscall.S~git-x86 /dev/null --- a/arch/x86/ia32/vsyscall-syscall.S +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Code for the vsyscall page. This version uses the syscall instruction. - */ - -#include -#include -#include - - .code32 - .text - .section .text.vsyscall,"ax" - .globl __kernel_vsyscall - .type __kernel_vsyscall,@function -__kernel_vsyscall: -.LSTART_vsyscall: - push %ebp -.Lpush_ebp: - movl %ecx, %ebp - syscall - movl $__USER32_DS, %ecx - movl %ecx, %ss - movl %ebp, %ecx - popl %ebp -.Lpop_ebp: - ret -.LEND_vsyscall: - .size __kernel_vsyscall,.-.LSTART_vsyscall - - .section .eh_frame,"a",@progbits -.LSTARTFRAME: - .long .LENDCIE-.LSTARTCIE -.LSTARTCIE: - .long 0 /* CIE ID */ - .byte 1 /* Version number */ - .string "zR" /* NUL-terminated augmentation string */ - .uleb128 1 /* Code alignment factor */ - .sleb128 -4 /* Data alignment factor */ - .byte 8 /* Return address register column */ - .uleb128 1 /* Augmentation value length */ - .byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */ - .byte 0x0c /* DW_CFA_def_cfa */ - .uleb128 4 - .uleb128 4 - .byte 0x88 /* DW_CFA_offset, column 0x8 */ - .uleb128 1 - .align 4 -.LENDCIE: - - .long .LENDFDE1-.LSTARTFDE1 /* Length FDE */ -.LSTARTFDE1: - .long .LSTARTFDE1-.LSTARTFRAME /* CIE pointer */ - .long .LSTART_vsyscall-. /* PC-relative start address */ - .long .LEND_vsyscall-.LSTART_vsyscall - .uleb128 0 /* Augmentation length */ - /* What follows are the instructions for the table generation. - We have to record all changes of the stack pointer. 
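The "0x40 +" arithmetic in the table below is the compact DW_CFA_advance_loc form: the two high bits of the opcode byte select the operation and the low six bits carry the code delta, so it only works for deltas under 0x40 (the sysenter variant uses DW_CFA_advance_loc4, opcode 0x04, with a four-byte delta instead). A sketch of the encoding:

	/* encode a small code advance in a single CFA opcode byte */
	static inline unsigned char dw_cfa_advance_loc(unsigned int delta)
	{
		/* valid only for delta < 0x40 */
		return 0x40 + delta;
	}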
*/ - .byte 0x40 + .Lpush_ebp-.LSTART_vsyscall /* DW_CFA_advance_loc */ - .byte 0x0e /* DW_CFA_def_cfa_offset */ - .uleb128 8 - .byte 0x85, 0x02 /* DW_CFA_offset %ebp -8 */ - .byte 0x40 + .Lpop_ebp-.Lpush_ebp /* DW_CFA_advance_loc */ - .byte 0xc5 /* DW_CFA_restore %ebp */ - .byte 0x0e /* DW_CFA_def_cfa_offset */ - .uleb128 4 - .align 4 -.LENDFDE1: - -#define SYSCALL_ENTER_KERNEL syscall -#include "vsyscall-sigreturn.S" diff -puN arch/x86/ia32/vsyscall-sysenter.S~git-x86 /dev/null --- a/arch/x86/ia32/vsyscall-sysenter.S +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Code for the vsyscall page. This version uses the sysenter instruction. - */ - -#include -#include - - .code32 - .text - .section .text.vsyscall,"ax" - .globl __kernel_vsyscall - .type __kernel_vsyscall,@function -__kernel_vsyscall: -.LSTART_vsyscall: - push %ecx -.Lpush_ecx: - push %edx -.Lpush_edx: - push %ebp -.Lenter_kernel: - movl %esp,%ebp - sysenter - .space 7,0x90 - jmp .Lenter_kernel - /* 16: System call normal return point is here! */ - pop %ebp -.Lpop_ebp: - pop %edx -.Lpop_edx: - pop %ecx -.Lpop_ecx: - ret -.LEND_vsyscall: - .size __kernel_vsyscall,.-.LSTART_vsyscall - - .section .eh_frame,"a",@progbits -.LSTARTFRAME: - .long .LENDCIE-.LSTARTCIE -.LSTARTCIE: - .long 0 /* CIE ID */ - .byte 1 /* Version number */ - .string "zR" /* NUL-terminated augmentation string */ - .uleb128 1 /* Code alignment factor */ - .sleb128 -4 /* Data alignment factor */ - .byte 8 /* Return address register column */ - .uleb128 1 /* Augmentation value length */ - .byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */ - .byte 0x0c /* DW_CFA_def_cfa */ - .uleb128 4 - .uleb128 4 - .byte 0x88 /* DW_CFA_offset, column 0x8 */ - .uleb128 1 - .align 4 -.LENDCIE: - - .long .LENDFDE1-.LSTARTFDE1 /* Length FDE */ -.LSTARTFDE1: - .long .LSTARTFDE1-.LSTARTFRAME /* CIE pointer */ - .long .LSTART_vsyscall-. /* PC-relative start address */ - .long .LEND_vsyscall-.LSTART_vsyscall - .uleb128 0 /* Augmentation length */ - /* What follows are the instructions for the table generation. - We have to record all changes of the stack pointer. */ - .byte 0x04 /* DW_CFA_advance_loc4 */ - .long .Lpush_ecx-.LSTART_vsyscall - .byte 0x0e /* DW_CFA_def_cfa_offset */ - .byte 0x08 /* RA at offset 8 now */ - .byte 0x04 /* DW_CFA_advance_loc4 */ - .long .Lpush_edx-.Lpush_ecx - .byte 0x0e /* DW_CFA_def_cfa_offset */ - .byte 0x0c /* RA at offset 12 now */ - .byte 0x04 /* DW_CFA_advance_loc4 */ - .long .Lenter_kernel-.Lpush_edx - .byte 0x0e /* DW_CFA_def_cfa_offset */ - .byte 0x10 /* RA at offset 16 now */ - .byte 0x85, 0x04 /* DW_CFA_offset %ebp -16 */ - /* Finally the epilogue. */ - .byte 0x04 /* DW_CFA_advance_loc4 */ - .long .Lpop_ebp-.Lenter_kernel - .byte 0x0e /* DW_CFA_def_cfa_offset */ - .byte 0x12 /* RA at offset 12 now */ - .byte 0xc5 /* DW_CFA_restore %ebp */ - .byte 0x04 /* DW_CFA_advance_loc4 */ - .long .Lpop_edx-.Lpop_ebp - .byte 0x0e /* DW_CFA_def_cfa_offset */ - .byte 0x08 /* RA at offset 8 now */ - .byte 0x04 /* DW_CFA_advance_loc4 */ - .long .Lpop_ecx-.Lpop_edx - .byte 0x0e /* DW_CFA_def_cfa_offset */ - .byte 0x04 /* RA at offset 4 now */ - .align 4 -.LENDFDE1: - -#define SYSCALL_ENTER_KERNEL int $0x80 -#include "vsyscall-sigreturn.S" diff -puN arch/x86/ia32/vsyscall.lds~git-x86 /dev/null --- a/arch/x86/ia32/vsyscall.lds +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Linker script for vsyscall DSO. The vsyscall page is an ELF shared - * object prelinked to its virtual address. This script controls its layout. - */ - -/* This must match . 
*/ -VSYSCALL_BASE = 0xffffe000; - -SECTIONS -{ - . = VSYSCALL_BASE + SIZEOF_HEADERS; - - .hash : { *(.hash) } :text - .gnu.hash : { *(.gnu.hash) } - .dynsym : { *(.dynsym) } - .dynstr : { *(.dynstr) } - .gnu.version : { *(.gnu.version) } - .gnu.version_d : { *(.gnu.version_d) } - .gnu.version_r : { *(.gnu.version_r) } - - /* This linker script is used both with -r and with -shared. - For the layouts to match, we need to skip more than enough - space for the dynamic symbol table et al. If this amount - is insufficient, ld -shared will barf. Just increase it here. */ - . = VSYSCALL_BASE + 0x400; - - .text.vsyscall : { *(.text.vsyscall) } :text =0x90909090 - - /* This is an 32bit object and we cannot easily get the offsets - into the 64bit kernel. Just hardcode them here. This assumes - that all the stubs don't need more than 0x100 bytes. */ - . = VSYSCALL_BASE + 0x500; - - .text.sigreturn : { *(.text.sigreturn) } :text =0x90909090 - - . = VSYSCALL_BASE + 0x600; - - .text.rtsigreturn : { *(.text.rtsigreturn) } :text =0x90909090 - - .note : { *(.note.*) } :text :note - .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr - .eh_frame : { KEEP (*(.eh_frame)) } :text - .dynamic : { *(.dynamic) } :text :dynamic - .useless : { - *(.got.plt) *(.got) - *(.data .data.* .gnu.linkonce.d.*) - *(.dynbss) - *(.bss .bss.* .gnu.linkonce.b.*) - } :text -} - -/* - * We must supply the ELF program headers explicitly to get just one - * PT_LOAD segment, and set the flags explicitly to make segments read-only. - */ -PHDRS -{ - text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */ - dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ - note PT_NOTE FLAGS(4); /* PF_R */ - eh_frame_hdr 0x6474e550; /* PT_GNU_EH_FRAME, but ld doesn't match the name */ -} - -/* - * This controls what symbols we export from the DSO. - */ -VERSION -{ - LINUX_2.5 { - global: - __kernel_vsyscall; - __kernel_sigreturn; - __kernel_rt_sigreturn; - - local: *; - }; -} - -/* The ELF entry point can be used to set the AT_SYSINFO value. */ -ENTRY(__kernel_vsyscall); diff -puN arch/x86/kernel/Makefile_32~git-x86 arch/x86/kernel/Makefile_32 --- a/arch/x86/kernel/Makefile_32~git-x86 +++ a/arch/x86/kernel/Makefile_32 @@ -6,10 +6,14 @@ extra-y := head_32.o init_task.o vmlinux CPPFLAGS_vmlinux.lds += -Ui386 obj-y := process_32.o signal_32.o entry_32.o traps_32.o irq_32.o \ - ptrace_32.o time_32.o ioport_32.o ldt_32.o setup_32.o i8259_32.o sys_i386_32.o \ + time_32.o ioport_32.o ldt.o setup_32.o i8259_32.o sys_i386_32.o \ pci-dma_32.o i386_ksyms_32.o i387_32.o bootflag.o e820_32.o\ - quirks.o i8237.o topology.o alternative.o i8253.o tsc_32.o + quirks.o i8237.o topology.o alternative.o i8253.o tsc_32.o rtc.o +obj-y += ptrace.o +obj-y += ds.o +obj-y += tls.o +obj-y += step.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-y += cpu/ obj-y += acpi/ @@ -33,7 +37,6 @@ obj-$(CONFIG_X86_NUMAQ) += numaq_32.o obj-$(CONFIG_X86_SUMMIT_NUMA) += summit_32.o obj-$(CONFIG_KPROBES) += kprobes_32.o obj-$(CONFIG_MODULES) += module_32.o -obj-y += sysenter_32.o vsyscall_32.o obj-$(CONFIG_ACPI_SRAT) += srat_32.o obj-$(CONFIG_EFI) += efi_32.o efi_stub_32.o obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o @@ -48,41 +51,3 @@ obj-$(CONFIG_PARAVIRT) += paravirt_32.o obj-y += pcspeaker.o obj-$(CONFIG_SCx200) += scx200_32.o - -# vsyscall_32.o contains the vsyscall DSO images as __initdata. -# We must build both images before we can assemble it. 
-# Note: kbuild does not track this dependency due to usage of .incbin -$(obj)/vsyscall_32.o: $(obj)/vsyscall-int80_32.so $(obj)/vsyscall-sysenter_32.so -targets += $(foreach F,int80 sysenter,vsyscall-$F_32.o vsyscall-$F_32.so) -targets += vsyscall-note_32.o vsyscall_32.lds - -# The DSO images are built using a special linker script. -quiet_cmd_syscall = SYSCALL $@ - cmd_syscall = $(CC) -m elf_i386 -nostdlib $(SYSCFLAGS_$(@F)) \ - -Wl,-T,$(filter-out FORCE,$^) -o $@ - -export CPPFLAGS_vsyscall_32.lds += -P -C -Ui386 - -vsyscall-flags = -shared -s -Wl,-soname=linux-gate.so.1 \ - $(call ld-option, -Wl$(comma)--hash-style=sysv) -SYSCFLAGS_vsyscall-sysenter_32.so = $(vsyscall-flags) -SYSCFLAGS_vsyscall-int80_32.so = $(vsyscall-flags) - -$(obj)/vsyscall-int80_32.so $(obj)/vsyscall-sysenter_32.so: \ -$(obj)/vsyscall-%.so: $(src)/vsyscall_32.lds \ - $(obj)/vsyscall-%.o $(obj)/vsyscall-note_32.o FORCE - $(call if_changed,syscall) - -# We also create a special relocatable object that should mirror the symbol -# table and layout of the linked DSO. With ld -R we can then refer to -# these symbols in the kernel code rather than hand-coded addresses. -extra-y += vsyscall-syms.o -$(obj)/built-in.o: $(obj)/vsyscall-syms.o -$(obj)/built-in.o: ld_flags += -R $(obj)/vsyscall-syms.o - -SYSCFLAGS_vsyscall-syms.o = -r -$(obj)/vsyscall-syms.o: $(src)/vsyscall_32.lds \ - $(obj)/vsyscall-sysenter_32.o $(obj)/vsyscall-note_32.o FORCE - $(call if_changed,syscall) - - diff -puN arch/x86/kernel/Makefile_64~git-x86 arch/x86/kernel/Makefile_64 --- a/arch/x86/kernel/Makefile_64~git-x86 +++ a/arch/x86/kernel/Makefile_64 @@ -4,15 +4,19 @@ extra-y := head_64.o head64.o init_task.o vmlinux.lds CPPFLAGS_vmlinux.lds += -Ux86_64 -EXTRA_AFLAGS := -traditional obj-y := process_64.o signal_64.o entry_64.o traps_64.o irq_64.o \ - ptrace_64.o time_64.o ioport_64.o ldt_64.o setup_64.o i8259_64.o sys_x86_64.o \ + time_64.o ioport_64.o ldt.o setup_64.o i8259_64.o sys_x86_64.o \ x8664_ksyms_64.o i387_64.o syscall_64.o vsyscall_64.o \ setup64.o bootflag.o e820_64.o reboot_64.o quirks.o i8237.o \ pci-dma_64.o pci-nommu_64.o alternative.o hpet.o tsc_64.o bugs_64.o \ - i8253.o + i8253.o rtc.o +obj-y += ptrace.o +obj-y += ds.o +obj-y += step.o + +obj-$(CONFIG_IA32_EMULATION) += tls.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-y += cpu/ obj-y += acpi/ diff -puN arch/x86/kernel/acpi/boot.c~git-x86 arch/x86/kernel/acpi/boot.c --- a/arch/x86/kernel/acpi/boot.c~git-x86 +++ a/arch/x86/kernel/acpi/boot.c @@ -78,7 +78,6 @@ int acpi_ht __initdata = 1; /* enable HT int acpi_lapic; int acpi_ioapic; int acpi_strict; -EXPORT_SYMBOL(acpi_strict); u8 acpi_sci_flags __initdata; int acpi_sci_override_gsi __initdata; @@ -490,8 +489,6 @@ int acpi_register_gsi(u32 gsi, int trigg return irq; } -EXPORT_SYMBOL(acpi_register_gsi); - /* * ACPI based hotplug support for CPU */ diff -puN arch/x86/kernel/acpi/wakeup_64.S~git-x86 arch/x86/kernel/acpi/wakeup_64.S --- a/arch/x86/kernel/acpi/wakeup_64.S~git-x86 +++ a/arch/x86/kernel/acpi/wakeup_64.S @@ -344,13 +344,13 @@ do_suspend_lowlevel: call save_processor_state movq $saved_context, %rax - movq %rsp, pt_regs_rsp(%rax) - movq %rbp, pt_regs_rbp(%rax) - movq %rsi, pt_regs_rsi(%rax) - movq %rdi, pt_regs_rdi(%rax) - movq %rbx, pt_regs_rbx(%rax) - movq %rcx, pt_regs_rcx(%rax) - movq %rdx, pt_regs_rdx(%rax) + movq %rsp, pt_regs_sp(%rax) + movq %rbp, pt_regs_bp(%rax) + movq %rsi, pt_regs_si(%rax) + movq %rdi, pt_regs_di(%rax) + movq %rbx, pt_regs_bx(%rax) + movq %rcx, pt_regs_cx(%rax) + movq %rdx, pt_regs_dx(%rax) movq 
%r8, pt_regs_r8(%rax) movq %r9, pt_regs_r9(%rax) movq %r10, pt_regs_r10(%rax) @@ -360,7 +360,7 @@ do_suspend_lowlevel: movq %r14, pt_regs_r14(%rax) movq %r15, pt_regs_r15(%rax) pushfq - popq pt_regs_eflags(%rax) + popq pt_regs_flags(%rax) movq $.L97, saved_rip(%rip) @@ -391,15 +391,15 @@ do_suspend_lowlevel: movq %rbx, %cr2 movq saved_context_cr0(%rax), %rbx movq %rbx, %cr0 - pushq pt_regs_eflags(%rax) + pushq pt_regs_flags(%rax) popfq - movq pt_regs_rsp(%rax), %rsp - movq pt_regs_rbp(%rax), %rbp - movq pt_regs_rsi(%rax), %rsi - movq pt_regs_rdi(%rax), %rdi - movq pt_regs_rbx(%rax), %rbx - movq pt_regs_rcx(%rax), %rcx - movq pt_regs_rdx(%rax), %rdx + movq pt_regs_sp(%rax), %rsp + movq pt_regs_bp(%rax), %rbp + movq pt_regs_si(%rax), %rsi + movq pt_regs_di(%rax), %rdi + movq pt_regs_bx(%rax), %rbx + movq pt_regs_cx(%rax), %rcx + movq pt_regs_dx(%rax), %rdx movq pt_regs_r8(%rax), %r8 movq pt_regs_r9(%rax), %r9 movq pt_regs_r10(%rax), %r10 diff -puN arch/x86/kernel/alternative.c~git-x86 arch/x86/kernel/alternative.c --- a/arch/x86/kernel/alternative.c~git-x86 +++ a/arch/x86/kernel/alternative.c @@ -356,15 +356,15 @@ void alternatives_smp_switch(int smp) spin_lock_irqsave(&smp_alt, flags); if (smp) { printk(KERN_INFO "SMP alternatives: switching to SMP code\n"); - clear_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability); - clear_bit(X86_FEATURE_UP, cpu_data(0).x86_capability); + clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP); + clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP); list_for_each_entry(mod, &smp_alt_modules, next) alternatives_smp_lock(mod->locks, mod->locks_end, mod->text, mod->text_end); } else { printk(KERN_INFO "SMP alternatives: switching to UP code\n"); - set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability); - set_bit(X86_FEATURE_UP, cpu_data(0).x86_capability); + set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP); + set_cpu_cap(&cpu_data(0), X86_FEATURE_UP); list_for_each_entry(mod, &smp_alt_modules, next) alternatives_smp_unlock(mod->locks, mod->locks_end, mod->text, mod->text_end); @@ -431,8 +431,9 @@ void __init alternative_instructions(voi if (smp_alt_once) { if (1 == num_possible_cpus()) { printk(KERN_INFO "SMP alternatives: switching to UP code\n"); - set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability); - set_bit(X86_FEATURE_UP, cpu_data(0).x86_capability); + set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP); + set_cpu_cap(&cpu_data(0), X86_FEATURE_UP); + alternatives_smp_unlock(__smp_locks, __smp_locks_end, _text, _etext); } diff -puN arch/x86/kernel/aperture_64.c~git-x86 arch/x86/kernel/aperture_64.c --- a/arch/x86/kernel/aperture_64.c~git-x86 +++ a/arch/x86/kernel/aperture_64.c @@ -1,12 +1,12 @@ -/* +/* * Firmware replacement code. - * + * * Work around broken BIOSes that don't set an aperture or only set the - * aperture in the AGP bridge. - * If all fails map the aperture over some low memory. This is cheaper than - * doing bounce buffering. The memory is lost. This is done at early boot - * because only the bootmem allocator can allocate 32+MB. - * + * aperture in the AGP bridge. + * If all fails map the aperture over some low memory. This is cheaper than + * doing bounce buffering. The memory is lost. This is done at early boot + * because only the bootmem allocator can allocate 32+MB. + * * Copyright 2002 Andi Kleen, SuSE Labs. 
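The header comment above describes mapping the aperture over low RAM when the BIOS set none up. The sizing rule used by allocate_aperture() below is size = 32MB << order with natural alignment, i.e. the base must be a multiple of the size. A standalone sketch of that arithmetic, with the 0..7 order range taken from the clamp in the code:

#include <stdio.h>

int main(void)
{
	unsigned int order;

	/* same clamp as allocate_aperture(): orders above 7 fall back to 7 */
	for (order = 0; order <= 7; order++) {
		unsigned long size = (32UL << 20) << order;

		/* natural alignment: base must be a multiple of the size */
		printf("order %u -> %4lu MB aperture, %4lu MB alignment\n",
		       order, size >> 20, size >> 20);
	}
	return 0;
}

This is why the comment notes a 2GB aperture has little chance of fitting in the lower 4GB: it would also need 2GB alignment.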
*/ #include @@ -30,7 +30,7 @@ int gart_iommu_aperture_disabled __initd int gart_iommu_aperture_allowed __initdata = 0; int fallback_aper_order __initdata = 1; /* 64MB */ -int fallback_aper_force __initdata = 0; +int fallback_aper_force __initdata = 0; int fix_aperture __initdata = 1; @@ -49,167 +49,180 @@ static void __init insert_aperture_resou /* This code runs before the PCI subsystem is initialized, so just access the northbridge directly. */ -static u32 __init allocate_aperture(void) +static u32 __init allocate_aperture(void) { u32 aper_size; - void *p; + void *p; - if (fallback_aper_order > 7) - fallback_aper_order = 7; - aper_size = (32 * 1024 * 1024) << fallback_aper_order; - - /* - * Aperture has to be naturally aligned. This means an 2GB aperture won't - * have much chance of finding a place in the lower 4GB of memory. - * Unfortunately we cannot move it up because that would make the - * IOMMU useless. + if (fallback_aper_order > 7) + fallback_aper_order = 7; + aper_size = (32 * 1024 * 1024) << fallback_aper_order; + + /* + * Aperture has to be naturally aligned. This means a 2GB aperture + * won't have much chance of finding a place in the lower 4GB of + * memory. Unfortunately we cannot move it up because that would + * make the IOMMU useless. */ p = __alloc_bootmem_nopanic(aper_size, aper_size, 0); if (!p || __pa(p)+aper_size > 0xffffffff) { - printk("Cannot allocate aperture memory hole (%p,%uK)\n", - p, aper_size>>10); + printk(KERN_ERR + "Cannot allocate aperture memory hole (%p,%uK)\n", + p, aper_size>>10); if (p) free_bootmem(__pa(p), aper_size); return 0; } - printk("Mapping aperture over %d KB of RAM @ %lx\n", - aper_size >> 10, __pa(p)); + printk(KERN_INFO "Mapping aperture over %d KB of RAM @ %lx\n", + aper_size >> 10, __pa(p)); insert_aperture_resource((u32)__pa(p), aper_size); - return (u32)__pa(p); + + return (u32)__pa(p); } static int __init aperture_valid(u64 aper_base, u32 aper_size) -{ - if (!aper_base) +{ + if (!aper_base) return 0; - if (aper_size < 64*1024*1024) { - printk("Aperture too small (%d MB)\n", aper_size>>20); + + if (aper_size < 64*1024*1024) { + printk(KERN_ERR "Aperture too small (%d MB)\n", aper_size>>20); return 0; } if (aper_base + aper_size > 0x100000000UL) { - printk("Aperture beyond 4GB. Ignoring.\n"); - return 0; + printk(KERN_ERR "Aperture beyond 4GB. Ignoring.\n"); + return 0; } if (e820_any_mapped(aper_base, aper_base + aper_size, E820_RAM)) { - printk("Aperture pointing to e820 RAM. Ignoring.\n"); - return 0; - } + printk(KERN_ERR "Aperture pointing to e820 RAM. 
Ignoring.\n"); + return 0; + } + return 1; -} +} /* Find a PCI capability */ -static __u32 __init find_cap(int num, int slot, int func, int cap) -{ - u8 pos; +static __u32 __init find_cap(int num, int slot, int func, int cap) +{ int bytes; - if (!(read_pci_config_16(num,slot,func,PCI_STATUS) & PCI_STATUS_CAP_LIST)) + u8 pos; + + if (!(read_pci_config_16(num, slot, func, PCI_STATUS) & + PCI_STATUS_CAP_LIST)) return 0; - pos = read_pci_config_byte(num,slot,func,PCI_CAPABILITY_LIST); - for (bytes = 0; bytes < 48 && pos >= 0x40; bytes++) { + + pos = read_pci_config_byte(num, slot, func, PCI_CAPABILITY_LIST); + for (bytes = 0; bytes < 48 && pos >= 0x40; bytes++) { u8 id; - pos &= ~3; - id = read_pci_config_byte(num,slot,func,pos+PCI_CAP_LIST_ID); + + pos &= ~3; + id = read_pci_config_byte(num, slot, func, pos+PCI_CAP_LIST_ID); if (id == 0xff) break; - if (id == cap) - return pos; - pos = read_pci_config_byte(num,slot,func,pos+PCI_CAP_LIST_NEXT); - } + if (id == cap) + return pos; + pos = read_pci_config_byte(num, slot, func, + pos+PCI_CAP_LIST_NEXT); + } return 0; -} +} /* Read a standard AGPv3 bridge header */ static __u32 __init read_agp(int num, int slot, int func, int cap, u32 *order) -{ +{ u32 apsize; u32 apsizereg; int nbits; u32 aper_low, aper_hi; u64 aper; - printk("AGP bridge at %02x:%02x:%02x\n", num, slot, func); - apsizereg = read_pci_config_16(num,slot,func, cap + 0x14); + printk(KERN_INFO "AGP bridge at %02x:%02x:%02x\n", num, slot, func); + apsizereg = read_pci_config_16(num, slot, func, cap + 0x14); if (apsizereg == 0xffffffff) { - printk("APSIZE in AGP bridge unreadable\n"); + printk(KERN_ERR "APSIZE in AGP bridge unreadable\n"); return 0; } apsize = apsizereg & 0xfff; /* Some BIOS use weird encodings not in the AGPv3 table. */ - if (apsize & 0xff) - apsize |= 0xf00; + if (apsize & 0xff) + apsize |= 0xf00; nbits = hweight16(apsize); *order = 7 - nbits; if ((int)*order < 0) /* < 32MB */ *order = 0; - - aper_low = read_pci_config(num,slot,func, 0x10); - aper_hi = read_pci_config(num,slot,func,0x14); + + aper_low = read_pci_config(num, slot, func, 0x10); + aper_hi = read_pci_config(num, slot, func, 0x14); aper = (aper_low & ~((1<<22)-1)) | ((u64)aper_hi << 32); - printk("Aperture from AGP @ %Lx size %u MB (APSIZE %x)\n", - aper, 32 << *order, apsizereg); + printk(KERN_INFO "Aperture from AGP @ %Lx size %u MB (APSIZE %x)\n", - aper, 32 << *order, apsizereg); + aper, 32 << *order, apsizereg); if (!aperture_valid(aper, (32*1024*1024) << *order)) - return 0; - return (u32)aper; -} - -/* Look for an AGP bridge. Windows only expects the aperture in the - AGP bridge and some BIOS forget to initialize the Northbridge too. - Work around this here. - - Do an PCI bus scan by hand because we're running before the PCI - subsystem. - - All K8 AGP bridges are AGPv3 compliant, so we can do this scan - generically. It's probably overkill to always scan all slots because - the AGP bridges should be always an own bus on the HT hierarchy, - but do it here for future safety. */ + return 0; + return (u32)aper; +} + +/* + * Look for an AGP bridge. Windows only expects the aperture in the + * AGP bridge and some BIOS forget to initialize the Northbridge too. + * Work around this here. + * + * Do a PCI bus scan by hand because we're running before the PCI + * subsystem. + * + * All K8 AGP bridges are AGPv3 compliant, so we can do this scan + * generically. It's probably overkill to always scan all slots because + * the AGP bridges should be always an own bus on the HT hierarchy, + * but do it here for future safety.
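The APSIZE decode in read_agp() above maps set bits to an aperture order: each additional set bit halves the aperture, and negative orders are clamped to 32MB. A self-contained rerun of that computation on a made-up register value (popcount16() stands in for the kernel's hweight16(); the sample value is not from real hardware):

#include <stdio.h>

static int popcount16(unsigned int v)
{
	int n = 0;

	for (; v; v &= v - 1)
		n++;
	return n;
}

int main(void)
{
	unsigned int apsizereg = 0x0f30;	/* sample value */
	unsigned int apsize = apsizereg & 0xfff;
	int order;

	if (apsize & 0xff)	/* the "weird encoding" workaround */
		apsize |= 0xf00;
	order = 7 - popcount16(apsize);
	if (order < 0)		/* < 32MB */
		order = 0;
	printf("APSIZE %#x -> order %d -> %u MB\n",
	       apsizereg, order, 32u << order);
	return 0;
}

For 0x0f30 this prints order 1, a 64MB aperture.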
+ */ static __u32 __init search_agp_bridge(u32 *order, int *valid_agp) { int num, slot, func; /* Poor man's PCI discovery */ - for (num = 0; num < 256; num++) { - for (slot = 0; slot < 32; slot++) { - for (func = 0; func < 8; func++) { + for (num = 0; num < 256; num++) { + for (slot = 0; slot < 32; slot++) { + for (func = 0; func < 8; func++) { u32 class, cap; u8 type; - class = read_pci_config(num,slot,func, + class = read_pci_config(num, slot, func, PCI_CLASS_REVISION); if (class == 0xffffffff) - break; - - switch (class >> 16) { + break; + + switch (class >> 16) { case PCI_CLASS_BRIDGE_HOST: case PCI_CLASS_BRIDGE_OTHER: /* needed? */ /* AGP bridge? */ - cap = find_cap(num,slot,func,PCI_CAP_ID_AGP); + cap = find_cap(num, slot, func, + PCI_CAP_ID_AGP); if (!cap) break; - *valid_agp = 1; - return read_agp(num,slot,func,cap,order); - } - + *valid_agp = 1; + return read_agp(num, slot, func, cap, + order); + } + /* No multi-function device? */ - type = read_pci_config_byte(num,slot,func, + type = read_pci_config_byte(num, slot, func, PCI_HEADER_TYPE); if (!(type & 0x80)) break; - } - } + } + } } - printk("No AGP bridge found\n"); + printk(KERN_INFO "No AGP bridge found\n"); + return 0; } void __init gart_iommu_hole_init(void) -{ - int fix, num; +{ u32 aper_size, aper_alloc = 0, aper_order = 0, last_aper_order = 0; u64 aper_base, last_aper_base = 0; - int valid_agp = 0; + int fix, num, valid_agp = 0; if (gart_iommu_aperture_disabled || !fix_aperture || !early_pci_allowed()) @@ -218,24 +231,24 @@ void __init gart_iommu_hole_init(void) printk(KERN_INFO "Checking aperture...\n"); fix = 0; - for (num = 24; num < 32; num++) { + for (num = 24; num < 32; num++) { if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00))) continue; iommu_detected = 1; gart_iommu_aperture = 1; - aper_order = (read_pci_config(0, num, 3, 0x90) >> 1) & 7; - aper_size = (32 * 1024 * 1024) << aper_order; + aper_order = (read_pci_config(0, num, 3, 0x90) >> 1) & 7; + aper_size = (32 * 1024 * 1024) << aper_order; aper_base = read_pci_config(0, num, 3, 0x94) & 0x7fff; - aper_base <<= 25; + aper_base <<= 25; + + printk(KERN_INFO "CPU %d: aperture @ %Lx size %u MB\n", + num-24, aper_base, aper_size>>20); - printk("CPU %d: aperture @ %Lx size %u MB\n", num-24, - aper_base, aper_size>>20); - if (!aperture_valid(aper_base, aper_size)) { - fix = 1; - break; + fix = 1; + break; } if ((last_aper_order && aper_order != last_aper_order) || @@ -245,55 +258,64 @@ void __init gart_iommu_hole_init(void) } last_aper_order = aper_order; last_aper_base = aper_base; - } + } if (!fix && !fallback_aper_force) { if (last_aper_base) { unsigned long n = (32 * 1024 * 1024) << last_aper_order; + insert_aperture_resource((u32)last_aper_base, n); } - return; + return; } if (!fallback_aper_force) - aper_alloc = search_agp_bridge(&aper_order, &valid_agp); - - if (aper_alloc) { + aper_alloc = search_agp_bridge(&aper_order, &valid_agp); + + if (aper_alloc) { /* Got the aperture from the AGP bridge */ } else if (swiotlb && !valid_agp) { /* Do nothing */ } else if ((!no_iommu && end_pfn > MAX_DMA32_PFN) || force_iommu || valid_agp || - fallback_aper_force) { - printk("Your BIOS doesn't leave a aperture memory hole\n"); - printk("Please enable the IOMMU option in the BIOS setup\n"); - printk("This costs you %d MB of RAM\n", - 32 << fallback_aper_order); + fallback_aper_force) { + printk(KERN_ERR + "Your BIOS doesn't leave a aperture memory hole\n"); + printk(KERN_ERR + "Please enable the IOMMU option in the BIOS setup\n"); + printk(KERN_ERR + "This costs you %d MB 
of RAM\n", + 32 << fallback_aper_order); aper_order = fallback_aper_order; aper_alloc = allocate_aperture(); - if (!aper_alloc) { - /* Could disable AGP and IOMMU here, but it's probably - not worth it. But the later users cannot deal with - bad apertures and turning on the aperture over memory - causes very strange problems, so it's better to - panic early. */ + if (!aper_alloc) { + /* + * Could disable AGP and IOMMU here, but it's + * probably not worth it. But the later users + * cannot deal with bad apertures and turning + * on the aperture over memory causes very + * strange problems, so it's better to panic + * early. + */ panic("Not enough memory for aperture"); } - } else { - return; - } + } else { + return; + } /* Fix up the north bridges */ - for (num = 24; num < 32; num++) { + for (num = 24; num < 32; num++) { if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00))) - continue; + continue; - /* Don't enable translation yet. That is done later. - Assume this BIOS didn't initialise the GART so - just overwrite all previous bits */ - write_pci_config(0, num, 3, 0x90, aper_order<<1); - write_pci_config(0, num, 3, 0x94, aper_alloc>>25); - } -} + /* + * Don't enable translation yet. That is done later. + * Assume this BIOS didn't initialise the GART so + * just overwrite all previous bits + */ + write_pci_config(0, num, 3, 0x90, aper_order<<1); + write_pci_config(0, num, 3, 0x94, aper_alloc>>25); + } +} diff -puN arch/x86/kernel/apic_32.c~git-x86 arch/x86/kernel/apic_32.c --- a/arch/x86/kernel/apic_32.c~git-x86 +++ a/arch/x86/kernel/apic_32.c @@ -43,8 +43,6 @@ #include #include -#include "io_ports.h" - /* * Sanity check */ @@ -135,9 +133,9 @@ void apic_wait_icr_idle(void) cpu_relax(); } -unsigned long safe_apic_wait_icr_idle(void) +u32 safe_apic_wait_icr_idle(void) { - unsigned long send_status; + u32 send_status; int timeout; timeout = 0; @@ -563,6 +561,9 @@ static void local_apic_timer_interrupt(v return; } + /* + * the NMI deadlock-detector uses this. 
+ */ per_cpu(irq_stat, cpu).apic_timer_irqs++; evt->event_handler(evt); @@ -617,7 +618,7 @@ int setup_profiling_timer(unsigned int m void clear_local_APIC(void) { int maxlvt = lapic_get_maxlvt(); - unsigned long v; + u32 v; /* * Masking an LVT entry can trigger a local APIC error @@ -1077,7 +1078,7 @@ static int __init detect_init_APIC (void printk(KERN_WARNING "Could not enable APIC!\n"); return -1; } - set_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); + set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; /* The BIOS may have set up the APIC at some other address */ @@ -1167,7 +1168,7 @@ fake_ioapic_page: int __init APIC_init_uniprocessor (void) { if (enable_local_apic < 0) - clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); + clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); if (!smp_found_config && !cpu_has_apic) return -1; @@ -1179,7 +1180,7 @@ int __init APIC_init_uniprocessor (void) APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n", boot_cpu_physical_apicid); - clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); + clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); return -1; } @@ -1210,50 +1211,6 @@ int __init APIC_init_uniprocessor (void) } /* - * APIC command line parameters - */ -static int __init parse_lapic(char *arg) -{ - enable_local_apic = 1; - return 0; -} -early_param("lapic", parse_lapic); - -static int __init parse_nolapic(char *arg) -{ - enable_local_apic = -1; - clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); - return 0; -} -early_param("nolapic", parse_nolapic); - -static int __init parse_disable_lapic_timer(char *arg) -{ - local_apic_timer_disabled = 1; - return 0; -} -early_param("nolapic_timer", parse_disable_lapic_timer); - -static int __init parse_lapic_timer_c2_ok(char *arg) -{ - local_apic_timer_c2_ok = 1; - return 0; -} -early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); - -static int __init apic_set_verbosity(char *str) -{ - if (strcmp("debug", str) == 0) - apic_verbosity = APIC_DEBUG; - else if (strcmp("verbose", str) == 0) - apic_verbosity = APIC_VERBOSE; - return 1; -} - -__setup("apic=", apic_set_verbosity); - - -/* * Local APIC interrupts */ @@ -1565,3 +1522,46 @@ device_initcall(init_lapic_sysfs); static void apic_pm_activate(void) { } #endif /* CONFIG_PM */ + +/* + * APIC command line parameters + */ +static int __init parse_lapic(char *arg) +{ + enable_local_apic = 1; + return 0; +} +early_param("lapic", parse_lapic); + +static int __init parse_nolapic(char *arg) +{ + enable_local_apic = -1; + clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); + return 0; +} +early_param("nolapic", parse_nolapic); + +static int __init parse_disable_lapic_timer(char *arg) +{ + local_apic_timer_disabled = 1; + return 0; +} +early_param("nolapic_timer", parse_disable_lapic_timer); + +static int __init parse_lapic_timer_c2_ok(char *arg) +{ + local_apic_timer_c2_ok = 1; + return 0; +} +early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); + +static int __init apic_set_verbosity(char *str) +{ + if (strcmp("debug", str) == 0) + apic_verbosity = APIC_DEBUG; + else if (strcmp("verbose", str) == 0) + apic_verbosity = APIC_VERBOSE; + return 1; +} +__setup("apic=", apic_set_verbosity); + diff -puN arch/x86/kernel/apic_64.c~git-x86 arch/x86/kernel/apic_64.c --- a/arch/x86/kernel/apic_64.c~git-x86 +++ a/arch/x86/kernel/apic_64.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -43,12 +44,12 @@ int 
apic_verbosity; int disable_apic_timer __cpuinitdata; static int apic_calibrate_pmtmr __initdata; +int disable_apic; /* Local APIC timer works in C2? */ int local_apic_timer_c2_ok; EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); -static struct resource *ioapic_resources; static struct resource lapic_resource = { .name = "Local APIC", .flags = IORESOURCE_MEM | IORESOURCE_BUSY, @@ -60,10 +61,8 @@ static int lapic_next_event(unsigned lon struct clock_event_device *evt); static void lapic_timer_setup(enum clock_event_mode mode, struct clock_event_device *evt); - static void lapic_timer_broadcast(cpumask_t mask); - -static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen); +static void apic_pm_activate(void); static struct clock_event_device lapic_clockevent = { .name = "lapic", @@ -78,66 +77,43 @@ }; static DEFINE_PER_CPU(struct clock_event_device, lapic_events); -static int lapic_next_event(unsigned long delta, - struct clock_event_device *evt) +/* + * Get the LAPIC version + */ +static inline int lapic_get_version(void) { - apic_write(APIC_TMICT, delta); - return 0; + return GET_APIC_VERSION(apic_read(APIC_LVR)); } -static void lapic_timer_setup(enum clock_event_mode mode, - struct clock_event_device *evt) +/* + * Check if the APIC is integrated or a separate chip + */ +static inline int lapic_is_integrated(void) { - unsigned long flags; - unsigned int v; - - /* Lapic used as dummy for broadcast ? */ - if (evt->features & CLOCK_EVT_FEAT_DUMMY) - return; - - local_irq_save(flags); - - switch (mode) { - case CLOCK_EVT_MODE_PERIODIC: - case CLOCK_EVT_MODE_ONESHOT: - __setup_APIC_LVTT(calibration_result, - mode != CLOCK_EVT_MODE_PERIODIC, 1); - break; - case CLOCK_EVT_MODE_UNUSED: - case CLOCK_EVT_MODE_SHUTDOWN: - v = apic_read(APIC_LVTT); - v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); - apic_write(APIC_LVTT, v); - break; - case CLOCK_EVT_MODE_RESUME: - /* Nothing to do here */ - break; - } - - local_irq_restore(flags); + return 1; } /* - * Local APIC timer broadcast function + * Check whether this is a modern or a first-generation APIC */ -static void lapic_timer_broadcast(cpumask_t mask) +static int modern_apic(void) { -#ifdef CONFIG_SMP - send_IPI_mask(mask, LOCAL_TIMER_VECTOR); -#endif + /* AMD systems use old APIC versions, so check the CPU */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && + boot_cpu_data.x86 >= 0xf) + return 1; + return lapic_get_version() >= 0x14; } -static void apic_pm_activate(void); - void apic_wait_icr_idle(void) { while (apic_read(APIC_ICR) & APIC_ICR_BUSY) cpu_relax(); } -unsigned int safe_apic_wait_icr_idle(void) +u32 safe_apic_wait_icr_idle(void) { - unsigned int send_status; + u32 send_status; int timeout; timeout = 0; @@ -151,7 +127,10 @@ unsigned int safe_apic_wait_icr_idle(voi return send_status; } -void enable_NMI_through_LVT0 (void * dummy) +/** + * enable_NMI_through_LVT0 - enable NMI through local vector table 0 + */ +void enable_NMI_through_LVT0(void *dummy) { unsigned int v; @@ -160,7 +139,10 @@ void enable_NMI_through_LVT0 (void * dum apic_write(APIC_LVT0, v); } -int get_maxlvt(void) +/** + * lapic_get_maxlvt - get the maximum number of local vector table entries + */ +int lapic_get_maxlvt(void) { unsigned int v, maxlvt; @@ -170,203 +152,493 @@ int get_maxlvt(void) } /* - * 'what should we do if we get a hw irq event on an illegal vector'. - * each architecture has to answer this themselves. + * This function sets up the local APIC timer, with a timeout of + * 'clocks' APIC bus clock.
During calibration we actually call + * this function twice on the boot CPU, once with a bogus timeout + * value, second time for real. The other (noncalibrating) CPUs + * call this function only once, with the real, calibrated value. + * + * We do reads before writes even if unnecessary, to get around the + * P5 APIC double write bug. */ -void ack_bad_irq(unsigned int irq) -{ - printk("unexpected IRQ trap at vector %02x\n", irq); - /* - * Currently unexpected vectors happen only on SMP and APIC. - * We _must_ ack these because every local APIC has only N - * irq slots per priority level, and a 'hanging, unacked' IRQ - * holds up an irq slot - in excessive cases (when multiple - * unexpected vectors occur) that might lock up the APIC - * completely. - * But don't ack when the APIC is disabled. -AK - */ - if (!disable_apic) - ack_APIC_irq(); -} -void clear_local_APIC(void) +static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) { - int maxlvt; - unsigned int v; + unsigned int lvtt_value, tmp_value; - maxlvt = get_maxlvt(); + lvtt_value = LOCAL_TIMER_VECTOR; + if (!oneshot) + lvtt_value |= APIC_LVT_TIMER_PERIODIC; + if (!irqen) + lvtt_value |= APIC_LVT_MASKED; - /* - * Masking an LVT entry can trigger a local APIC error - * if the vector is zero. Mask LVTERR first to prevent this. - */ - if (maxlvt >= 3) { - v = ERROR_APIC_VECTOR; /* any non-zero vector will do */ - apic_write(APIC_LVTERR, v | APIC_LVT_MASKED); - } - /* - * Careful: we have to set masks only first to deassert - * any level-triggered sources. - */ - v = apic_read(APIC_LVTT); - apic_write(APIC_LVTT, v | APIC_LVT_MASKED); - v = apic_read(APIC_LVT0); - apic_write(APIC_LVT0, v | APIC_LVT_MASKED); - v = apic_read(APIC_LVT1); - apic_write(APIC_LVT1, v | APIC_LVT_MASKED); - if (maxlvt >= 4) { - v = apic_read(APIC_LVTPC); - apic_write(APIC_LVTPC, v | APIC_LVT_MASKED); - } + apic_write(APIC_LVTT, lvtt_value); /* - * Clean APIC state for other OSs: + * Divide PICLK by 16 */ - apic_write(APIC_LVTT, APIC_LVT_MASKED); - apic_write(APIC_LVT0, APIC_LVT_MASKED); - apic_write(APIC_LVT1, APIC_LVT_MASKED); - if (maxlvt >= 3) - apic_write(APIC_LVTERR, APIC_LVT_MASKED); - if (maxlvt >= 4) - apic_write(APIC_LVTPC, APIC_LVT_MASKED); - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); + tmp_value = apic_read(APIC_TDCR); + apic_write(APIC_TDCR, (tmp_value + & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) + | APIC_TDR_DIV_16); + + if (!oneshot) + apic_write(APIC_TMICT, clocks); } -void disconnect_bsp_APIC(int virt_wire_setup) -{ - /* Go back to Virtual Wire compatibility mode */ - unsigned long value; +/* + * Setup extended LVT, AMD specific (K8, family 10h) + * + * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and + * MCE interrupts are supported. Thus MCE offset must be set to 0. 
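The new setup_APIC_eilvt() helper below packs an extended-LVT entry as (mask << 16) | (msg_type << 8) | vector and writes it at APIC_EILVT0 plus offset * 0x10. A standalone sketch of just that encoding; the vector is an arbitrary example value, and msg_type 0 corresponds to fixed delivery mode:

#include <stdio.h>

int main(void)
{
	unsigned int vector = 0xf8;	/* arbitrary example vector */
	unsigned int msg_type = 0;	/* 0 == fixed delivery mode */
	unsigned int mask = 1;		/* create the entry masked */
	unsigned int lvt_off = 1;	/* 1 == IBS offset, per the defines below */
	unsigned int v = (mask << 16) | (msg_type << 8) | vector;

	printf("EILVT%u: write %#x at APIC_EILVT0 + %#x\n",
	       lvt_off, v, lvt_off << 4);
	return 0;
}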
+ */ - /* For the spurious interrupt use vector F, and enable it */ - value = apic_read(APIC_SPIV); - value &= ~APIC_VECTOR_MASK; - value |= APIC_SPIV_APIC_ENABLED; - value |= 0xf; - apic_write(APIC_SPIV, value); +#define APIC_EILVT_LVTOFF_MCE 0 +#define APIC_EILVT_LVTOFF_IBS 1 - if (!virt_wire_setup) { - /* - * For LVT0 make it edge triggered, active high, - * external and enabled - */ - value = apic_read(APIC_LVT0); - value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | - APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | - APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED ); - value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; - value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT); - apic_write(APIC_LVT0, value); - } else { - /* Disable LVT0 */ - apic_write(APIC_LVT0, APIC_LVT_MASKED); - } +static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask) +{ + unsigned long reg = (lvt_off << 4) + APIC_EILVT0; + unsigned int v = (mask << 16) | (msg_type << 8) | vector; - /* For LVT1 make it edge triggered, active high, nmi and enabled */ - value = apic_read(APIC_LVT1); - value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | - APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | - APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); - value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; - value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); - apic_write(APIC_LVT1, value); + apic_write(reg, v); } -void disable_local_APIC(void) +u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask) { - unsigned int value; + setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask); + return APIC_EILVT_LVTOFF_MCE; +} - clear_local_APIC(); +u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask) +{ + setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask); + return APIC_EILVT_LVTOFF_IBS; +} - /* - * Disable APIC (implies clearing of registers - * for 82489DX!). - */ - value = apic_read(APIC_SPIV); - value &= ~APIC_SPIV_APIC_ENABLED; - apic_write(APIC_SPIV, value); +/* + * Program the next event, relative to now + */ +static int lapic_next_event(unsigned long delta, + struct clock_event_device *evt) +{ + apic_write(APIC_TMICT, delta); + return 0; } -void lapic_shutdown(void) +/* + * Setup the lapic timer in periodic or oneshot mode + */ +static void lapic_timer_setup(enum clock_event_mode mode, + struct clock_event_device *evt) { unsigned long flags; + unsigned int v; - if (!cpu_has_apic) + /* Lapic used as dummy for broadcast ? */ + if (evt->features & CLOCK_EVT_FEAT_DUMMY) return; local_irq_save(flags); - disable_local_APIC(); + switch (mode) { + case CLOCK_EVT_MODE_PERIODIC: + case CLOCK_EVT_MODE_ONESHOT: + __setup_APIC_LVTT(calibration_result, + mode != CLOCK_EVT_MODE_PERIODIC, 1); + break; + case CLOCK_EVT_MODE_UNUSED: + case CLOCK_EVT_MODE_SHUTDOWN: + v = apic_read(APIC_LVTT); + v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); + apic_write(APIC_LVTT, v); + break; + case CLOCK_EVT_MODE_RESUME: + /* Nothing to do here */ + break; + } local_irq_restore(flags); } /* - * This is to verify that we're looking at a real local APIC. - * Check these against your board if the CPUs aren't getting - * started for no apparent reason. + * Local APIC timer broadcast function */ -int __init verify_local_APIC(void) +static void lapic_timer_broadcast(cpumask_t mask) { - unsigned int reg0, reg1; - - /* - * The version register is read-only in a real APIC. 
- */ - reg0 = apic_read(APIC_LVR); - apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg0); - apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK); - reg1 = apic_read(APIC_LVR); - apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg1); +#ifdef CONFIG_SMP + send_IPI_mask(mask, LOCAL_TIMER_VECTOR); +#endif +} - /* - * The two version reads above should print the same - * numbers. If the second one is different, then we - * poke at a non-APIC. - */ - if (reg1 != reg0) - return 0; +/* + * Setup the local APIC timer for this CPU. Copy the initialized values + * of the boot CPU and register the clock event in the framework. + */ +static void setup_APIC_timer(void) +{ + struct clock_event_device *levt = &__get_cpu_var(lapic_events); - /* - * Check if the version looks reasonably. - */ - reg1 = GET_APIC_VERSION(reg0); - if (reg1 == 0x00 || reg1 == 0xff) - return 0; - reg1 = get_maxlvt(); - if (reg1 < 0x02 || reg1 == 0xff) - return 0; + memcpy(levt, &lapic_clockevent, sizeof(*levt)); + levt->cpumask = cpumask_of_cpu(smp_processor_id()); - /* - * The ID register is read/write in a real APIC. - */ - reg0 = apic_read(APIC_ID); - apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); - apic_write(APIC_ID, reg0 ^ APIC_ID_MASK); - reg1 = apic_read(APIC_ID); - apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1); - apic_write(APIC_ID, reg0); - if (reg1 != (reg0 ^ APIC_ID_MASK)) - return 0; + clockevents_register_device(levt); +} - /* +/* + * In this function we calibrate APIC bus clocks to the external + * timer. Unfortunately we cannot use jiffies and the timer irq + * to calibrate, since some later bootup code depends on getting + * the first irq? Ugh. + * + * We want to do the calibration only once since we + * want to have local timer irqs synchronized. CPUs connected + * by the same APIC bus have the very same bus frequency. + * And we want to have irqs off anyways, no accidental + * APIC irq that way. + */ + +#define TICK_COUNT 100000000 + +static void __init calibrate_APIC_clock(void) +{ + unsigned apic, apic_start; + unsigned long tsc, tsc_start; + int result; + + local_irq_disable(); + + /* + * Put whatever arbitrary (but long enough) timeout + * value into the APIC clock, we just want to get the + * counter running for calibration. + * + * No interrupt enable ! + */ + __setup_APIC_LVTT(250000000, 0, 0); + + apic_start = apic_read(APIC_TMCCT); +#ifdef CONFIG_X86_PM_TIMER + if (apic_calibrate_pmtmr && pmtmr_ioport) { + pmtimer_wait(5000); /* 5ms wait */ + apic = apic_read(APIC_TMCCT); + result = (apic_start - apic) * 1000L / 5; + } else +#endif + { + rdtscll(tsc_start); + + do { + apic = apic_read(APIC_TMCCT); + rdtscll(tsc); + } while ((tsc - tsc_start) < TICK_COUNT && + (apic_start - apic) < TICK_COUNT); + + result = (apic_start - apic) * 1000L * tsc_khz / + (tsc - tsc_start); + } + + local_irq_enable(); + + printk(KERN_DEBUG "APIC timer calibration result %d\n", result); + + printk(KERN_INFO "Detected %d.%03d MHz APIC timer.\n", + result / 1000 / 1000, result / 1000 % 1000); + + /* Calculate the scaled math multiplication factor */ + lapic_clockevent.mult = div_sc(result, NSEC_PER_SEC, 32); + lapic_clockevent.max_delta_ns = + clockevent_delta2ns(0x7FFFFF, &lapic_clockevent); + lapic_clockevent.min_delta_ns = + clockevent_delta2ns(0xF, &lapic_clockevent); + + calibration_result = result / HZ; +} + +void __init setup_boot_APIC_clock(void) +{ + /* + * The local apic timer can be disabled via the kernel commandline.
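To make the calibration arithmetic in calibrate_APIC_clock() above concrete, a worked example with invented numbers (it assumes 64-bit long, as on x86-64, since the intermediate product overflows 32 bits): 12.5 million APIC ticks counted over 100 ms worth of TSC cycles on a 2 GHz TSC give a 125 MHz APIC timer, and calibration_result becomes ticks per jiffy:

#include <stdio.h>

#define HZ 250	/* example config value */

int main(void)
{
	unsigned long tsc_khz = 2000000UL;	/* pretend 2 GHz TSC */
	unsigned long tsc_delta = 200000000UL;	/* 100 ms of TSC cycles */
	unsigned long apic_delta = 12500000UL;	/* APIC ticks in the same window */
	long result = apic_delta * 1000L * tsc_khz / tsc_delta;

	printf("APIC timer: %ld ticks/sec (%ld.%03ld MHz)\n",
	       result, result / 1000 / 1000, result / 1000 % 1000);
	printf("calibration_result = %ld ticks/jiffy at HZ=%d\n",
	       result / HZ, HZ);
	return 0;
}

The PM-timer branch does the same thing over a fixed 5 ms window, which is why it divides by 5 and multiplies by 1000.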
+ * Register the lapic timer as a dummy clock event source on SMP + * systems, so the broadcast mechanism is used. On UP systems simply + * ignore it. + */ + if (disable_apic_timer) { + printk(KERN_INFO "Disabling APIC timer\n"); + /* No broadcast on UP ! */ + if (num_possible_cpus() > 1) + setup_APIC_timer(); + return; + } + + printk(KERN_INFO "Using local APIC timer interrupts.\n"); + calibrate_APIC_clock(); + + /* + * If nmi_watchdog is set to IO_APIC, we need the + * PIT/HPET going. Otherwise register lapic as a dummy + * device. + */ + if (nmi_watchdog != NMI_IO_APIC) + lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; + else + printk(KERN_WARNING "APIC timer registered as dummy," + " due to nmi_watchdog=1!\n"); + + setup_APIC_timer(); +} + +/* + * AMD C1E-enabled CPUs have a real nasty problem: Some BIOSes set the + * C1E flag only in the secondary CPU, so when we detect the wreckage + * we already have enabled the boot CPU local apic timer. Check if + * disable_apic_timer is set and the DUMMY flag is cleared. If yes, + * set the DUMMY flag again and force the broadcast mode in the + * clockevents layer. + */ +void __cpuinit check_boot_apic_timer_broadcast(void) +{ + if (!disable_apic_timer || + (lapic_clockevent.features & CLOCK_EVT_FEAT_DUMMY)) + return; + + printk(KERN_INFO "AMD C1E detected late. Force timer broadcast.\n"); + lapic_clockevent.features |= CLOCK_EVT_FEAT_DUMMY; + + local_irq_enable(); + clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE, &boot_cpu_id); + local_irq_disable(); +} + +void __cpuinit setup_secondary_APIC_clock(void) +{ + check_boot_apic_timer_broadcast(); + setup_APIC_timer(); +} + +/* + * The guts of the apic timer interrupt + */ +static void local_apic_timer_interrupt(void) +{ + int cpu = smp_processor_id(); + struct clock_event_device *evt = &per_cpu(lapic_events, cpu); + + /* + * Normally we should not be here till LAPIC has been initialized but + * in some cases like kdump, it's possible that there is a pending LAPIC + * timer interrupt from previous kernel's context and is delivered in + * the new kernel the moment interrupts are enabled. + * + * Interrupts are enabled early and LAPIC is set up much later, hence + * it's possible that when we get here evt->event_handler is NULL. + * Check for event_handler being NULL and discard the interrupt as + * spurious. + */ + if (!evt->event_handler) { + printk(KERN_WARNING + "Spurious LAPIC timer interrupt on cpu %d\n", cpu); + /* Switch it off */ + lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt); + return; + } + + /* + * the NMI deadlock-detector uses this. + */ + add_pda(apic_timer_irqs, 1); + + evt->event_handler(evt); +} + +/* + * Local APIC timer interrupt. This is the most natural way for doing + * local interrupts, but local timer interrupts can be emulated by + * broadcast interrupts too. [in case the hw doesn't support APIC timers] + * + * [ if a single-CPU system runs an SMP kernel then we call the local + * interrupt as well. Thus we cannot inline the local irq ... ] + */ +void smp_apic_timer_interrupt(struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + + /* + * NOTE! We'd better ACK the irq immediately, + * because timer handling can be slow. + */ + ack_APIC_irq(); + /* + * update_process_times() expects us to have done irq_enter(). + * Besides, if we don't, timer interrupts ignore the global + * interrupt lock, which is the WrongThing (tm) to do.
+ */ + exit_idle(); + irq_enter(); + local_apic_timer_interrupt(); + irq_exit(); + set_irq_regs(old_regs); +} + +int setup_profiling_timer(unsigned int multiplier) +{ + return -EINVAL; +} + + +/* + * Local APIC start and shutdown + */ + +/** + * clear_local_APIC - shutdown the local APIC + * + * This is called, when a CPU is disabled and before rebooting, so the state of + * the local APIC has no dangling leftovers. Also used to cleanout any BIOS + * leftovers during boot. + */ +void clear_local_APIC(void) +{ + int maxlvt = lapic_get_maxlvt(); + u32 v; + + /* + * Masking an LVT entry can trigger a local APIC error + * if the vector is zero. Mask LVTERR first to prevent this. + */ + if (maxlvt >= 3) { + v = ERROR_APIC_VECTOR; /* any non-zero vector will do */ + apic_write(APIC_LVTERR, v | APIC_LVT_MASKED); + } + /* + * Careful: we have to set masks only first to deassert + * any level-triggered sources. + */ + v = apic_read(APIC_LVTT); + apic_write(APIC_LVTT, v | APIC_LVT_MASKED); + v = apic_read(APIC_LVT0); + apic_write(APIC_LVT0, v | APIC_LVT_MASKED); + v = apic_read(APIC_LVT1); + apic_write(APIC_LVT1, v | APIC_LVT_MASKED); + if (maxlvt >= 4) { + v = apic_read(APIC_LVTPC); + apic_write(APIC_LVTPC, v | APIC_LVT_MASKED); + } + + /* + * Clean APIC state for other OSs: + */ + apic_write(APIC_LVTT, APIC_LVT_MASKED); + apic_write(APIC_LVT0, APIC_LVT_MASKED); + apic_write(APIC_LVT1, APIC_LVT_MASKED); + if (maxlvt >= 3) + apic_write(APIC_LVTERR, APIC_LVT_MASKED); + if (maxlvt >= 4) + apic_write(APIC_LVTPC, APIC_LVT_MASKED); + apic_write(APIC_ESR, 0); + apic_read(APIC_ESR); +} + +/** + * disable_local_APIC - clear and disable the local APIC + */ +void disable_local_APIC(void) +{ + unsigned int value; + + clear_local_APIC(); + + /* + * Disable APIC (implies clearing of registers + * for 82489DX!). + */ + value = apic_read(APIC_SPIV); + value &= ~APIC_SPIV_APIC_ENABLED; + apic_write(APIC_SPIV, value); +} + +void lapic_shutdown(void) +{ + unsigned long flags; + + if (!cpu_has_apic) + return; + + local_irq_save(flags); + + disable_local_APIC(); + + local_irq_restore(flags); +} + +/* + * This is to verify that we're looking at a real local APIC. + * Check these against your board if the CPUs aren't getting + * started for no apparent reason. + */ +int __init verify_local_APIC(void) +{ + unsigned int reg0, reg1; + + /* + * The version register is read-only in a real APIC. + */ + reg0 = apic_read(APIC_LVR); + apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg0); + apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK); + reg1 = apic_read(APIC_LVR); + apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg1); + + /* + * The two version reads above should print the same + * numbers. If the second one is different, then we + * poke at a non-APIC. + */ + if (reg1 != reg0) + return 0; + + /* + * Check if the version looks reasonably. + */ + reg1 = GET_APIC_VERSION(reg0); + if (reg1 == 0x00 || reg1 == 0xff) + return 0; + reg1 = lapic_get_maxlvt(); + if (reg1 < 0x02 || reg1 == 0xff) + return 0; + + /* + * The ID register is read/write in a real APIC. + */ + reg0 = apic_read(APIC_ID); + apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); + apic_write(APIC_ID, reg0 ^ APIC_ID_MASK); + reg1 = apic_read(APIC_ID); + apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1); + apic_write(APIC_ID, reg0); + if (reg1 != (reg0 ^ APIC_ID_MASK)) + return 0; + + /* * The next two are just to see if we have sane values. * They're only really relevant if we're in Virtual Wire * compatibility mode, but most boxes are anymore. 
*/ reg0 = apic_read(APIC_LVT0); - apic_printk(APIC_DEBUG,"Getting LVT0: %x\n", reg0); + apic_printk(APIC_DEBUG, "Getting LVT0: %x\n", reg0); reg1 = apic_read(APIC_LVT1); apic_printk(APIC_DEBUG, "Getting LVT1: %x\n", reg1); return 1; } +/** + * sync_Arb_IDs - synchronize APIC bus arbitration IDs + */ void __init sync_Arb_IDs(void) { /* Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 */ - unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR)); - if (ver >= 0x14) /* P4 or higher */ + if (modern_apic()) return; /* @@ -418,9 +690,12 @@ void __init init_bsp_APIC(void) apic_write(APIC_LVT1, value); } -void __cpuinit setup_local_APIC (void) +/** + * setup_local_APIC - setup the local APIC + */ +void __cpuinit setup_local_APIC(void) { - unsigned int value, maxlvt; + unsigned int value; int i, j; value = apic_read(APIC_LVR); @@ -516,183 +791,27 @@ void __cpuinit setup_local_APIC (void) else value = APIC_DM_NMI | APIC_LVT_MASKED; apic_write(APIC_LVT1, value); - - { - unsigned oldvalue; - maxlvt = get_maxlvt(); - oldvalue = apic_read(APIC_ESR); - value = ERROR_APIC_VECTOR; // enables sending errors - apic_write(APIC_LVTERR, value); - /* - * spec says clear errors after enabling vector. - */ - if (maxlvt > 3) - apic_write(APIC_ESR, 0); - value = apic_read(APIC_ESR); - if (value != oldvalue) - apic_printk(APIC_VERBOSE, - "ESR value after enabling vector: %08x, after %08x\n", - oldvalue, value); - } - - nmi_watchdog_default(); - setup_apic_nmi_watchdog(NULL); - apic_pm_activate(); } -#ifdef CONFIG_PM - -static struct { - /* 'active' is true if the local APIC was enabled by us and - not the BIOS; this signifies that we are also responsible - for disabling it before entering apm/acpi suspend */ - int active; - /* r/w apic fields */ - unsigned int apic_id; - unsigned int apic_taskpri; - unsigned int apic_ldr; - unsigned int apic_dfr; - unsigned int apic_spiv; - unsigned int apic_lvtt; - unsigned int apic_lvtpc; - unsigned int apic_lvt0; - unsigned int apic_lvt1; - unsigned int apic_lvterr; - unsigned int apic_tmict; - unsigned int apic_tdcr; - unsigned int apic_thmr; -} apic_pm_state; - -static int lapic_suspend(struct sys_device *dev, pm_message_t state) +void __cpuinit lapic_setup_esr(void) { - unsigned long flags; - int maxlvt; - - if (!apic_pm_state.active) - return 0; - - maxlvt = get_maxlvt(); + unsigned maxlvt = lapic_get_maxlvt(); - apic_pm_state.apic_id = apic_read(APIC_ID); - apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI); - apic_pm_state.apic_ldr = apic_read(APIC_LDR); - apic_pm_state.apic_dfr = apic_read(APIC_DFR); - apic_pm_state.apic_spiv = apic_read(APIC_SPIV); - apic_pm_state.apic_lvtt = apic_read(APIC_LVTT); - if (maxlvt >= 4) - apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC); - apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0); - apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1); - apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); - apic_pm_state.apic_tmict = apic_read(APIC_TMICT); - apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); -#ifdef CONFIG_X86_MCE_INTEL - if (maxlvt >= 5) - apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); -#endif - local_irq_save(flags); - disable_local_APIC(); - local_irq_restore(flags); - return 0; -} - -static int lapic_resume(struct sys_device *dev) -{ - unsigned int l, h; - unsigned long flags; - int maxlvt; - - if (!apic_pm_state.active) - return 0; - - maxlvt = get_maxlvt(); - - local_irq_save(flags); - rdmsr(MSR_IA32_APICBASE, l, h); - l &= ~MSR_IA32_APICBASE_BASE; - l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; - wrmsr(MSR_IA32_APICBASE, 
l, h); - apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); - apic_write(APIC_ID, apic_pm_state.apic_id); - apic_write(APIC_DFR, apic_pm_state.apic_dfr); - apic_write(APIC_LDR, apic_pm_state.apic_ldr); - apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri); - apic_write(APIC_SPIV, apic_pm_state.apic_spiv); - apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); - apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); -#ifdef CONFIG_X86_MCE_INTEL - if (maxlvt >= 5) - apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); -#endif - if (maxlvt >= 4) - apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc); - apic_write(APIC_LVTT, apic_pm_state.apic_lvtt); - apic_write(APIC_TDCR, apic_pm_state.apic_tdcr); - apic_write(APIC_TMICT, apic_pm_state.apic_tmict); - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); - apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); - local_irq_restore(flags); - return 0; -} - -static struct sysdev_class lapic_sysclass = { - set_kset_name("lapic"), - .resume = lapic_resume, - .suspend = lapic_suspend, -}; - -static struct sys_device device_lapic = { - .id = 0, - .cls = &lapic_sysclass, -}; - -static void __cpuinit apic_pm_activate(void) -{ - apic_pm_state.active = 1; + apic_write(APIC_LVTERR, ERROR_APIC_VECTOR); + /* + * spec says clear errors after enabling vector. + */ + if (maxlvt > 3) + apic_write(APIC_ESR, 0); } -static int __init init_lapic_sysfs(void) +void __cpuinit end_local_APIC_setup(void) { - int error; - if (!cpu_has_apic) - return 0; - /* XXX: remove suspend/resume procs if !apic_pm_state.active? */ - error = sysdev_class_register(&lapic_sysclass); - if (!error) - error = sysdev_register(&device_lapic); - return error; -} -device_initcall(init_lapic_sysfs); - -#else /* CONFIG_PM */ - -static void apic_pm_activate(void) { } - -#endif /* CONFIG_PM */ - -static int __init apic_set_verbosity(char *str) -{ - if (str == NULL) { - skip_ioapic_setup = 0; - ioapic_force = 1; - return 0; - } - if (strcmp("debug", str) == 0) - apic_verbosity = APIC_DEBUG; - else if (strcmp("verbose", str) == 0) - apic_verbosity = APIC_VERBOSE; - else { - printk(KERN_WARNING "APIC Verbosity level %s not recognised" - " use apic=verbose or apic=debug\n", str); - return -EINVAL; - } - - return 0; + lapic_setup_esr(); + nmi_watchdog_default(); + setup_apic_nmi_watchdog(NULL); + apic_pm_activate(); } -early_param("apic", apic_set_verbosity); /* * Detect and enable local APICs on non-SMP boards. @@ -700,77 +819,21 @@ early_param("apic", apic_set_verbosity); * On AMD64 we trust the BIOS - if it says no APIC it is likely * not correctly set up (usually the APIC timer won't work etc.) 
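lapic_resume() above rebuilds the APICBASE MSR before touching any APIC register. Assuming the usual layout of that MSR (global-enable at bit 11, physical base held in the bits from 12 up), a standalone sketch of the same fix-up on sample values:

#include <stdio.h>

#define APICBASE_ENABLE	(1UL << 11)		/* global enable bit */
#define APICBASE_BASE	(0xfffffUL << 12)	/* physical base field */

int main(void)
{
	unsigned long l = 0xfee00800;		/* sample MSR value on resume */
	unsigned long mp_lapic_addr = 0xfee00000;	/* the known-good base */

	l &= ~APICBASE_BASE;			/* drop whatever base was left */
	l |= APICBASE_ENABLE | mp_lapic_addr;	/* force enable + known base */
	printf("MSR_IA32_APICBASE <- %#lx\n", l);
	return 0;
}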
*/ - -static int __init detect_init_APIC (void) +static int __init detect_init_APIC(void) { - if (!cpu_has_apic) { - printk(KERN_INFO "No local APIC present\n"); - return -1; - } - - mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; - boot_cpu_id = 0; - return 0; -} - -#ifdef CONFIG_X86_IO_APIC -static struct resource * __init ioapic_setup_resources(void) -{ -#define IOAPIC_RESOURCE_NAME_SIZE 11 - unsigned long n; - struct resource *res; - char *mem; - int i; - - if (nr_ioapics <= 0) - return NULL; - - n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource); - n *= nr_ioapics; - - mem = alloc_bootmem(n); - res = (void *)mem; - - if (mem != NULL) { - memset(mem, 0, n); - mem += sizeof(struct resource) * nr_ioapics; - - for (i = 0; i < nr_ioapics; i++) { - res[i].name = mem; - res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; - sprintf(mem, "IOAPIC %u", i); - mem += IOAPIC_RESOURCE_NAME_SIZE; - } - } - - ioapic_resources = res; - - return res; -} - -static int __init ioapic_insert_resources(void) -{ - int i; - struct resource *r = ioapic_resources; - - if (!r) { - printk("IO APIC resources could be not be allocated.\n"); + if (!cpu_has_apic) { + printk(KERN_INFO "No local APIC present\n"); return -1; } - for (i = 0; i < nr_ioapics; i++) { - insert_resource(&iomem_resource, r); - r++; - } - + mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; + boot_cpu_id = 0; return 0; } -/* Insert the IO APIC resources after PCI initialization has occured to handle - * IO APICS that are mapped in on a BAR in PCI space. */ -late_initcall(ioapic_insert_resources); -#endif - +/** + * init_apic_mappings - initialize APIC mappings + */ void __init init_apic_mappings(void) { unsigned long apic_phys; @@ -800,295 +863,279 @@ void __init init_apic_mappings(void) * default configuration (or the MP table is broken). */ boot_cpu_id = GET_APIC_ID(apic_read(APIC_ID)); - - { - unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; - int i; - struct resource *ioapic_res; - - ioapic_res = ioapic_setup_resources(); - for (i = 0; i < nr_ioapics; i++) { - if (smp_found_config) { - ioapic_phys = mp_ioapics[i].mpc_apicaddr; - } else { - ioapic_phys = (unsigned long) - alloc_bootmem_pages(PAGE_SIZE); - ioapic_phys = __pa(ioapic_phys); - } - set_fixmap_nocache(idx, ioapic_phys); - apic_printk(APIC_VERBOSE, - "mapped IOAPIC to %016lx (%016lx)\n", - __fix_to_virt(idx), ioapic_phys); - idx++; - - if (ioapic_res != NULL) { - ioapic_res->start = ioapic_phys; - ioapic_res->end = ioapic_phys + (4 * 1024) - 1; - ioapic_res++; - } - } - } } /* - * This function sets up the local APIC timer, with a timeout of - * 'clocks' APIC bus clock. During calibration we actually call - * this function twice on the boot CPU, once with a bogus timeout - * value, second time for real. The other (noncalibrating) CPUs - * call this function only once, with the real, calibrated value. - * - * We do reads before writes even if unnecessary, to get around the - * P5 APIC double write bug. + * This initializes the IO-APIC and APIC hardware if this is + * a UP kernel. 
*/ - -static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) +int __init APIC_init_uniprocessor(void) { - unsigned int lvtt_value, tmp_value; + if (disable_apic) { + printk(KERN_INFO "Apic disabled\n"); + return -1; + } + if (!cpu_has_apic) { + disable_apic = 1; + printk(KERN_INFO "Apic disabled by BIOS\n"); + return -1; + } - lvtt_value = LOCAL_TIMER_VECTOR; - if (!oneshot) - lvtt_value |= APIC_LVT_TIMER_PERIODIC; - if (!irqen) - lvtt_value |= APIC_LVT_MASKED; + verify_local_APIC(); - apic_write(APIC_LVTT, lvtt_value); + phys_cpu_present_map = physid_mask_of_physid(boot_cpu_id); + apic_write(APIC_ID, SET_APIC_ID(boot_cpu_id)); + + setup_local_APIC(); /* - * Divide PICLK by 16 + * Now enable IO-APICs, actually call clear_IO_APIC + * We need clear_IO_APIC before enabling vector on BP */ - tmp_value = apic_read(APIC_TDCR); - apic_write(APIC_TDCR, (tmp_value - & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) - | APIC_TDR_DIV_16); - - if (!oneshot) - apic_write(APIC_TMICT, clocks); -} - -static void setup_APIC_timer(void) -{ - struct clock_event_device *levt = &__get_cpu_var(lapic_events); + if (!skip_ioapic_setup && nr_ioapics) + enable_IO_APIC(); - memcpy(levt, &lapic_clockevent, sizeof(*levt)); - levt->cpumask = cpumask_of_cpu(smp_processor_id()); + end_local_APIC_setup(); - clockevents_register_device(levt); + if (smp_found_config && !skip_ioapic_setup && nr_ioapics) + setup_IO_APIC(); + else + nr_ioapics = 0; + setup_boot_APIC_clock(); + check_nmi_watchdog(); + return 0; } /* - * In this function we calibrate APIC bus clocks to the external - * timer. Unfortunately we cannot use jiffies and the timer irq - * to calibrate, since some later bootup code depends on getting - * the first irq? Ugh. - * - * We want to do the calibration only once since we - * want to have local timer irqs syncron. CPUs connected - * by the same APIC bus have the very same bus frequency. - * And we want to have irqs off anyways, no accidental - * APIC irq that way. + * Local APIC interrupts */ -#define TICK_COUNT 100000000 - -static void __init calibrate_APIC_clock(void) +/* + * This interrupt should _never_ happen with our APIC/SMP architecture + */ +asmlinkage void smp_spurious_interrupt(void) { - unsigned apic, apic_start; - unsigned long tsc, tsc_start; - int result; - - local_irq_disable(); - + unsigned int v; + exit_idle(); + irq_enter(); /* - * Put whatever arbitrary (but long enough) timeout - * value into the APIC clock, we just want to get the - * counter running for calibration. - * - * No interrupt enable ! + * Check if this really is a spurious interrupt and ACK it + * if it is a vectored one. Just in case... + * Spurious interrupts should not be ACKed. 
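The v = apic_read(APIC_ISR + ...) test just below indexes one of eight 32-bit ISR banks spaced 0x10 apart: (vector & ~0x1f) >> 1 is the bank's byte offset and vector & 0x1f is the bit within it. A quick standalone check of that arithmetic for the usual 0xff spurious vector:

#include <stdio.h>

int main(void)
{
	unsigned int vector = 0xff;	/* typical spurious vector */
	unsigned int bank_off = (vector & ~0x1f) >> 1;
	unsigned int bit = vector & 0x1f;

	printf("vector %#x -> APIC_ISR + %#x, bit %u\n",
	       vector, bank_off, bit);
	return 0;
}

For 0xff this lands on APIC_ISR + 0x70, bit 31, the last bit of the last bank.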
*/ - __setup_APIC_LVTT(250000000, 0, 0); - - apic_start = apic_read(APIC_TMCCT); -#ifdef CONFIG_X86_PM_TIMER - if (apic_calibrate_pmtmr && pmtmr_ioport) { - pmtimer_wait(5000); /* 5ms wait */ - apic = apic_read(APIC_TMCCT); - result = (apic_start - apic) * 1000L / 5; - } else -#endif - { - rdtscll(tsc_start); - - do { - apic = apic_read(APIC_TMCCT); - rdtscll(tsc); - } while ((tsc - tsc_start) < TICK_COUNT && - (apic_start - apic) < TICK_COUNT); - - result = (apic_start - apic) * 1000L * tsc_khz / - (tsc - tsc_start); - } - - local_irq_enable(); + v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1)); + if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f))) + ack_APIC_irq(); - printk(KERN_DEBUG "APIC timer calibration result %d\n", result); + add_pda(irq_spurious_count, 1); + irq_exit(); +} - printk(KERN_INFO "Detected %d.%03d MHz APIC timer.\n", - result / 1000 / 1000, result / 1000 % 1000); +/* + * This interrupt should never happen with our APIC/SMP architecture + */ +asmlinkage void smp_error_interrupt(void) +{ + unsigned int v, v1; - /* Calculate the scaled math multiplication factor */ - lapic_clockevent.mult = div_sc(result, NSEC_PER_SEC, 32); - lapic_clockevent.max_delta_ns = - clockevent_delta2ns(0x7FFFFF, &lapic_clockevent); - lapic_clockevent.min_delta_ns = - clockevent_delta2ns(0xF, &lapic_clockevent); + exit_idle(); + irq_enter(); + /* First tickle the hardware, only then report what went on. -- REW */ + v = apic_read(APIC_ESR); + apic_write(APIC_ESR, 0); + v1 = apic_read(APIC_ESR); + ack_APIC_irq(); + atomic_inc(&irq_err_count); - calibration_result = result / HZ; + /* Here is what the APIC error bits mean: + 0: Send CS error + 1: Receive CS error + 2: Send accept error + 3: Receive accept error + 4: Reserved + 5: Send illegal vector + 6: Received illegal vector + 7: Illegal register address + */ + printk(KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n", + smp_processor_id(), v , v1); + irq_exit(); } -void __init setup_boot_APIC_clock (void) +void disconnect_bsp_APIC(int virt_wire_setup) { - /* - * The local apic timer can be disabled via the kernel commandline. - * Register the lapic timer as a dummy clock event source on SMP - * systems, so the broadcast mechanism is used. On UP systems simply - * ignore it. - */ - if (disable_apic_timer) { - printk(KERN_INFO "Disabling APIC timer\n"); - /* No broadcast on UP ! */ - if (num_possible_cpus() > 1) - setup_APIC_timer(); - return; - } + /* Go back to Virtual Wire compatibility mode */ + unsigned long value; - printk(KERN_INFO "Using local APIC timer interrupts.\n"); - calibrate_APIC_clock(); + /* For the spurious interrupt use vector F, and enable it */ + value = apic_read(APIC_SPIV); + value &= ~APIC_VECTOR_MASK; + value |= APIC_SPIV_APIC_ENABLED; + value |= 0xf; + apic_write(APIC_SPIV, value); - /* - * If nmi_watchdog is set to IO_APIC, we need the - * PIT/HPET going. Otherwise register lapic as a dummy - * device. 
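smp_error_interrupt() above documents the ESR bits only in a comment; a small decoder table built from that same list can be handy when reading the "APIC error on CPUn: xx(yy)" output. Illustrative only, with an arbitrary sample value:

#include <stdio.h>

static const char *esr_bits[8] = {
	"Send CS error",
	"Receive CS error",
	"Send accept error",
	"Receive accept error",
	"Reserved",
	"Send illegal vector",
	"Received illegal vector",
	"Illegal register address",
};

int main(void)
{
	unsigned int v = 0x40;	/* example: received illegal vector */
	int i;

	for (i = 0; i < 8; i++)
		if (v & (1 << i))
			printf("ESR bit %d: %s\n", i, esr_bits[i]);
	return 0;
}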
- */ - if (nmi_watchdog != NMI_IO_APIC) - lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; - else - printk(KERN_WARNING "APIC timer registered as dummy," - " due to nmi_watchdog=1!\n"); + if (!virt_wire_setup) { + /* + * For LVT0 make it edge triggered, active high, + * external and enabled + */ + value = apic_read(APIC_LVT0); + value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | + APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | + APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); + value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; + value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT); + apic_write(APIC_LVT0, value); + } else { + /* Disable LVT0 */ + apic_write(APIC_LVT0, APIC_LVT_MASKED); + } - setup_APIC_timer(); + /* For LVT1 make it edge triggered, active high, nmi and enabled */ + value = apic_read(APIC_LVT1); + value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | + APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | + APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); + value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; + value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); + apic_write(APIC_LVT1, value); } /* - * AMD C1E enabled CPUs have a real nasty problem: Some BIOSes set the - * C1E flag only in the secondary CPU, so when we detect the wreckage - * we already have enabled the boot CPU local apic timer. Check, if - * disable_apic_timer is set and the DUMMY flag is cleared. If yes, - * set the DUMMY flag again and force the broadcast mode in the - * clockevents layer. + * Power management */ -void __cpuinit check_boot_apic_timer_broadcast(void) +#ifdef CONFIG_PM + +static struct { + /* 'active' is true if the local APIC was enabled by us and + not the BIOS; this signifies that we are also responsible + for disabling it before entering apm/acpi suspend */ + int active; + /* r/w apic fields */ + unsigned int apic_id; + unsigned int apic_taskpri; + unsigned int apic_ldr; + unsigned int apic_dfr; + unsigned int apic_spiv; + unsigned int apic_lvtt; + unsigned int apic_lvtpc; + unsigned int apic_lvt0; + unsigned int apic_lvt1; + unsigned int apic_lvterr; + unsigned int apic_tmict; + unsigned int apic_tdcr; + unsigned int apic_thmr; +} apic_pm_state; + +static int lapic_suspend(struct sys_device *dev, pm_message_t state) { - if (!disable_apic_timer || - (lapic_clockevent.features & CLOCK_EVT_FEAT_DUMMY)) - return; + unsigned long flags; + int maxlvt; - printk(KERN_INFO "AMD C1E detected late. 
Force timer broadcast.\n"); - lapic_clockevent.features |= CLOCK_EVT_FEAT_DUMMY; + if (!apic_pm_state.active) + return 0; - local_irq_enable(); - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE, &boot_cpu_id); - local_irq_disable(); -} + maxlvt = lapic_get_maxlvt(); -void __cpuinit setup_secondary_APIC_clock(void) -{ - check_boot_apic_timer_broadcast(); - setup_APIC_timer(); + apic_pm_state.apic_id = apic_read(APIC_ID); + apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI); + apic_pm_state.apic_ldr = apic_read(APIC_LDR); + apic_pm_state.apic_dfr = apic_read(APIC_DFR); + apic_pm_state.apic_spiv = apic_read(APIC_SPIV); + apic_pm_state.apic_lvtt = apic_read(APIC_LVTT); + if (maxlvt >= 4) + apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC); + apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0); + apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1); + apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); + apic_pm_state.apic_tmict = apic_read(APIC_TMICT); + apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); +#ifdef CONFIG_X86_MCE_INTEL + if (maxlvt >= 5) + apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); +#endif + local_irq_save(flags); + disable_local_APIC(); + local_irq_restore(flags); + return 0; } -int setup_profiling_timer(unsigned int multiplier) +static int lapic_resume(struct sys_device *dev) { - return -EINVAL; -} + unsigned int l, h; + unsigned long flags; + int maxlvt; -void setup_APIC_extended_lvt(unsigned char lvt_off, unsigned char vector, - unsigned char msg_type, unsigned char mask) -{ - unsigned long reg = (lvt_off << 4) + K8_APIC_EXT_LVT_BASE; - unsigned int v = (mask << 16) | (msg_type << 8) | vector; - apic_write(reg, v); -} + if (!apic_pm_state.active) + return 0; -/* - * Local timer interrupt handler. It does both profiling and - * process statistics/rescheduling. - * - * We do profiling in every local tick, statistics/rescheduling - * happen only every 'profiling multiplier' ticks. The default - * multiplier is 1 and it can be changed by writing the new multiplier - * value into /proc/profile. 
- */ + maxlvt = lapic_get_maxlvt(); -void smp_local_timer_interrupt(void) -{ - int cpu = smp_processor_id(); - struct clock_event_device *evt = &per_cpu(lapic_events, cpu); + local_irq_save(flags); + rdmsr(MSR_IA32_APICBASE, l, h); + l &= ~MSR_IA32_APICBASE_BASE; + l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; + wrmsr(MSR_IA32_APICBASE, l, h); + apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); + apic_write(APIC_ID, apic_pm_state.apic_id); + apic_write(APIC_DFR, apic_pm_state.apic_dfr); + apic_write(APIC_LDR, apic_pm_state.apic_ldr); + apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri); + apic_write(APIC_SPIV, apic_pm_state.apic_spiv); + apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); + apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); +#ifdef CONFIG_X86_MCE_INTEL + if (maxlvt >= 5) + apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); +#endif + if (maxlvt >= 4) + apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc); + apic_write(APIC_LVTT, apic_pm_state.apic_lvtt); + apic_write(APIC_TDCR, apic_pm_state.apic_tdcr); + apic_write(APIC_TMICT, apic_pm_state.apic_tmict); + apic_write(APIC_ESR, 0); + apic_read(APIC_ESR); + apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); + apic_write(APIC_ESR, 0); + apic_read(APIC_ESR); + local_irq_restore(flags); + return 0; +} - /* - * Normally we should not be here till LAPIC has been initialized but - * in some cases like kdump, its possible that there is a pending LAPIC - * timer interrupt from previous kernel's context and is delivered in - * new kernel the moment interrupts are enabled. - * - * Interrupts are enabled early and LAPIC is setup much later, hence - * its possible that when we get here evt->event_handler is NULL. - * Check for event_handler being NULL and discard the interrupt as - * spurious. - */ - if (!evt->event_handler) { - printk(KERN_WARNING - "Spurious LAPIC timer interrupt on cpu %d\n", cpu); - /* Switch it off */ - lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt); - return; - } +static struct sysdev_class lapic_sysclass = { + set_kset_name("lapic"), + .resume = lapic_resume, + .suspend = lapic_suspend, +}; - /* - * the NMI deadlock-detector uses this. - */ - add_pda(apic_timer_irqs, 1); +static struct sys_device device_lapic = { + .id = 0, + .cls = &lapic_sysclass, +}; - evt->event_handler(evt); +static void __cpuinit apic_pm_activate(void) +{ + apic_pm_state.active = 1; } -/* - * Local APIC timer interrupt. This is the most natural way for doing - * local interrupts, but local timer interrupts can be emulated by - * broadcast interrupts too. [in case the hw doesn't support APIC timers] - * - * [ if a single-CPU system runs an SMP kernel then we call the local - * interrupt as well. Thus we cannot inline the local irq ... ] - */ -void smp_apic_timer_interrupt(struct pt_regs *regs) +static int __init init_lapic_sysfs(void) { - struct pt_regs *old_regs = set_irq_regs(regs); - - /* - * NOTE! We'd better ACK the irq immediately, - * because timer handling can be slow. - */ - ack_APIC_irq(); - /* - * update_process_times() expects us to have done irq_enter(). - * Besides, if we don't timer interrupts ignore the global - * interrupt lock, which is the WrongThing (tm) to do. - */ - exit_idle(); - irq_enter(); - smp_local_timer_interrupt(); - irq_exit(); - set_irq_regs(old_regs); + int error; + if (!cpu_has_apic) + return 0; + /* XXX: remove suspend/resume procs if !apic_pm_state.active? 
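A note on lapic_resume() above: nothing can be restored until the APIC is re-enabled at its MP-table address through MSR_IA32_APICBASE, and LVTERR is deliberately written masked first so that no error interrupt fires while the remaining registers are still being reloaded. The MSR surgery, restated as a stand-alone sketch (constants redefined here for illustration; the kernel takes them from its MSR headers):

#include <stdint.h>

#define APICBASE_BASE   0xfffff000u     /* physical base address, bits 12-31 */
#define APICBASE_ENABLE (1u << 11)      /* APIC global enable */

/* rebuild the low word of MSR_IA32_APICBASE: keep the other flags,
 * swap in the MP-table base, force the enable bit */
static uint32_t apicbase_low(uint32_t lo, uint32_t mp_lapic_addr)
{
        lo &= ~APICBASE_BASE;
        return lo | APICBASE_ENABLE | mp_lapic_addr;
}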
*/ + error = sysdev_class_register(&lapic_sysclass); + if (!error) + error = sysdev_register(&device_lapic); + return error; } +device_initcall(init_lapic_sysfs); + +#else /* CONFIG_PM */ + +static void apic_pm_activate(void) { } + +#endif /* CONFIG_PM */ /* * apic_is_clustered_box() -- Check if we can expect good TSC @@ -1138,96 +1185,33 @@ __cpuinit int apic_is_clustered_box(void } /* - * This interrupt should _never_ happen with our APIC/SMP architecture - */ -asmlinkage void smp_spurious_interrupt(void) -{ - unsigned int v; - exit_idle(); - irq_enter(); - /* - * Check if this really is a spurious interrupt and ACK it - * if it is a vectored one. Just in case... - * Spurious interrupts should not be ACKed. - */ - v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1)); - if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f))) - ack_APIC_irq(); - - add_pda(irq_spurious_count, 1); - irq_exit(); -} - -/* - * This interrupt should never happen with our APIC/SMP architecture - */ - -asmlinkage void smp_error_interrupt(void) -{ - unsigned int v, v1; - - exit_idle(); - irq_enter(); - /* First tickle the hardware, only then report what went on. -- REW */ - v = apic_read(APIC_ESR); - apic_write(APIC_ESR, 0); - v1 = apic_read(APIC_ESR); - ack_APIC_irq(); - atomic_inc(&irq_err_count); - - /* Here is what the APIC error bits mean: - 0: Send CS error - 1: Receive CS error - 2: Send accept error - 3: Receive accept error - 4: Reserved - 5: Send illegal vector - 6: Received illegal vector - 7: Illegal register address - */ - printk (KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n", - smp_processor_id(), v , v1); - irq_exit(); -} - -int disable_apic; - -/* - * This initializes the IO-APIC and APIC hardware if this is - * a UP kernel. + * APIC command line parameters */ -int __init APIC_init_uniprocessor (void) +static int __init apic_set_verbosity(char *str) { - if (disable_apic) { - printk(KERN_INFO "Apic disabled\n"); - return -1; + if (str == NULL) { + skip_ioapic_setup = 0; + ioapic_force = 1; + return 0; } - if (!cpu_has_apic) { - disable_apic = 1; - printk(KERN_INFO "Apic disabled by BIOS\n"); - return -1; + if (strcmp("debug", str) == 0) + apic_verbosity = APIC_DEBUG; + else if (strcmp("verbose", str) == 0) + apic_verbosity = APIC_VERBOSE; + else { + printk(KERN_WARNING "APIC Verbosity level %s not recognised" + " use apic=verbose or apic=debug\n", str); + return -EINVAL; } - verify_local_APIC(); - - phys_cpu_present_map = physid_mask_of_physid(boot_cpu_id); - apic_write(APIC_ID, SET_APIC_ID(boot_cpu_id)); - - setup_local_APIC(); - - if (smp_found_config && !skip_ioapic_setup && nr_ioapics) - setup_IO_APIC(); - else - nr_ioapics = 0; - setup_boot_APIC_clock(); - check_nmi_watchdog(); return 0; } +early_param("apic", apic_set_verbosity); static __init int setup_disableapic(char *str) { disable_apic = 1; - clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); + clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); return 0; } early_param("disableapic", setup_disableapic); diff -puN arch/x86/kernel/apm_32.c~git-x86 arch/x86/kernel/apm_32.c --- a/arch/x86/kernel/apm_32.c~git-x86 +++ a/arch/x86/kernel/apm_32.c @@ -235,8 +235,6 @@ #include #include -#include "io_ports.h" - #if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT) extern int (*console_blank_hook)(int); #endif diff -puN arch/x86/kernel/asm-offsets_32.c~git-x86 arch/x86/kernel/asm-offsets_32.c --- a/arch/x86/kernel/asm-offsets_32.c~git-x86 +++ a/arch/x86/kernel/asm-offsets_32.c @@ -38,15 +38,15 @@ void foo(void); void foo(void) { - 
OFFSET(SIGCONTEXT_eax, sigcontext, eax); - OFFSET(SIGCONTEXT_ebx, sigcontext, ebx); - OFFSET(SIGCONTEXT_ecx, sigcontext, ecx); - OFFSET(SIGCONTEXT_edx, sigcontext, edx); - OFFSET(SIGCONTEXT_esi, sigcontext, esi); - OFFSET(SIGCONTEXT_edi, sigcontext, edi); - OFFSET(SIGCONTEXT_ebp, sigcontext, ebp); - OFFSET(SIGCONTEXT_esp, sigcontext, esp); - OFFSET(SIGCONTEXT_eip, sigcontext, eip); + OFFSET(IA32_SIGCONTEXT_ax, sigcontext, ax); + OFFSET(IA32_SIGCONTEXT_bx, sigcontext, bx); + OFFSET(IA32_SIGCONTEXT_cx, sigcontext, cx); + OFFSET(IA32_SIGCONTEXT_dx, sigcontext, dx); + OFFSET(IA32_SIGCONTEXT_si, sigcontext, si); + OFFSET(IA32_SIGCONTEXT_di, sigcontext, di); + OFFSET(IA32_SIGCONTEXT_bp, sigcontext, bp); + OFFSET(IA32_SIGCONTEXT_sp, sigcontext, sp); + OFFSET(IA32_SIGCONTEXT_ip, sigcontext, ip); BLANK(); OFFSET(CPUINFO_x86, cpuinfo_x86, x86); @@ -75,34 +75,34 @@ void foo(void) OFFSET(GDS_pad, Xgt_desc_struct, pad); BLANK(); - OFFSET(PT_EBX, pt_regs, ebx); - OFFSET(PT_ECX, pt_regs, ecx); - OFFSET(PT_EDX, pt_regs, edx); - OFFSET(PT_ESI, pt_regs, esi); - OFFSET(PT_EDI, pt_regs, edi); - OFFSET(PT_EBP, pt_regs, ebp); - OFFSET(PT_EAX, pt_regs, eax); - OFFSET(PT_DS, pt_regs, xds); - OFFSET(PT_ES, pt_regs, xes); - OFFSET(PT_FS, pt_regs, xfs); - OFFSET(PT_ORIG_EAX, pt_regs, orig_eax); - OFFSET(PT_EIP, pt_regs, eip); - OFFSET(PT_CS, pt_regs, xcs); - OFFSET(PT_EFLAGS, pt_regs, eflags); - OFFSET(PT_OLDESP, pt_regs, esp); - OFFSET(PT_OLDSS, pt_regs, xss); + OFFSET(PT_EBX, pt_regs, bx); + OFFSET(PT_ECX, pt_regs, cx); + OFFSET(PT_EDX, pt_regs, dx); + OFFSET(PT_ESI, pt_regs, si); + OFFSET(PT_EDI, pt_regs, di); + OFFSET(PT_EBP, pt_regs, bp); + OFFSET(PT_EAX, pt_regs, ax); + OFFSET(PT_DS, pt_regs, ds); + OFFSET(PT_ES, pt_regs, es); + OFFSET(PT_FS, pt_regs, fs); + OFFSET(PT_ORIG_EAX, pt_regs, orig_ax); + OFFSET(PT_EIP, pt_regs, ip); + OFFSET(PT_CS, pt_regs, cs); + OFFSET(PT_EFLAGS, pt_regs, flags); + OFFSET(PT_OLDESP, pt_regs, sp); + OFFSET(PT_OLDSS, pt_regs, ss); BLANK(); OFFSET(EXEC_DOMAIN_handler, exec_domain, handler); - OFFSET(RT_SIGFRAME_sigcontext, rt_sigframe, uc.uc_mcontext); + OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe, uc.uc_mcontext); BLANK(); OFFSET(pbe_address, pbe, address); OFFSET(pbe_orig_address, pbe, orig_address); OFFSET(pbe_next, pbe, next); - /* Offset from the sysenter stack to tss.esp0 */ - DEFINE(TSS_sysenter_esp0, offsetof(struct tss_struct, x86_tss.esp0) - + /* Offset from the sysenter stack to tss.sp0 */ + DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) - sizeof(struct tss_struct)); DEFINE(PAGE_SIZE_asm, PAGE_SIZE); @@ -111,8 +111,6 @@ void foo(void) DEFINE(PTRS_PER_PMD, PTRS_PER_PMD); DEFINE(PTRS_PER_PGD, PTRS_PER_PGD); - DEFINE(VDSO_PRELINK_asm, VDSO_PRELINK); - OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx); #ifdef CONFIG_PARAVIRT @@ -123,7 +121,7 @@ void foo(void) OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable); OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable); OFFSET(PV_CPU_iret, pv_cpu_ops, iret); - OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit); + OFFSET(PV_CPU_irq_enable_syscall_ret, pv_cpu_ops, irq_enable_syscall_ret); OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0); #endif diff -puN arch/x86/kernel/asm-offsets_64.c~git-x86 arch/x86/kernel/asm-offsets_64.c --- a/arch/x86/kernel/asm-offsets_64.c~git-x86 +++ a/arch/x86/kernel/asm-offsets_64.c @@ -38,7 +38,6 @@ int main(void) #define ENTRY(entry) DEFINE(tsk_ ## entry, offsetof(struct task_struct, entry)) ENTRY(state); ENTRY(flags); - ENTRY(thread); ENTRY(pid); BLANK(); #undef ENTRY 
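For readers who have not met asm-offsets before: nothing in these files runs. They are compiled to assembly only, and the build extracts marker lines from the .s output to generate the numeric constants that entry_32.S/entry_64.S use. Roughly how OFFSET()/DEFINE() emit those markers, sketched with a hypothetical struct (the real macros are equivalent in spirit):

#include <stddef.h>

/* force the compiler to print the constant into the .s output as
 * "->SYM <value>"; a sed script later turns that into a #define */
#define DEFINE(sym, val) \
        asm volatile("\n->" #sym " %0 " #val : : "i" (val))
#define OFFSET(sym, str, mem) \
        DEFINE(sym, offsetof(struct str, mem))

struct pt_regs_sketch { long bx, cx, dx; };     /* hypothetical layout */

void foo(void)
{
        OFFSET(PT_CX_sketch, pt_regs_sketch, cx); /* -> PT_CX_sketch 8 on LP64 */
}

That indirection is why the register renames in this series (eax to ax, and so on) only have to touch the C structs: the assembly keeps using PT_ECX and friends, whose values are simply regenerated.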
@@ -47,6 +46,9 @@ int main(void) ENTRY(addr_limit); ENTRY(preempt_count); ENTRY(status); +#ifdef CONFIG_IA32_EMULATION + ENTRY(sysenter_return); +#endif BLANK(); #undef ENTRY #define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry)) @@ -61,15 +63,15 @@ int main(void) #undef ENTRY #ifdef CONFIG_IA32_EMULATION #define ENTRY(entry) DEFINE(IA32_SIGCONTEXT_ ## entry, offsetof(struct sigcontext_ia32, entry)) - ENTRY(eax); - ENTRY(ebx); - ENTRY(ecx); - ENTRY(edx); - ENTRY(esi); - ENTRY(edi); - ENTRY(ebp); - ENTRY(esp); - ENTRY(eip); + ENTRY(ax); + ENTRY(bx); + ENTRY(cx); + ENTRY(dx); + ENTRY(si); + ENTRY(di); + ENTRY(bp); + ENTRY(sp); + ENTRY(ip); BLANK(); #undef ENTRY DEFINE(IA32_RT_SIGFRAME_sigcontext, @@ -81,14 +83,14 @@ int main(void) DEFINE(pbe_next, offsetof(struct pbe, next)); BLANK(); #define ENTRY(entry) DEFINE(pt_regs_ ## entry, offsetof(struct pt_regs, entry)) - ENTRY(rbx); - ENTRY(rbx); - ENTRY(rcx); - ENTRY(rdx); - ENTRY(rsp); - ENTRY(rbp); - ENTRY(rsi); - ENTRY(rdi); + ENTRY(bx); + ENTRY(bx); + ENTRY(cx); + ENTRY(dx); + ENTRY(sp); + ENTRY(bp); + ENTRY(si); + ENTRY(di); ENTRY(r8); ENTRY(r9); ENTRY(r10); @@ -97,7 +99,7 @@ int main(void) ENTRY(r13); ENTRY(r14); ENTRY(r15); - ENTRY(eflags); + ENTRY(flags); BLANK(); #undef ENTRY #define ENTRY(entry) DEFINE(saved_context_ ## entry, offsetof(struct saved_context, entry)) @@ -105,7 +107,6 @@ int main(void) ENTRY(cr2); ENTRY(cr3); ENTRY(cr4); - ENTRY(cr8); BLANK(); #undef ENTRY DEFINE(TSS_ist, offsetof(struct tss_struct, ist)); diff -puN arch/x86/kernel/cpu/addon_cpuid_features.c~git-x86 arch/x86/kernel/cpu/addon_cpuid_features.c --- a/arch/x86/kernel/cpu/addon_cpuid_features.c~git-x86 +++ a/arch/x86/kernel/cpu/addon_cpuid_features.c @@ -45,6 +45,6 @@ void __cpuinit init_scattered_cpuid_feat ®s[CR_ECX], ®s[CR_EDX]); if (regs[cb->reg] & (1 << cb->bit)) - set_bit(cb->feature, c->x86_capability); + set_cpu_cap(c, cb->feature); } } diff -puN arch/x86/kernel/cpu/bugs.c~git-x86 arch/x86/kernel/cpu/bugs.c --- a/arch/x86/kernel/cpu/bugs.c~git-x86 +++ a/arch/x86/kernel/cpu/bugs.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -35,7 +36,7 @@ __setup("mca-pentium", mca_pentium); static int __init no_387(char *s) { boot_cpu_data.hard_math = 0; - write_cr0(0xE | read_cr0()); + write_cr0(X86_CR0_TS | X86_CR0_EM | X86_CR0_MP | read_cr0()); return 1; } diff -puN arch/x86/kernel/cpu/common.c~git-x86 arch/x86/kernel/cpu/common.c --- a/arch/x86/kernel/cpu/common.c~git-x86 +++ a/arch/x86/kernel/cpu/common.c @@ -634,7 +634,7 @@ void __init early_cpu_init(void) struct pt_regs * __devinit idle_regs(struct pt_regs *regs) { memset(regs, 0, sizeof(struct pt_regs)); - regs->xfs = __KERNEL_PERCPU; + regs->fs = __KERNEL_PERCPU; return regs; } @@ -691,7 +691,7 @@ void __cpuinit cpu_init(void) BUG(); enter_lazy_tlb(&init_mm, curr); - load_esp0(t, thread); + load_sp0(t, thread); set_tss_desc(cpu,t); load_TR_desc(); load_LDT(&init_mm.context); diff -puN arch/x86/kernel/cpu/cyrix.c~git-x86 arch/x86/kernel/cpu/cyrix.c --- a/arch/x86/kernel/cpu/cyrix.c~git-x86 +++ a/arch/x86/kernel/cpu/cyrix.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -126,15 +127,12 @@ static void __cpuinit set_cx86_reorder(v static void __cpuinit set_cx86_memwb(void) { - u32 cr0; - printk(KERN_INFO "Enable Memory-Write-back mode on Cyrix/NSC processor.\n"); /* CCR2 bit 2: unlock NW bit */ setCx86(CX86_CCR2, getCx86(CX86_CCR2) & ~0x04); /* set 'Not Write-through' */ - cr0 = 0x20000000; - write_cr0(read_cr0() | 
cr0); + write_cr0(read_cr0() | X86_CR0_NW); /* CCR2 bit 2: lock NW bit and set WT1 */ setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14 ); } diff -puN arch/x86/kernel/cpu/intel.c~git-x86 arch/x86/kernel/cpu/intel.c --- a/arch/x86/kernel/cpu/intel.c~git-x86 +++ a/arch/x86/kernel/cpu/intel.c @@ -11,6 +11,8 @@ #include #include #include +#include +#include #include "cpu.h" @@ -219,6 +221,9 @@ static void __cpuinit init_intel(struct if (!(l1 & (1<<12))) set_bit(X86_FEATURE_PEBS, c->x86_capability); } + + if (cpu_has_bts) + ds_init_intel(c); } static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 * c, unsigned int size) @@ -342,5 +347,22 @@ unsigned long cmpxchg_386_u32(volatile v EXPORT_SYMBOL(cmpxchg_386_u32); #endif +#ifndef CONFIG_X86_CMPXCHG64 +unsigned long long cmpxchg_486_u64(volatile void *ptr, u64 old, u64 new) +{ + u64 prev; + unsigned long flags; + + /* Poor man's cmpxchg8b for 386 and 486. Unsuitable for SMP */ + local_irq_save(flags); + prev = *(u64 *)ptr; + if (prev == old) + *(u64 *)ptr = new; + local_irq_restore(flags); + return prev; +} +EXPORT_SYMBOL(cmpxchg_486_u64); +#endif + // arch_initcall(intel_cpu_init); diff -puN arch/x86/kernel/cpu/mcheck/mce_64.c~git-x86 arch/x86/kernel/cpu/mcheck/mce_64.c --- a/arch/x86/kernel/cpu/mcheck/mce_64.c~git-x86 +++ a/arch/x86/kernel/cpu/mcheck/mce_64.c @@ -63,7 +63,7 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_wait) * separate MCEs from kernel messages to avoid bogus bug reports. */ -struct mce_log mcelog = { +static struct mce_log mcelog = { MCE_LOG_SIGNATURE, MCE_LOG_LEN, }; @@ -80,7 +80,7 @@ void mce_log(struct mce *mce) /* When the buffer fills up discard new entries. Assume that the earlier errors are the more interesting. */ if (entry >= MCE_LOG_LEN) { - set_bit(MCE_OVERFLOW, &mcelog.flags); + set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog.flags); return; } /* Old left over entry. Skip. */ @@ -110,12 +110,12 @@ static void print_mce(struct mce *m) KERN_EMERG "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n", m->cpu, m->mcgstatus, m->bank, m->status); - if (m->rip) { + if (m->ip) { printk(KERN_EMERG "RIP%s %02x:<%016Lx> ", !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", - m->cs, m->rip); + m->cs, m->ip); if (m->cs == __KERNEL_CS) - print_symbol("{%s}", m->rip); + print_symbol("{%s}", m->ip); printk("\n"); } printk(KERN_EMERG "TSC %Lx ", m->tsc); @@ -156,16 +156,16 @@ static int mce_available(struct cpuinfo_ static inline void mce_get_rip(struct mce *m, struct pt_regs *regs) { if (regs && (m->mcgstatus & MCG_STATUS_RIPV)) { - m->rip = regs->rip; + m->ip = regs->ip; m->cs = regs->cs; } else { - m->rip = 0; + m->ip = 0; m->cs = 0; } if (rip_msr) { /* Assume the RIP in the MSR is exact. Is this true? */ m->mcgstatus |= MCG_STATUS_EIPV; - rdmsrl(rip_msr, m->rip); + rdmsrl(rip_msr, m->ip); m->cs = 0; } } @@ -288,7 +288,7 @@ void do_machine_check(struct pt_regs * r * instruction which caused the MCE. 
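On the cmpxchg_486_u64() fallback added to intel.c above: it emulates cmpxchg8b by masking local interrupts, so it is atomic against interrupt handlers on the same CPU but, as its comment says, unsuitable for SMP. A hypothetical caller, assuming the usual cmpxchg64()-style wrapper is what routes to this fallback when CONFIG_X86_CMPXCHG64 is unset (the wrapper name is an assumption, not something this hunk adds):

/* classic compare-exchange retry loop around a 64-bit counter */
static void counter_add64(volatile unsigned long long *ctr,
                          unsigned long long delta)
{
        unsigned long long old;

        do {
                old = *ctr;                     /* snapshot */
        } while (cmpxchg64(ctr, old, old + delta) != old);
}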
*/ if (m.mcgstatus & MCG_STATUS_EIPV) - user_space = panicm.rip && (panicm.cs & 3); + user_space = panicm.ip && (panicm.cs & 3); /* * If we know that the error was in user space, send a diff -puN arch/x86/kernel/cpu/mcheck/mce_amd_64.c~git-x86 arch/x86/kernel/cpu/mcheck/mce_amd_64.c --- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c~git-x86 +++ a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c @@ -118,6 +118,7 @@ void __cpuinit mce_amd_feature_init(stru { unsigned int bank, block; unsigned int cpu = smp_processor_id(); + u8 lvt_off; u32 low = 0, high = 0, address = 0; for (bank = 0; bank < NR_BANKS; ++bank) { @@ -153,14 +154,13 @@ void __cpuinit mce_amd_feature_init(stru if (shared_bank[bank] && c->cpu_core_id) break; #endif + lvt_off = setup_APIC_eilvt_mce(THRESHOLD_APIC_VECTOR, + APIC_EILVT_MSG_FIX, 0); + high &= ~MASK_LVTOFF_HI; - high |= K8_APIC_EXT_LVT_ENTRY_THRESHOLD << 20; + high |= lvt_off << 20; wrmsr(address, low, high); - setup_APIC_extended_lvt(K8_APIC_EXT_LVT_ENTRY_THRESHOLD, - THRESHOLD_APIC_VECTOR, - K8_APIC_EXT_INT_MSG_FIX, 0); - threshold_defaults.address = address; threshold_restart_bank(&threshold_defaults, 0, 0); } diff -puN arch/x86/kernel/cpu/mtrr/amd.c~git-x86 arch/x86/kernel/cpu/mtrr/amd.c --- a/arch/x86/kernel/cpu/mtrr/amd.c~git-x86 +++ a/arch/x86/kernel/cpu/mtrr/amd.c @@ -53,8 +53,6 @@ static void amd_set_mtrr(unsigned int re The base address of the region. The size of the region. If this is 0 the region is disabled. The type of the region. - If TRUE, do the change safely. If FALSE, safety measures should - be done externally. [RETURNS] Nothing. */ { diff -puN arch/x86/kernel/cpu/mtrr/cyrix.c~git-x86 arch/x86/kernel/cpu/mtrr/cyrix.c --- a/arch/x86/kernel/cpu/mtrr/cyrix.c~git-x86 +++ a/arch/x86/kernel/cpu/mtrr/cyrix.c @@ -4,6 +4,7 @@ #include #include #include +#include #include "mtrr.h" int arr3_protected; @@ -142,7 +143,7 @@ static void prepare_set(void) /* Disable and flush caches. Note that wbinvd flushes the TLBs as a side-effect */ - cr0 = read_cr0() | 0x40000000; + cr0 = read_cr0() | X86_CR0_CD; wbinvd(); write_cr0(cr0); wbinvd(); diff -puN arch/x86/kernel/cpu/mtrr/generic.c~git-x86 arch/x86/kernel/cpu/mtrr/generic.c --- a/arch/x86/kernel/cpu/mtrr/generic.c~git-x86 +++ a/arch/x86/kernel/cpu/mtrr/generic.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include "mtrr.h" @@ -188,7 +189,7 @@ static inline void k8_enable_fixed_iorrs * \param changed pointer which indicates whether the MTRR needed to be changed * \param msrwords pointer to the MSR values which the MSR should have */ -static void set_fixed_range(int msr, int * changed, unsigned int * msrwords) +static void set_fixed_range(int msr, bool *changed, unsigned int *msrwords) { unsigned lo, hi; @@ -200,7 +201,7 @@ static void set_fixed_range(int msr, int ((msrwords[0] | msrwords[1]) & K8_MTRR_RDMEM_WRMEM_MASK)) k8_enable_fixed_iorrs(); mtrr_wrmsr(msr, msrwords[0], msrwords[1]); - *changed = TRUE; + *changed = true; } } @@ -260,7 +261,7 @@ static void generic_get_mtrr(unsigned in static int set_fixed_ranges(mtrr_type * frs) { unsigned long long *saved = (unsigned long long *) frs; - int changed = FALSE; + bool changed = false; int block=-1, range; while (fixed_range_blocks[++block].ranges) @@ -273,17 +274,17 @@ static int set_fixed_ranges(mtrr_type * /* Set the MSR pair relating to a var range. 
Returns TRUE if changes are made */ -static int set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr) +static bool set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr) { unsigned int lo, hi; - int changed = FALSE; + bool changed = false; rdmsr(MTRRphysBase_MSR(index), lo, hi); if ((vr->base_lo & 0xfffff0ffUL) != (lo & 0xfffff0ffUL) || (vr->base_hi & (size_and_mask >> (32 - PAGE_SHIFT))) != (hi & (size_and_mask >> (32 - PAGE_SHIFT)))) { mtrr_wrmsr(MTRRphysBase_MSR(index), vr->base_lo, vr->base_hi); - changed = TRUE; + changed = true; } rdmsr(MTRRphysMask_MSR(index), lo, hi); @@ -292,7 +293,7 @@ static int set_mtrr_var_ranges(unsigned || (vr->mask_hi & (size_and_mask >> (32 - PAGE_SHIFT))) != (hi & (size_and_mask >> (32 - PAGE_SHIFT)))) { mtrr_wrmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi); - changed = TRUE; + changed = true; } return changed; } @@ -350,7 +351,7 @@ static void prepare_set(void) __acquires spin_lock(&set_atomicity_lock); /* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */ - cr0 = read_cr0() | 0x40000000; /* set CD flag */ + cr0 = read_cr0() | X86_CR0_CD; write_cr0(cr0); wbinvd(); @@ -417,8 +418,6 @@ static void generic_set_mtrr(unsigned in The base address of the region. The size of the region. If this is 0 the region is disabled. The type of the region. - If TRUE, do the change safely. If FALSE, safety measures should - be done externally. [RETURNS] Nothing. */ { diff -puN arch/x86/kernel/cpu/mtrr/if.c~git-x86 arch/x86/kernel/cpu/mtrr/if.c --- a/arch/x86/kernel/cpu/mtrr/if.c~git-x86 +++ a/arch/x86/kernel/cpu/mtrr/if.c @@ -37,7 +37,7 @@ const char *mtrr_attrib_to_str(int x) static int mtrr_file_add(unsigned long base, unsigned long size, - unsigned int type, char increment, struct file *file, int page) + unsigned int type, bool increment, struct file *file, int page) { int reg, max; unsigned int *fcount = FILE_FCOUNT(file); @@ -55,7 +55,7 @@ mtrr_file_add(unsigned long base, unsign base >>= PAGE_SHIFT; size >>= PAGE_SHIFT; } - reg = mtrr_add_page(base, size, type, 1); + reg = mtrr_add_page(base, size, type, true); if (reg >= 0) ++fcount[reg]; return reg; @@ -141,7 +141,7 @@ mtrr_write(struct file *file, const char size >>= PAGE_SHIFT; err = mtrr_add_page((unsigned long) base, (unsigned long) size, i, - 1); + true); if (err < 0) return err; return len; @@ -217,7 +217,7 @@ mtrr_ioctl(struct file *file, unsigned i if (!capable(CAP_SYS_ADMIN)) return -EPERM; err = - mtrr_file_add(sentry.base, sentry.size, sentry.type, 1, + mtrr_file_add(sentry.base, sentry.size, sentry.type, true, file, 0); break; case MTRRIOC_SET_ENTRY: @@ -226,7 +226,7 @@ mtrr_ioctl(struct file *file, unsigned i #endif if (!capable(CAP_SYS_ADMIN)) return -EPERM; - err = mtrr_add(sentry.base, sentry.size, sentry.type, 0); + err = mtrr_add(sentry.base, sentry.size, sentry.type, false); break; case MTRRIOC_DEL_ENTRY: #ifdef CONFIG_COMPAT @@ -270,7 +270,7 @@ mtrr_ioctl(struct file *file, unsigned i if (!capable(CAP_SYS_ADMIN)) return -EPERM; err = - mtrr_file_add(sentry.base, sentry.size, sentry.type, 1, + mtrr_file_add(sentry.base, sentry.size, sentry.type, true, file, 1); break; case MTRRIOC_SET_PAGE_ENTRY: @@ -279,7 +279,8 @@ mtrr_ioctl(struct file *file, unsigned i #endif if (!capable(CAP_SYS_ADMIN)) return -EPERM; - err = mtrr_add_page(sentry.base, sentry.size, sentry.type, 0); + err = + mtrr_add_page(sentry.base, sentry.size, sentry.type, false); break; case MTRRIOC_DEL_PAGE_ENTRY: #ifdef CONFIG_COMPAT diff -puN arch/x86/kernel/cpu/mtrr/main.c~git-x86 
arch/x86/kernel/cpu/mtrr/main.c --- a/arch/x86/kernel/cpu/mtrr/main.c~git-x86 +++ a/arch/x86/kernel/cpu/mtrr/main.c @@ -311,7 +311,7 @@ static void set_mtrr(unsigned int reg, u */ int mtrr_add_page(unsigned long base, unsigned long size, - unsigned int type, char increment) + unsigned int type, bool increment) { int i, replace, error; mtrr_type ltype; @@ -394,7 +394,9 @@ int mtrr_add_page(unsigned long base, un if (likely(replace < 0)) usage_table[i] = 1; else { - usage_table[i] = usage_table[replace] + !!increment; + usage_table[i] = usage_table[replace]; + if (increment) + usage_table[i]++; if (unlikely(replace != i)) { set_mtrr(replace, 0, 0, 0); usage_table[replace] = 0; @@ -460,7 +462,7 @@ static int mtrr_check(unsigned long base int mtrr_add(unsigned long base, unsigned long size, unsigned int type, - char increment) + bool increment) { if (mtrr_check(base, size)) return -EINVAL; diff -puN arch/x86/kernel/cpu/mtrr/mtrr.h~git-x86 arch/x86/kernel/cpu/mtrr/mtrr.h --- a/arch/x86/kernel/cpu/mtrr/mtrr.h~git-x86 +++ a/arch/x86/kernel/cpu/mtrr/mtrr.h @@ -2,10 +2,8 @@ * local mtrr defines. */ -#ifndef TRUE -#define TRUE 1 -#define FALSE 0 -#endif +#include +#include #define MTRRcap_MSR 0x0fe #define MTRRdefType_MSR 0x2ff diff -puN arch/x86/kernel/cpu/mtrr/state.c~git-x86 arch/x86/kernel/cpu/mtrr/state.c --- a/arch/x86/kernel/cpu/mtrr/state.c~git-x86 +++ a/arch/x86/kernel/cpu/mtrr/state.c @@ -4,6 +4,7 @@ #include #include #include +#include #include "mtrr.h" @@ -25,7 +26,7 @@ void set_mtrr_prepare_save(struct set_mt /* Disable and flush caches. Note that wbinvd flushes the TLBs as a side-effect */ - cr0 = read_cr0() | 0x40000000; + cr0 = read_cr0() | X86_CR0_CD; wbinvd(); write_cr0(cr0); wbinvd(); diff -puN arch/x86/kernel/cpu/perfctr-watchdog.c~git-x86 arch/x86/kernel/cpu/perfctr-watchdog.c --- a/arch/x86/kernel/cpu/perfctr-watchdog.c~git-x86 +++ a/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -167,7 +167,6 @@ void release_evntsel_nmi(unsigned int ms clear_bit(counter, evntsel_nmi_owner); } -EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi); EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit); EXPORT_SYMBOL(reserve_perfctr_nmi); EXPORT_SYMBOL(release_perfctr_nmi); diff -puN arch/x86/kernel/doublefault_32.c~git-x86 arch/x86/kernel/doublefault_32.c --- a/arch/x86/kernel/doublefault_32.c~git-x86 +++ a/arch/x86/kernel/doublefault_32.c @@ -35,12 +35,13 @@ static void doublefault_fn(void) if (ptr_ok(tss)) { struct i386_hw_tss *t = (struct i386_hw_tss *)tss; - printk(KERN_EMERG "eip = %08lx, esp = %08lx\n", t->eip, t->esp); + printk(KERN_EMERG "eip = %08lx, esp = %08lx\n", + t->ip, t->sp); printk(KERN_EMERG "eax = %08lx, ebx = %08lx, ecx = %08lx, edx = %08lx\n", - t->eax, t->ebx, t->ecx, t->edx); + t->ax, t->bx, t->cx, t->dx); printk(KERN_EMERG "esi = %08lx, edi = %08lx\n", - t->esi, t->edi); + t->si, t->di); } } @@ -50,15 +51,15 @@ static void doublefault_fn(void) struct tss_struct doublefault_tss __cacheline_aligned = { .x86_tss = { - .esp0 = STACK_START, + .sp0 = STACK_START, .ss0 = __KERNEL_DS, .ldt = 0, .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, - .eip = (unsigned long) doublefault_fn, + .ip = (unsigned long) doublefault_fn, /* 0x2 bit is always set */ - .eflags = X86_EFLAGS_SF | 0x2, - .esp = STACK_START, + .flags = X86_EFLAGS_SF | 0x2, + .sp = STACK_START, .es = __USER_DS, .cs = __KERNEL_CS, .ss = __KERNEL_DS, diff -puN /dev/null arch/x86/kernel/ds.c --- /dev/null +++ a/arch/x86/kernel/ds.c @@ -0,0 +1,429 @@ +/* + * Debug Store support + * + * This provides a low-level interface to the hardware's Debug Store 
+ * feature that is used for last branch recording (LBR) and + * precise-event based sampling (PEBS). + * + * Different architectures use a different DS layout/pointer size. + * The below functions therefore work on a void*. + * + * + * Since there is no user for PEBS yet, only LBR (or branch + * trace store, BTS) is supported. + * + * + * Copyright (C) 2007 Intel Corporation. + * Markus Metzger , Dec 2007 + */ + +#include + +#include +#include +#include + + +/* + * Debug Store (DS) save area configuration (see Intel64 and IA32 + * Architectures Software Developer's Manual, section 18.5) + * + * The DS configuration consists of the following fields; different + * architectures vary in the size of those fields. + * - double-word aligned base linear address of the BTS buffer + * - write pointer into the BTS buffer + * - end linear address of the BTS buffer (one byte beyond the end of + * the buffer) + * - interrupt pointer into BTS buffer + * (interrupt occurs when write pointer passes interrupt pointer) + * - double-word aligned base linear address of the PEBS buffer + * - write pointer into the PEBS buffer + * - end linear address of the PEBS buffer (one byte beyond the end of + * the buffer) + * - interrupt pointer into PEBS buffer + * (interrupt occurs when write pointer passes interrupt pointer) + * - value to which counter is reset following counter overflow + * + * On later architectures, the last branch recording hardware uses + * 64bit pointers even in 32bit mode. + * + * + * Branch Trace Store (BTS) records store information about control + * flow changes. They at least provide the following information: + * - source linear address + * - destination linear address + * + * Netburst supported a predicated bit that has been dropped in later + * architectures. We do not support it. + * + * + * In order to abstract from the actual DS and BTS layout, we describe + * the access to the relevant fields. + * Thanks to Andi Kleen for proposing this design. + * + * The implementation, however, is not as general as it might seem. In + * order to stay somewhat simple and efficient, we assume an + * underlying unsigned type (mostly a pointer type) and we expect the + * field to be at least as big as that type. + */ + +/* + * A special from_ip address to indicate that the BTS record is an + * info record that needs to be interpreted or skipped. + */ +#define BTS_ESCAPE_ADDRESS (-1) + +/* + * A field access descriptor + */ +struct access_desc { + unsigned char offset; + unsigned char size; +}; + +/* + * The configuration for a particular DS/BTS hardware implementation. + */ +struct ds_configuration { + /* the DS configuration */ + unsigned char sizeof_ds; + struct access_desc bts_buffer_base; + struct access_desc bts_index; + struct access_desc bts_absolute_maximum; + struct access_desc bts_interrupt_threshold; + /* the BTS configuration */ + unsigned char sizeof_bts; + struct access_desc from_ip; + struct access_desc to_ip; + /* BTS variants used to store additional information like + timestamps */ + struct access_desc info_type; + struct access_desc info_data; + unsigned long debugctl_mask; +}; + +/* + * The global configuration used by the below accessor functions + */ +static struct ds_configuration ds_cfg; + +/* + * Accessor functions for some DS and BTS fields using the above + * global ds_cfg.
+ */ +static inline void *get_bts_buffer_base(char *base) +{ + return *(void **)(base + ds_cfg.bts_buffer_base.offset); +} +static inline void set_bts_buffer_base(char *base, void *value) +{ + (*(void **)(base + ds_cfg.bts_buffer_base.offset)) = value; +} +static inline void *get_bts_index(char *base) +{ + return *(void **)(base + ds_cfg.bts_index.offset); +} +static inline void set_bts_index(char *base, void *value) +{ + (*(void **)(base + ds_cfg.bts_index.offset)) = value; +} +static inline void *get_bts_absolute_maximum(char *base) +{ + return *(void **)(base + ds_cfg.bts_absolute_maximum.offset); +} +static inline void set_bts_absolute_maximum(char *base, void *value) +{ + (*(void **)(base + ds_cfg.bts_absolute_maximum.offset)) = value; +} +static inline void *get_bts_interrupt_threshold(char *base) +{ + return *(void **)(base + ds_cfg.bts_interrupt_threshold.offset); +} +static inline void set_bts_interrupt_threshold(char *base, void *value) +{ + (*(void **)(base + ds_cfg.bts_interrupt_threshold.offset)) = value; +} +static inline long get_from_ip(char *base) +{ + return *(long *)(base + ds_cfg.from_ip.offset); +} +static inline void set_from_ip(char *base, long value) +{ + (*(long *)(base + ds_cfg.from_ip.offset)) = value; +} +static inline long get_to_ip(char *base) +{ + return *(long *)(base + ds_cfg.to_ip.offset); +} +static inline void set_to_ip(char *base, long value) +{ + (*(long *)(base + ds_cfg.to_ip.offset)) = value; +} +static inline unsigned char get_info_type(char *base) +{ + return *(unsigned char *)(base + ds_cfg.info_type.offset); +} +static inline void set_info_type(char *base, unsigned char value) +{ + (*(unsigned char *)(base + ds_cfg.info_type.offset)) = value; +} +/* + * The info data might overlap with the info type on some architectures. + * We therefore read and write the exact number of bytes. 
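Editor's note: of the accessors above, none consults the descriptor's size field; only get/set_info_data below do, because the 7-byte payload matches no C type. The others read a pointer-sized value with one cast, which is the "not as general as it might seem" caveat from the header comment. A fully general (and slower) accessor would look like this sketch (hypothetical name, editor's example):

static inline unsigned long long field_read(const char *base,
                                            struct access_desc desc)
{
        unsigned long long value = 0;

        /* copy exactly desc.size bytes; correct for any field width
         * up to 8 bytes, assuming the little-endian layout of x86 */
        memcpy(&value, base + desc.offset, desc.size);
        return value;
}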
+ */ +static inline unsigned long long get_info_data(char *base) +{ + unsigned long long value = 0; + memcpy(&value, + base + ds_cfg.info_data.offset, + ds_cfg.info_data.size); + return value; +} +static inline void set_info_data(char *base, unsigned long long value) +{ + memcpy(base + ds_cfg.info_data.offset, + &value, + ds_cfg.info_data.size); +} + + +int ds_allocate(void **dsp, size_t bts_size_in_records) +{ + size_t bts_size_in_bytes = 0; + void *bts = 0; + void *ds = 0; + + if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) + return -EOPNOTSUPP; + + if (bts_size_in_records < 0) + return -EINVAL; + + bts_size_in_bytes = + bts_size_in_records * ds_cfg.sizeof_bts; + + if (bts_size_in_bytes <= 0) + return -EINVAL; + + bts = kzalloc(bts_size_in_bytes, GFP_KERNEL); + + if (!bts) + return -ENOMEM; + + ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL); + + if (!ds) { + kfree(bts); + return -ENOMEM; + } + + set_bts_buffer_base(ds, bts); + set_bts_index(ds, bts); + set_bts_absolute_maximum(ds, bts + bts_size_in_bytes); + set_bts_interrupt_threshold(ds, bts + bts_size_in_bytes + 1); + + *dsp = ds; + return 0; +} + +int ds_free(void **dsp) +{ + if (*dsp) + kfree(get_bts_buffer_base(*dsp)); + kfree(*dsp); + *dsp = 0; + + return 0; +} + +int ds_get_bts_size(void *ds) +{ + size_t size_in_bytes; + + if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) + return -EOPNOTSUPP; + + size_in_bytes = + get_bts_absolute_maximum(ds) - + get_bts_buffer_base(ds); + + return size_in_bytes / ds_cfg.sizeof_bts; +} + +int ds_get_bts_index(void *ds) +{ + size_t index_offset_in_bytes; + + if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) + return -EOPNOTSUPP; + + index_offset_in_bytes = + get_bts_index(ds) - + get_bts_buffer_base(ds); + + return index_offset_in_bytes / ds_cfg.sizeof_bts; +} + +int ds_read_bts(void *ds, size_t index, struct bts_struct *out) +{ + void *bts; + + if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) + return -EOPNOTSUPP; + + if (index < 0) + return -EINVAL; + + if (index >= ds_get_bts_size(ds)) + return -EINVAL; + + bts = get_bts_buffer_base(ds); + bts = (char *)bts + (index * ds_cfg.sizeof_bts); + + memset(out, 0, sizeof(*out)); + if (get_from_ip(bts) == BTS_ESCAPE_ADDRESS) { + out->qualifier = get_info_type(bts); + out->variant.timestamp = get_info_data(bts); + } else { + out->qualifier = BTS_BRANCH; + out->variant.lbr.from_ip = get_from_ip(bts); + out->variant.lbr.to_ip = get_to_ip(bts); + } + + return 0; +} + +int ds_write_bts(void *ds, const struct bts_struct *in) +{ + void *bts; + + if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) + return -EOPNOTSUPP; + + if (ds_get_bts_size(ds) <= 0) + return -ENXIO; + + bts = get_bts_index(ds); + + memset(bts, 0, ds_cfg.sizeof_bts); + switch (in->qualifier) { + case BTS_INVALID: + break; + + case BTS_BRANCH: + set_from_ip(bts, in->variant.lbr.from_ip); + set_to_ip(bts, in->variant.lbr.to_ip); + break; + + case BTS_TASK_ARRIVES: + case BTS_TASK_DEPARTS: + set_from_ip(bts, BTS_ESCAPE_ADDRESS); + set_info_type(bts, in->qualifier); + set_info_data(bts, in->variant.timestamp); + break; + + default: + return -EINVAL; + } + + bts = (char *)bts + ds_cfg.sizeof_bts; + if (bts >= get_bts_absolute_maximum(ds)) + bts = get_bts_buffer_base(ds); + set_bts_index(ds, bts); + + return 0; +} + +unsigned long ds_debugctl_mask(void) +{ + return ds_cfg.debugctl_mask; +} + +#ifdef __i386__ +static const struct ds_configuration ds_cfg_netburst = { + .sizeof_ds = 9 * 4, + .bts_buffer_base = { 0, 4 }, + .bts_index = { 4, 4 }, + .bts_absolute_maximum = { 8, 4 }, + .bts_interrupt_threshold = { 12, 4 }, + 
.sizeof_bts = 3 * 4, + .from_ip = { 0, 4 }, + .to_ip = { 4, 4 }, + .info_type = { 4, 1 }, + .info_data = { 5, 7 }, + .debugctl_mask = (1<<2)|(1<<3) +}; + +static const struct ds_configuration ds_cfg_pentium_m = { + .sizeof_ds = 9 * 4, + .bts_buffer_base = { 0, 4 }, + .bts_index = { 4, 4 }, + .bts_absolute_maximum = { 8, 4 }, + .bts_interrupt_threshold = { 12, 4 }, + .sizeof_bts = 3 * 4, + .from_ip = { 0, 4 }, + .to_ip = { 4, 4 }, + .info_type = { 4, 1 }, + .info_data = { 5, 7 }, + .debugctl_mask = (1<<6)|(1<<7) +}; +#endif /* _i386_ */ + +static const struct ds_configuration ds_cfg_core2 = { + .sizeof_ds = 9 * 8, + .bts_buffer_base = { 0, 8 }, + .bts_index = { 8, 8 }, + .bts_absolute_maximum = { 16, 8 }, + .bts_interrupt_threshold = { 24, 8 }, + .sizeof_bts = 3 * 8, + .from_ip = { 0, 8 }, + .to_ip = { 8, 8 }, + .info_type = { 8, 1 }, + .info_data = { 9, 7 }, + .debugctl_mask = (1<<6)|(1<<7)|(1<<9) +}; + +static inline void +ds_configure(const struct ds_configuration *cfg) +{ + ds_cfg = *cfg; +} + +void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) +{ + switch (c->x86) { + case 0x6: + switch (c->x86_model) { +#ifdef __i386__ + case 0xD: + case 0xE: /* Pentium M */ + ds_configure(&ds_cfg_pentium_m); + break; +#endif /* _i386_ */ + case 0xF: /* Core2 */ + ds_configure(&ds_cfg_core2); + break; + default: + /* sorry, don't know about them */ + break; + } + break; + case 0xF: + switch (c->x86_model) { +#ifdef __i386__ + case 0x0: + case 0x1: + case 0x2: /* Netburst */ + ds_configure(&ds_cfg_netburst); + break; +#endif /* _i386_ */ + default: + /* sorry, don't know about them */ + break; + } + break; + default: + /* sorry, don't know about them */ + break; + } +} diff -puN arch/x86/kernel/e820_32.c~git-x86 arch/x86/kernel/e820_32.c --- a/arch/x86/kernel/e820_32.c~git-x86 +++ a/arch/x86/kernel/e820_32.c @@ -37,26 +37,6 @@ unsigned long pci_mem_start = 0x10000000 EXPORT_SYMBOL(pci_mem_start); #endif extern int user_defined_memmap; -struct resource data_resource = { - .name = "Kernel data", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM -}; - -struct resource code_resource = { - .name = "Kernel code", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM -}; - -struct resource bss_resource = { - .name = "Kernel bss", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM -}; static struct resource system_rom_resource = { .name = "System ROM", @@ -111,60 +91,6 @@ static struct resource video_rom_resourc .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM }; -static struct resource video_ram_resource = { - .name = "Video RAM area", - .start = 0xa0000, - .end = 0xbffff, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM -}; - -static struct resource standard_io_resources[] = { { - .name = "dma1", - .start = 0x0000, - .end = 0x001f, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "pic1", - .start = 0x0020, - .end = 0x0021, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "timer0", - .start = 0x0040, - .end = 0x0043, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "timer1", - .start = 0x0050, - .end = 0x0053, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "keyboard", - .start = 0x0060, - .end = 0x006f, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "dma page reg", - .start = 0x0080, - .end = 0x008f, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "pic2", - .start = 0x00a0, - .end = 0x00a1, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "dma2", - .start = 0x00c0, - 
.end = 0x00df, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "fpu", - .start = 0x00f0, - .end = 0x00ff, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -} }; - #define ROMSIGNATURE 0xaa55 static int __init romsignature(const unsigned char *rom) @@ -260,10 +186,9 @@ static void __init probe_roms(void) * Request address space for all standard RAM and ROM resources * and also for regions reported as reserved by the e820. */ -static void __init -legacy_init_iomem_resources(struct resource *code_resource, - struct resource *data_resource, - struct resource *bss_resource) +void __init legacy_init_iomem_resources(struct resource *code_resource, + struct resource *data_resource, + struct resource *bss_resource) { int i; @@ -305,35 +230,6 @@ legacy_init_iomem_resources(struct resou } } -/* - * Request address space for all standard resources - * - * This is called just before pcibios_init(), which is also a - * subsys_initcall, but is linked in later (in arch/i386/pci/common.c). - */ -static int __init request_standard_resources(void) -{ - int i; - - printk("Setting up standard PCI resources\n"); - if (efi_enabled) - efi_initialize_iomem_resources(&code_resource, - &data_resource, &bss_resource); - else - legacy_init_iomem_resources(&code_resource, - &data_resource, &bss_resource); - - /* EFI systems may still have VGA */ - request_resource(&iomem_resource, &video_ram_resource); - - /* request I/O space for devices used on all i[345]86 PCs */ - for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++) - request_resource(&ioport_resource, &standard_io_resources[i]); - return 0; -} - -subsys_initcall(request_standard_resources); - #if defined(CONFIG_PM) && defined(CONFIG_HIBERNATION) /** * e820_mark_nosave_regions - Find the ranges of physical addresses that do not diff -puN arch/x86/kernel/e820_64.c~git-x86 arch/x86/kernel/e820_64.c --- a/arch/x86/kernel/e820_64.c~git-x86 +++ a/arch/x86/kernel/e820_64.c @@ -1,4 +1,4 @@ -/* +/* * Handle the memory map. * The functions here do the job until bootmem takes over. * @@ -26,47 +26,45 @@ #include #include #include +#include struct e820map e820; -/* +/* * PFN of last memory page. */ -unsigned long end_pfn; -EXPORT_SYMBOL(end_pfn); +unsigned long end_pfn; -/* +/* * end_pfn only includes RAM, while end_pfn_map includes all e820 entries. * The direct mapping extends to end_pfn_map, so that we can directly access * apertures, ACPI and other tables without having to play with fixmaps. - */ -unsigned long end_pfn_map; + */ +unsigned long end_pfn_map; -/* +/* * Last pfn which the user wants to use. */ static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT; -extern struct resource code_resource, data_resource, bss_resource; - -/* Check for some hardcoded bad areas that early boot is not allowed to touch */ +/* Check for some hardcoded bad areas that early boot is not allowed to touch */ static inline int bad_addr(unsigned long *addrp, unsigned long size) -{ - unsigned long addr = *addrp, last = addr + size; +{ + unsigned long addr = *addrp, last = addr + size; /* various gunk below that needed for SMP startup */ - if (addr < 0x8000) { + if (addr < 0x8000) { *addrp = PAGE_ALIGN(0x8000); - return 1; + return 1; } /* direct mapping tables of the kernel */ - if (last >= table_start<= table_start<= __pa_symbol(&_text) && addr < __pa_symbol(&_end)) { @@ -97,9 +95,9 @@ static inline int bad_addr(unsigned long return 1; } #endif - /* XXX ramdisk image here? */ + /* XXX ramdisk image here? 
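bad_addr() above is the early allocator's collision test: rather than merely failing, it bumps the candidate address past whichever early-boot object it hit and returns 1, and find_e820_area() below simply re-probes until the range comes back clean. The pattern, reduced to a stand-alone sketch with a hypothetical carve-out table:

struct carveout { unsigned long start, end; };  /* [start, end) */

static int bump_past(unsigned long *addr, unsigned long size,
                     const struct carveout *res, int nr)
{
        int i;

        for (i = 0; i < nr; i++) {
                if (*addr < res[i].end && *addr + size > res[i].start) {
                        *addr = res[i].end;     /* skip the obstacle */
                        return 1;               /* caller must re-check */
                }
        }
        return 0;                               /* range is clear */
}

A single bump can land inside the next reservation, which is why find_e820_area() spins in a bare while-loop on the check instead of calling it once.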
*/ return 0; -} +} /* * This function checks if any part of the range is mapped @@ -107,16 +105,18 @@ static inline int bad_addr(unsigned long */ int e820_any_mapped(unsigned long start, unsigned long end, unsigned type) -{ +{ int i; - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - if (type && ei->type != type) + + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + + if (type && ei->type != type) continue; if (ei->addr >= end || ei->addr + ei->size <= start) - continue; - return 1; - } + continue; + return 1; + } return 0; } EXPORT_SYMBOL_GPL(e820_any_mapped); @@ -127,11 +127,14 @@ EXPORT_SYMBOL_GPL(e820_any_mapped); * Note: this function only works correct if the e820 table is sorted and * not-overlapping, which is the case */ -int __init e820_all_mapped(unsigned long start, unsigned long end, unsigned type) +int __init e820_all_mapped(unsigned long start, unsigned long end, + unsigned type) { int i; + for (i = 0; i < e820.nr_map; i++) { struct e820entry *ei = &e820.map[i]; + if (type && ei->type != type) continue; /* is the region (part) in overlap with the current region ?*/ @@ -143,65 +146,73 @@ int __init e820_all_mapped(unsigned long */ if (ei->addr <= start) start = ei->addr + ei->size; - /* if start is now at or beyond end, we're done, full coverage */ + /* + * if start is now at or beyond end, we're done, full + * coverage + */ if (start >= end) - return 1; /* we're done */ + return 1; } return 0; } -/* - * Find a free area in a specific range. - */ -unsigned long __init find_e820_area(unsigned long start, unsigned long end, unsigned size) -{ - int i; - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - unsigned long addr = ei->addr, last; - if (ei->type != E820_RAM) - continue; - if (addr < start) +/* + * Find a free area in a specific range. + */ +unsigned long __init find_e820_area(unsigned long start, unsigned long end, + unsigned size) +{ + int i; + + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + unsigned long addr = ei->addr, last; + + if (ei->type != E820_RAM) + continue; + if (addr < start) addr = start; - if (addr > ei->addr + ei->size) - continue; + if (addr > ei->addr + ei->size) + continue; while (bad_addr(&addr, size) && addr+size <= ei->addr+ei->size) ; last = PAGE_ALIGN(addr) + size; if (last > ei->addr + ei->size) continue; - if (last > end) + if (last > end) continue; - return addr; - } - return -1UL; -} + return addr; + } + return -1UL; +} /* * Find the highest page frame number we have available */ unsigned long __init e820_end_of_ram(void) { - unsigned long end_pfn = 0; + unsigned long end_pfn; + end_pfn = find_max_pfn_with_active_regions(); - - if (end_pfn > end_pfn_map) + + if (end_pfn > end_pfn_map) end_pfn_map = end_pfn; if (end_pfn_map > MAXMEM>>PAGE_SHIFT) end_pfn_map = MAXMEM>>PAGE_SHIFT; if (end_pfn > end_user_pfn) end_pfn = end_user_pfn; - if (end_pfn > end_pfn_map) - end_pfn = end_pfn_map; + if (end_pfn > end_pfn_map) + end_pfn = end_pfn_map; - printk("end_pfn_map = %lu\n", end_pfn_map); - return end_pfn; + printk(KERN_INFO "end_pfn_map = %lu\n", end_pfn_map); + return end_pfn; } /* * Mark e820 reserved areas as busy for the resource manager. 
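The two predicates cleaned up above are easy to conflate: e820_any_mapped() succeeds if the range merely intersects a matching entry, while e820_all_mapped() succeeds only if the (sorted, non-overlapping) entries cover every byte of it. Reduced to a single region for illustration (editor's sketch):

/* does [start, end) touch the region [r_start, r_end) at all? */
static int any_overlap(unsigned long start, unsigned long end,
                       unsigned long r_start, unsigned long r_end)
{
        return start < r_end && r_start < end;
}

/* is [start, end) entirely inside [r_start, r_end)? */
static int all_inside(unsigned long start, unsigned long end,
                      unsigned long r_start, unsigned long r_end)
{
        return r_start <= start && end <= r_end;
}

With one E820_RAM entry at [0x1000, 0x9000), the range [0x8000, 0xa000) is "any" mapped but not "all" mapped.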
*/ -void __init e820_reserve_resources(void) +void __init e820_reserve_resources(struct resource *code_resource, + struct resource *data_resource, struct resource *bss_resource) { int i; for (i = 0; i < e820.nr_map; i++) { @@ -219,13 +230,13 @@ void __init e820_reserve_resources(void) request_resource(&iomem_resource, res); if (e820.map[i].type == E820_RAM) { /* - * We don't know which RAM region contains kernel data, - * so we try it repeatedly and let the resource manager - * test it. + * We don't know which RAM region contains kernel data, + * so we try it repeatedly and let the resource manager + * test it. */ - request_resource(res, &code_resource); - request_resource(res, &data_resource); - request_resource(res, &bss_resource); + request_resource(res, code_resource); + request_resource(res, data_resource); + request_resource(res, bss_resource); #ifdef CONFIG_KEXEC if (crashk_res.start != crashk_res.end) request_resource(res, &crashk_res); @@ -322,9 +333,9 @@ e820_register_active_regions(int nid, un add_active_range(nid, ei_startpfn, ei_endpfn); } -/* +/* * Add a memory region to the kernel e820 map. - */ + */ void __init add_memory_region(unsigned long start, unsigned long size, int type) { int x = e820.nr_map; @@ -349,9 +360,7 @@ unsigned long __init e820_hole_size(unsi { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long end_pfn = end >> PAGE_SHIFT; - unsigned long ei_startpfn; - unsigned long ei_endpfn; - unsigned long ram = 0; + unsigned long ei_startpfn, ei_endpfn, ram = 0; int i; for (i = 0; i < e820.nr_map; i++) { @@ -363,28 +372,31 @@ unsigned long __init e820_hole_size(unsi return end - start - (ram << PAGE_SHIFT); } -void __init e820_print_map(char *who) +static void __init e820_print_map(char *who) { int i; for (i = 0; i < e820.nr_map; i++) { printk(KERN_INFO " %s: %016Lx - %016Lx ", who, - (unsigned long long) e820.map[i].addr, - (unsigned long long) (e820.map[i].addr + e820.map[i].size)); + (unsigned long long) e820.map[i].addr, + (unsigned long long) + (e820.map[i].addr + e820.map[i].size)); switch (e820.map[i].type) { - case E820_RAM: printk("(usable)\n"); - break; + case E820_RAM: + printk(KERN_CONT "(usable)\n"); + break; case E820_RESERVED: - printk("(reserved)\n"); - break; + printk(KERN_CONT "(reserved)\n"); + break; case E820_ACPI: - printk("(ACPI data)\n"); - break; + printk(KERN_CONT "(ACPI data)\n"); + break; case E820_NVS: - printk("(ACPI NVS)\n"); - break; - default: printk("type %u\n", e820.map[i].type); - break; + printk(KERN_CONT "(ACPI NVS)\n"); + break; + default: + printk(KERN_CONT "type %u\n", e820.map[i].type); + break; } } } @@ -392,11 +404,11 @@ void __init e820_print_map(char *who) /* * Sanitize the BIOS e820 map. * - * Some e820 responses include overlapping entries. The following + * Some e820 responses include overlapping entries. The following * replaces the original e820 map with a new one, removing overlaps. * */ -static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map) +static int __init sanitize_e820_map(struct e820entry *biosmap, char *pnr_map) { struct change_member { struct e820entry *pbios; /* pointer to original bios entry */ @@ -416,7 +428,8 @@ static int __init sanitize_e820_map(stru int i; /* - Visually we're performing the following (1,2,3,4 = memory types)... + Visually we're performing the following + (1,2,3,4 = memory types)... 
Sample memory map (w/overlaps): ____22__________________ @@ -458,22 +471,23 @@ static int __init sanitize_e820_map(stru old_nr = *pnr_map; /* bail out if we find any unreasonable addresses in bios map */ - for (i=0; iaddr = biosmap[i].addr; change_point[chgidx++]->pbios = &biosmap[i]; - change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size; + change_point[chgidx]->addr = biosmap[i].addr + + biosmap[i].size; change_point[chgidx++]->pbios = &biosmap[i]; } } @@ -483,75 +497,106 @@ static int __init sanitize_e820_map(stru still_changing = 1; while (still_changing) { still_changing = 0; - for (i=1; i < chg_nr; i++) { - /* if > , swap */ - /* or, if current= & last=, swap */ - if ((change_point[i]->addr < change_point[i-1]->addr) || - ((change_point[i]->addr == change_point[i-1]->addr) && - (change_point[i]->addr == change_point[i]->pbios->addr) && - (change_point[i-1]->addr != change_point[i-1]->pbios->addr)) - ) - { + for (i = 1; i < chg_nr; i++) { + unsigned long long curaddr, lastaddr; + unsigned long long curpbaddr, lastpbaddr; + + curaddr = change_point[i]->addr; + lastaddr = change_point[i - 1]->addr; + curpbaddr = change_point[i]->pbios->addr; + lastpbaddr = change_point[i - 1]->pbios->addr; + + /* + * swap entries, when: + * + * curaddr > lastaddr or + * curaddr == lastaddr and curaddr == curpbaddr and + * lastaddr != lastpbaddr + */ + if (curaddr < lastaddr || + (curaddr == lastaddr && curaddr == curpbaddr && + lastaddr != lastpbaddr)) { change_tmp = change_point[i]; change_point[i] = change_point[i-1]; change_point[i-1] = change_tmp; - still_changing=1; + still_changing = 1; } } } /* create a new bios memory map, removing overlaps */ - overlap_entries=0; /* number of entries in the overlap table */ - new_bios_entry=0; /* index for creating new bios map entries */ + overlap_entries = 0; /* number of entries in the overlap table */ + new_bios_entry = 0; /* index for creating new bios map entries */ last_type = 0; /* start with undefined memory type */ last_addr = 0; /* start with 0 as last starting address */ + /* loop through change-points, determining affect on the new bios map */ - for (chgidx=0; chgidx < chg_nr; chgidx++) - { + for (chgidx = 0; chgidx < chg_nr; chgidx++) { /* keep track of all overlapping bios entries */ - if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr) - { - /* add map entry to overlap list (> 1 entry implies an overlap) */ - overlap_list[overlap_entries++]=change_point[chgidx]->pbios; - } - else - { - /* remove entry from list (order independent, so swap with last) */ - for (i=0; ipbios) - overlap_list[i] = overlap_list[overlap_entries-1]; + if (change_point[chgidx]->addr == + change_point[chgidx]->pbios->addr) { + /* + * add map entry to overlap list (> 1 entry + * implies an overlap) + */ + overlap_list[overlap_entries++] = + change_point[chgidx]->pbios; + } else { + /* + * remove entry from list (order independent, + * so swap with last) + */ + for (i = 0; i < overlap_entries; i++) { + if (overlap_list[i] == + change_point[chgidx]->pbios) + overlap_list[i] = + overlap_list[overlap_entries-1]; } overlap_entries--; } - /* if there are overlapping entries, decide which "type" to use */ - /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */ + /* + * if there are overlapping entries, decide which + * "type" to use (larger value takes precedence -- + * 1=usable, 2,3,4,4+=unusable) + */ current_type = 0; - for (i=0; itype > current_type) current_type = overlap_list[i]->type; - /* continue building up new bios map based on 
- /* continue building up new bios map based on this information */ + /* + * continue building up new bios map based on this + * information + */ if (current_type != last_type) { if (last_type != 0) { new_bios[new_bios_entry].size = change_point[chgidx]->addr - last_addr; - /* move forward only if the new size was non-zero */ + /* + * move forward only if the new size + * was non-zero + */ if (new_bios[new_bios_entry].size != 0) + /* + * no more space left for new + * bios entries ? + */ if (++new_bios_entry >= E820MAX) - break; /* no more space left for new bios entries */ + break; } if (current_type != 0) { - new_bios[new_bios_entry].addr = change_point[chgidx]->addr; + new_bios[new_bios_entry].addr = + change_point[chgidx]->addr; new_bios[new_bios_entry].type = current_type; - last_addr=change_point[chgidx]->addr; + last_addr = change_point[chgidx]->addr; } last_type = current_type; } } - new_nr = new_bios_entry; /* retain count for new bios entries */ + /* retain count for new bios entries */ + new_nr = new_bios_entry; /* copy new bios mapping into original location */ - memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry)); + memcpy(biosmap, new_bios, new_nr * sizeof(struct e820entry)); *pnr_map = new_nr; return 0; @@ -566,7 +611,7 @@ static int __init sanitize_e820_map(stru * will have given us a memory map that we can use to properly * set up memory. If we aren't, we'll fake a memory map. */ -static int __init copy_e820_map(struct e820entry * biosmap, int nr_map) +static int __init copy_e820_map(struct e820entry *biosmap, int nr_map) { /* Only one memory region (or negative)? Ignore it */ if (nr_map < 2) @@ -583,11 +628,11 @@ static int __init copy_e820_map(struct e return -1; add_memory_region(start, size, type); - } while (biosmap++,--nr_map); + } while (biosmap++, --nr_map); return 0; } -void early_panic(char *msg) +static void early_panic(char *msg) { early_printk(msg); panic(msg); @@ -613,9 +658,9 @@ static int __init parse_memopt(char *p) if (!p) return -EINVAL; end_user_pfn = memparse(p, &p); - end_user_pfn >>= PAGE_SHIFT; + end_user_pfn >>= PAGE_SHIFT; return 0; -} +} early_param("mem", parse_memopt); static int userdef __initdata; @@ -627,9 +672,9 @@ static int __init parse_memmap_opt(char if (!strcmp(p, "exactmap")) { #ifdef CONFIG_CRASH_DUMP - /* If we are doing a crash dump, we - * still need to know the real mem - * size before original memory map is + /* + * If we are doing a crash dump, we still need to know + * the real mem size before original memory map is * reset. 
*/ e820_register_active_regions(0, 0, -1UL); @@ -646,6 +691,8 @@ static int __init parse_memmap_opt(char mem_size = memparse(p, &p); if (p == oldp) return -EINVAL; + + userdef = 1; if (*p == '@') { start_at = memparse(p+1, &p); add_memory_region(start_at, mem_size, E820_RAM); @@ -665,6 +712,12 @@ early_param("memmap", parse_memmap_opt); void __init finish_e820_parsing(void) { if (userdef) { + char nr = e820.nr_map; + + if (sanitize_e820_map(e820.map, &nr) < 0) + early_panic("Invalid user supplied memory map"); + e820.nr_map = nr; + printk(KERN_INFO "user-defined physical RAM map:\n"); e820_print_map("user"); } @@ -713,8 +766,10 @@ __init void e820_setup_gap(void) if (!found) { gapstart = (end_pfn << PAGE_SHIFT) + 1024*1024; - printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit address range\n" - KERN_ERR "PCI: Unassigned devices with 32bit resource registers may break!\n"); + printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit " + "address range\n" + KERN_ERR "PCI: Unassigned devices with 32bit resource " + "registers may break!\n"); } /* @@ -727,8 +782,9 @@ __init void e820_setup_gap(void) /* Fun with two's complement */ pci_mem_start = (gapstart + round) & -round; - printk(KERN_INFO "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n", - pci_mem_start, gapstart, gapsize); + printk(KERN_INFO + "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n", + pci_mem_start, gapstart, gapsize); } int __init arch_get_ram_range(int slot, u64 *addr, u64 *size) diff -puN arch/x86/kernel/entry_32.S~git-x86 arch/x86/kernel/entry_32.S --- a/arch/x86/kernel/entry_32.S~git-x86 +++ a/arch/x86/kernel/entry_32.S @@ -58,7 +58,7 @@ * for paravirtualization. The following will never clobber any registers: * INTERRUPT_RETURN (aka. "iret") * GET_CR0_INTO_EAX (aka. "movl %cr0, %eax") - * ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit"). + * ENABLE_INTERRUPTS_SYSCALL_RET (aka "sti; sysexit"). * * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY). @@ -283,12 +283,12 @@ END(resume_kernel) the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */ # sysenter call handler stub -ENTRY(sysenter_entry) +ENTRY(ia32_sysenter_target) CFI_STARTPROC simple CFI_SIGNAL_FRAME CFI_DEF_CFA esp, 0 CFI_REGISTER esp, ebp - movl TSS_sysenter_esp0(%esp),%esp + movl TSS_sysenter_sp0(%esp),%esp sysenter_past_esp: /* * No need to follow this irqs on/off section: the syscall @@ -351,7 +351,7 @@ sysenter_past_esp: xorl %ebp,%ebp TRACE_IRQS_ON 1: mov PT_FS(%esp), %fs - ENABLE_INTERRUPTS_SYSEXIT + ENABLE_INTERRUPTS_SYSCALL_RET CFI_ENDPROC .pushsection .fixup,"ax" 2: movl $0,PT_FS(%esp) @@ -360,7 +360,7 @@ sysenter_past_esp: .align 4 .long 1b,2b .popsection -ENDPROC(sysenter_entry) +ENDPROC(ia32_sysenter_target) # system call handler stub ENTRY(system_call) @@ -743,7 +743,7 @@ END(device_not_available) * that sets up the real kernel stack. Check here, since we can't * allow the wrong stack to be used. * - * "TSS_sysenter_esp0+12" is because the NMI/debug handler will have + * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have * already pushed 3 words if it hits on the sysenter instruction: * eflags, cs and eip. 
* @@ -755,7 +755,7 @@ END(device_not_available) cmpw $__KERNEL_CS,4(%esp); \ jne ok; \ label: \ - movl TSS_sysenter_esp0+offset(%esp),%esp; \ + movl TSS_sysenter_sp0+offset(%esp),%esp; \ CFI_DEF_CFA esp, 0; \ CFI_UNDEFINED eip; \ pushfl; \ @@ -768,7 +768,7 @@ label: \ KPROBE_ENTRY(debug) RING0_INT_FRAME - cmpl $sysenter_entry,(%esp) + cmpl $ia32_sysenter_target,(%esp) jne debug_stack_correct FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn) debug_stack_correct: @@ -799,7 +799,7 @@ KPROBE_ENTRY(nmi) popl %eax CFI_ADJUST_CFA_OFFSET -4 je nmi_espfix_stack - cmpl $sysenter_entry,(%esp) + cmpl $ia32_sysenter_target,(%esp) je nmi_stack_fixup pushl %eax CFI_ADJUST_CFA_OFFSET 4 @@ -812,7 +812,7 @@ KPROBE_ENTRY(nmi) popl %eax CFI_ADJUST_CFA_OFFSET -4 jae nmi_stack_correct - cmpl $sysenter_entry,12(%esp) + cmpl $ia32_sysenter_target,12(%esp) je nmi_debug_stack_check nmi_stack_correct: /* We have a RING0_INT_FRAME here */ @@ -882,10 +882,10 @@ ENTRY(native_iret) .previous END(native_iret) -ENTRY(native_irq_enable_sysexit) +ENTRY(native_irq_enable_syscall_ret) sti sysexit -END(native_irq_enable_sysexit) +END(native_irq_enable_syscall_ret) #endif KPROBE_ENTRY(int3) diff -puN arch/x86/kernel/geode_32.c~git-x86 arch/x86/kernel/geode_32.c --- a/arch/x86/kernel/geode_32.c~git-x86 +++ a/arch/x86/kernel/geode_32.c @@ -1,6 +1,7 @@ /* * AMD Geode southbridge support code * Copyright (C) 2006, Advanced Micro Devices, Inc. + * Copyright (C) 2007, Andres Salomon * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public License @@ -51,45 +52,62 @@ EXPORT_SYMBOL_GPL(geode_get_dev_base); /* === GPIO API === */ -void geode_gpio_set(unsigned int gpio, unsigned int reg) +void geode_gpio_set(u32 gpio, unsigned int reg) { u32 base = geode_get_dev_base(GEODE_DEV_GPIO); if (!base) return; - if (gpio < 16) - outl(1 << gpio, base + reg); - else - outl(1 << (gpio - 16), base + 0x80 + reg); + /* low bank register */ + if (gpio & 0xFFFF) + outl(gpio & 0xFFFF, base + reg); + /* high bank register */ + gpio >>= 16; + if (gpio) + outl(gpio, base + 0x80 + reg); } EXPORT_SYMBOL_GPL(geode_gpio_set); -void geode_gpio_clear(unsigned int gpio, unsigned int reg) +void geode_gpio_clear(u32 gpio, unsigned int reg) { u32 base = geode_get_dev_base(GEODE_DEV_GPIO); if (!base) return; - if (gpio < 16) - outl(1 << (gpio + 16), base + reg); - else - outl(1 << gpio, base + 0x80 + reg); + /* low bank register */ + if (gpio & 0xFFFF) + outl((gpio & 0xFFFF) << 16, base + reg); + /* high bank register */ + gpio &= (0xFFFF << 16); + if (gpio) + outl(gpio, base + 0x80 + reg); } EXPORT_SYMBOL_GPL(geode_gpio_clear); -int geode_gpio_isset(unsigned int gpio, unsigned int reg) +int geode_gpio_isset(u32 gpio, unsigned int reg) { u32 base = geode_get_dev_base(GEODE_DEV_GPIO); + u32 val; if (!base) return 0; - if (gpio < 16) - return (inl(base + reg) & (1 << gpio)) ? 1 : 0; - else - return (inl(base + 0x80 + reg) & (1 << (gpio - 16))) ? 
1 : 0; + /* low bank register */ + if (gpio & 0xFFFF) { + val = inl(base + reg) & (gpio & 0xFFFF); + if ((gpio & 0xFFFF) == val) + return 1; + } + /* high bank register */ + gpio >>= 16; + if (gpio) { + val = inl(base + 0x80 + reg) & gpio; + if (gpio == val) + return 1; + } + return 0; } EXPORT_SYMBOL_GPL(geode_gpio_isset); diff -puN arch/x86/kernel/head64.c~git-x86 arch/x86/kernel/head64.c --- a/arch/x86/kernel/head64.c~git-x86 +++ a/arch/x86/kernel/head64.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -19,6 +20,7 @@ #include #include #include +#include static void __init zap_identity_mappings(void) { @@ -67,8 +69,6 @@ void __init x86_64_start_kernel(char * r pda_init(0); copy_bootdata(__va(real_mode_data)); -#ifdef CONFIG_SMP - cpu_set(0, cpu_online_map); -#endif + start_kernel(); } diff -puN arch/x86/kernel/hpet.c~git-x86 arch/x86/kernel/hpet.c --- a/arch/x86/kernel/hpet.c~git-x86 +++ a/arch/x86/kernel/hpet.c @@ -6,7 +6,6 @@ #include #include #include -#include #include #include @@ -16,7 +15,8 @@ #define HPET_MASK CLOCKSOURCE_MASK(32) #define HPET_SHIFT 22 -/* FSEC = 10^-15 NSEC = 10^-9 */ +/* FSEC = 10^-15 + NSEC = 10^-9 */ #define FSEC_PER_NSEC 1000000 /* diff -puN arch/x86/kernel/i386_ksyms_32.c~git-x86 arch/x86/kernel/i386_ksyms_32.c --- a/arch/x86/kernel/i386_ksyms_32.c~git-x86 +++ a/arch/x86/kernel/i386_ksyms_32.c @@ -22,12 +22,5 @@ EXPORT_SYMBOL(__put_user_8); EXPORT_SYMBOL(strstr); -#ifdef CONFIG_SMP -extern void FASTCALL( __write_lock_failed(rwlock_t *rw)); -extern void FASTCALL( __read_lock_failed(rwlock_t *rw)); -EXPORT_SYMBOL(__write_lock_failed); -EXPORT_SYMBOL(__read_lock_failed); -#endif - EXPORT_SYMBOL(csum_partial); EXPORT_SYMBOL(empty_zero_page); diff -puN arch/x86/kernel/i8253.c~git-x86 arch/x86/kernel/i8253.c --- a/arch/x86/kernel/i8253.c~git-x86 +++ a/arch/x86/kernel/i8253.c @@ -31,9 +31,7 @@ struct clock_event_device *global_clock_ static void init_pit_timer(enum clock_event_mode mode, struct clock_event_device *evt) { - unsigned long flags; - - spin_lock_irqsave(&i8253_lock, flags); + spin_lock(&i8253_lock); switch(mode) { case CLOCK_EVT_MODE_PERIODIC: @@ -62,7 +60,7 @@ static void init_pit_timer(enum clock_ev /* Nothing to do here */ break; } - spin_unlock_irqrestore(&i8253_lock, flags); + spin_unlock(&i8253_lock); } /* @@ -72,12 +70,10 @@ static void init_pit_timer(enum clock_ev */ static int pit_next_event(unsigned long delta, struct clock_event_device *evt) { - unsigned long flags; - - spin_lock_irqsave(&i8253_lock, flags); + spin_lock(&i8253_lock); outb_p(delta & 0xff , PIT_CH0); /* LSB */ outb(delta >> 8 , PIT_CH0); /* MSB */ - spin_unlock_irqrestore(&i8253_lock, flags); + spin_unlock(&i8253_lock); return 0; } diff -puN arch/x86/kernel/i8259_32.c~git-x86 arch/x86/kernel/i8259_32.c --- a/arch/x86/kernel/i8259_32.c~git-x86 +++ a/arch/x86/kernel/i8259_32.c @@ -21,8 +21,6 @@ #include #include -#include - /* * This is the 'legacy' 8259A Programmable Interrupt Controller, * present in the majority of PC/AT boxes. 
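A note on the geode_gpio_*() rework above: the argument is now a u32 bitmask covering both 16-pin GPIO banks rather than a single pin number, so one call can drive several pins at once, and geode_gpio_isset() now reports success only when every requested pin reads back as set. A sketch of the resulting calling convention; the GPIO_PIN() helper and the GPIO_REG_OUT register name are illustrative, not taken from the patch:

	/* pin number -> bank bitmask: pins 0-15 select the low bank,
	 * pins 16-31 the high bank at base + 0x80 */
	#define GPIO_PIN(nr)	(1U << (nr))

	/* drive pin 3 (low bank) and pin 17 (high bank) in one call */
	geode_gpio_set(GPIO_PIN(3) | GPIO_PIN(17), GPIO_REG_OUT);

	/* true only if *both* pins read back as set */
	if (geode_gpio_isset(GPIO_PIN(3) | GPIO_PIN(17), GPIO_REG_OUT))
		/* ... */;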
@@ -341,7 +339,7 @@ static irqreturn_t math_error_irq(int cp outb(0,0xF0); if (ignore_fpu_irq || !boot_cpu_data.hard_math) return IRQ_NONE; - math_error((void __user *)get_irq_regs()->eip); + math_error((void __user *)get_irq_regs()->ip); return IRQ_HANDLED; } diff -puN arch/x86/kernel/i8259_64.c~git-x86 arch/x86/kernel/i8259_64.c --- a/arch/x86/kernel/i8259_64.c~git-x86 +++ a/arch/x86/kernel/i8259_64.c @@ -21,6 +21,7 @@ #include #include #include +#include /* * Common place to define all x86 IRQ vectors @@ -48,7 +49,7 @@ */ /* - * The IO-APIC gives us many more interrupt sources. Most of these + * The IO-APIC gives us many more interrupt sources. Most of these * are unused but an SMP system is supposed to have enough memory ... * sometimes (mostly wrt. hw bugs) we get corrupted vectors all * across the spectrum, so we really want to be prepared to get all @@ -114,11 +115,7 @@ static struct irq_chip i8259A_chip = { /* * This contains the irq mask for both 8259A irq controllers, */ -static unsigned int cached_irq_mask = 0xffff; - -#define __byte(x,y) (((unsigned char *)&(y))[x]) -#define cached_21 (__byte(0,cached_irq_mask)) -#define cached_A1 (__byte(1,cached_irq_mask)) +unsigned int cached_irq_mask = 0xffff; /* * Not all IRQs can be routed through the IO-APIC, eg. on certain (older) @@ -139,9 +136,9 @@ void disable_8259A_irq(unsigned int irq) spin_lock_irqsave(&i8259A_lock, flags); cached_irq_mask |= mask; if (irq & 8) - outb(cached_A1,0xA1); + outb(cached_slave_mask, PIC_SLAVE_IMR); else - outb(cached_21,0x21); + outb(cached_master_mask, PIC_MASTER_IMR); spin_unlock_irqrestore(&i8259A_lock, flags); } @@ -153,9 +150,9 @@ void enable_8259A_irq(unsigned int irq) spin_lock_irqsave(&i8259A_lock, flags); cached_irq_mask &= mask; if (irq & 8) - outb(cached_A1,0xA1); + outb(cached_slave_mask, PIC_SLAVE_IMR); else - outb(cached_21,0x21); + outb(cached_master_mask, PIC_MASTER_IMR); spin_unlock_irqrestore(&i8259A_lock, flags); } @@ -167,9 +164,9 @@ int i8259A_irq_pending(unsigned int irq) spin_lock_irqsave(&i8259A_lock, flags); if (irq < 8) - ret = inb(0x20) & mask; + ret = inb(PIC_MASTER_CMD) & mask; else - ret = inb(0xA0) & (mask >> 8); + ret = inb(PIC_SLAVE_CMD) & (mask >> 8); spin_unlock_irqrestore(&i8259A_lock, flags); return ret; @@ -196,14 +193,14 @@ static inline int i8259A_irq_real(unsign int irqmask = 1<<irq; if (irq < 8) { - outb(0x0B,0x20); /* ISR register */ - value = inb(0x20) & irqmask; - outb(0x0A,0x20); /* back to the IRR register */ + outb(0x0B,PIC_MASTER_CMD); /* ISR register */ + value = inb(PIC_MASTER_CMD) & irqmask; + outb(0x0A,PIC_MASTER_CMD); /* back to the IRR register */ return value; } - outb(0x0B,0xA0); /* ISR register */ - value = inb(0xA0) & (irqmask >> 8); - outb(0x0A,0xA0); /* back to the IRR register */ + outb(0x0B,PIC_SLAVE_CMD); /* ISR register */ + value = inb(PIC_SLAVE_CMD) & (irqmask >> 8); + outb(0x0A,PIC_SLAVE_CMD); /* back to the IRR register */ return value; } @@ -240,14 +237,17 @@ static void mask_and_ack_8259A(unsigned handle_real_irq: if (irq & 8) { - inb(0xA1); /* DUMMY - (do we need this?) */ - outb(cached_A1,0xA1); - outb(0x60+(irq&7),0xA0);/* 'Specific EOI' to slave */ - outb(0x62,0x20); /* 'Specific EOI' to master-IRQ2 */ + inb(PIC_SLAVE_IMR); /* DUMMY - (do we need this?) */ + outb(cached_slave_mask, PIC_SLAVE_IMR); + /* 'Specific EOI' to slave */ + outb(0x60+(irq&7),PIC_SLAVE_CMD); + /* 'Specific EOI' to master-IRQ2 */ + outb(0x60+PIC_CASCADE_IR,PIC_MASTER_CMD); } else { - inb(0x21); /* DUMMY - (do we need this?) */ - outb(cached_21,0x21); - outb(0x60+irq,0x20); /* 'Specific EOI' to master */ + inb(PIC_MASTER_IMR); /* DUMMY - (do we need this?) */ + outb(cached_master_mask, PIC_MASTER_IMR); + /* 'Specific EOI' to master */ + outb(0x60+irq,PIC_MASTER_CMD); } spin_unlock_irqrestore(&i8259A_lock, flags); return; @@ -270,7 +270,8 @@ spurious_8259A_irq: * lets ACK and report it. 
[once per IRQ] */ if (!(spurious_irq_mask & irqmask)) { - printk(KERN_DEBUG "spurious 8259A interrupt: IRQ%d.\n", irq); + printk(KERN_DEBUG + "spurious 8259A interrupt: IRQ%d.\n", irq); spurious_irq_mask |= irqmask; } atomic_inc(&irq_err_count); @@ -283,51 +284,6 @@ spurious_8259A_irq: } } -void init_8259A(int auto_eoi) -{ - unsigned long flags; - - i8259A_auto_eoi = auto_eoi; - - spin_lock_irqsave(&i8259A_lock, flags); - - outb(0xff, 0x21); /* mask all of 8259A-1 */ - outb(0xff, 0xA1); /* mask all of 8259A-2 */ - - /* - * outb_p - this has to work on a wide range of PC hardware. - */ - outb_p(0x11, 0x20); /* ICW1: select 8259A-1 init */ - outb_p(IRQ0_VECTOR, 0x21); /* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 */ - outb_p(0x04, 0x21); /* 8259A-1 (the master) has a slave on IR2 */ - if (auto_eoi) - outb_p(0x03, 0x21); /* master does Auto EOI */ - else - outb_p(0x01, 0x21); /* master expects normal EOI */ - - outb_p(0x11, 0xA0); /* ICW1: select 8259A-2 init */ - outb_p(IRQ8_VECTOR, 0xA1); /* ICW2: 8259A-2 IR0-7 mapped to 0x38-0x3f */ - outb_p(0x02, 0xA1); /* 8259A-2 is a slave on master's IR2 */ - outb_p(0x01, 0xA1); /* (slave's support for AEOI in flat mode - is to be investigated) */ - - if (auto_eoi) - /* - * in AEOI mode we just have to mask the interrupt - * when acking. - */ - i8259A_chip.mask_ack = disable_8259A_irq; - else - i8259A_chip.mask_ack = mask_and_ack_8259A; - - udelay(100); /* wait for 8259A to initialize */ - - outb(cached_21, 0x21); /* restore master IRQ mask */ - outb(cached_A1, 0xA1); /* restore slave IRQ mask */ - - spin_unlock_irqrestore(&i8259A_lock, flags); -} - static char irq_trigger[2]; /** * ELCR registers (0x4d0, 0x4d1) control edge/level of IRQ @@ -364,8 +320,8 @@ static int i8259A_shutdown(struct sys_de * the kernel initialization code can get it * out of. */ - outb(0xff, 0x21); /* mask all of 8259A-1 */ - outb(0xff, 0xA1); /* mask all of 8259A-1 */ + outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ + outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-1 */ return 0; } @@ -391,6 +347,58 @@ static int __init i8259A_init_sysfs(void device_initcall(i8259A_init_sysfs); +void init_8259A(int auto_eoi) +{ + unsigned long flags; + + i8259A_auto_eoi = auto_eoi; + + spin_lock_irqsave(&i8259A_lock, flags); + + outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ + outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ + + /* + * outb_p - this has to work on a wide range of PC hardware. + */ + outb_p(0x11, PIC_MASTER_CMD); /* ICW1: select 8259A-1 init */ + /* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 */ + outb_p(IRQ0_VECTOR, PIC_MASTER_IMR); + /* 8259A-1 (the master) has a slave on IR2 */ + outb_p(0x04, PIC_MASTER_IMR); + if (auto_eoi) /* master does Auto EOI */ + outb_p(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR); + else /* master expects normal EOI */ + outb_p(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR); + + outb_p(0x11, PIC_SLAVE_CMD); /* ICW1: select 8259A-2 init */ + /* ICW2: 8259A-2 IR0-7 mapped to 0x38-0x3f */ + outb_p(IRQ8_VECTOR, PIC_SLAVE_IMR); + /* 8259A-2 is a slave on master's IR2 */ + outb_p(PIC_CASCADE_IR, PIC_SLAVE_IMR); + /* (slave's support for AEOI in flat mode is to be investigated) */ + outb_p(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR); + + if (auto_eoi) + /* + * In AEOI mode we just have to mask the interrupt + * when acking. 
+ */ + i8259A_chip.mask_ack = disable_8259A_irq; + else + i8259A_chip.mask_ack = mask_and_ack_8259A; + + udelay(100); /* wait for 8259A to initialize */ + + outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */ + outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */ + + spin_unlock_irqrestore(&i8259A_lock, flags); +} + + + + /* * IRQ2 is cascade interrupt to second interrupt controller */ diff -puN arch/x86/kernel/init_task.c~git-x86 arch/x86/kernel/init_task.c --- a/arch/x86/kernel/init_task.c~git-x86 +++ a/arch/x86/kernel/init_task.c @@ -15,7 +15,6 @@ static struct files_struct init_files = static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); -EXPORT_SYMBOL(init_mm); /* * Initial thread structure. diff -puN arch/x86/kernel/io_apic_32.c~git-x86 arch/x86/kernel/io_apic_32.c --- a/arch/x86/kernel/io_apic_32.c~git-x86 +++ a/arch/x86/kernel/io_apic_32.c @@ -48,8 +48,6 @@ #include #include -#include "io_ports.h" - int (*ioapic_renumber_irq)(int ioapic, int irq); atomic_t irq_mis_count; diff -puN arch/x86/kernel/io_apic_64.c~git-x86 arch/x86/kernel/io_apic_64.c --- a/arch/x86/kernel/io_apic_64.c~git-x86 +++ a/arch/x86/kernel/io_apic_64.c @@ -35,6 +35,7 @@ #ifdef CONFIG_ACPI #include #endif +#include #include #include @@ -1069,7 +1070,7 @@ void __apicdebuginit print_local_APIC(vo v = apic_read(APIC_LVR); printk(KERN_INFO "... APIC VERSION: %08x\n", v); ver = GET_APIC_VERSION(v); - maxlvt = get_maxlvt(); + maxlvt = lapic_get_maxlvt(); v = apic_read(APIC_TASKPRI); printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK); @@ -1171,7 +1172,7 @@ void __apicdebuginit print_PIC(void) #endif /* 0 */ -static void __init enable_IO_APIC(void) +void __init enable_IO_APIC(void) { union IO_APIC_reg_01 reg_01; int i8259_apic, i8259_pin; @@ -1408,7 +1409,7 @@ static void irq_complete_move(unsigned i if (likely(!cfg->move_in_progress)) return; - vector = ~get_irq_regs()->orig_rax; + vector = ~get_irq_regs()->orig_ax; me = smp_processor_id(); if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) { cpumask_t cleanup_mask; @@ -1435,7 +1436,7 @@ static void ack_apic_level(unsigned int int do_unmask_irq = 0; irq_complete_move(irq); -#if defined(CONFIG_GENERIC_PENDING_IRQ) || defined(CONFIG_IRQBALANCE) +#ifdef CONFIG_GENERIC_PENDING_IRQ /* If we are moving the irq we need to mask it */ if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING)) { do_unmask_irq = 1; @@ -1780,7 +1781,10 @@ __setup("no_timer_check", notimercheck); void __init setup_IO_APIC(void) { - enable_IO_APIC(); + + /* + * calling enable_IO_APIC() is moved to setup_local_APIC for BP + */ if (acpi_ioapic) io_apic_irqs = ~0; /* all IRQs go through IOAPIC */ @@ -2280,3 +2284,92 @@ void __init setup_ioapic_dest(void) } #endif +#define IOAPIC_RESOURCE_NAME_SIZE 11 + +static struct resource *ioapic_resources; + +static struct resource * __init ioapic_setup_resources(void) +{ + unsigned long n; + struct resource *res; + char *mem; + int i; + + if (nr_ioapics <= 0) + return NULL; + + n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource); + n *= nr_ioapics; + + mem = alloc_bootmem(n); + res = (void *)mem; + + if (mem != NULL) { + memset(mem, 0, n); + mem += sizeof(struct resource) * nr_ioapics; + + for (i = 0; i < nr_ioapics; i++) { + res[i].name = mem; + res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; + sprintf(mem, "IOAPIC %u", i); + mem += IOAPIC_RESOURCE_NAME_SIZE; + 
} + } + + ioapic_resources = res; + + return res; +} + +void __init ioapic_init_mappings(void) +{ + unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; + struct resource *ioapic_res; + int i; + + ioapic_res = ioapic_setup_resources(); + for (i = 0; i < nr_ioapics; i++) { + if (smp_found_config) { + ioapic_phys = mp_ioapics[i].mpc_apicaddr; + } else { + ioapic_phys = (unsigned long) + alloc_bootmem_pages(PAGE_SIZE); + ioapic_phys = __pa(ioapic_phys); + } + set_fixmap_nocache(idx, ioapic_phys); + apic_printk(APIC_VERBOSE, + "mapped IOAPIC to %016lx (%016lx)\n", + __fix_to_virt(idx), ioapic_phys); + idx++; + + if (ioapic_res != NULL) { + ioapic_res->start = ioapic_phys; + ioapic_res->end = ioapic_phys + (4 * 1024) - 1; + ioapic_res++; + } + } +} + +static int __init ioapic_insert_resources(void) +{ + int i; + struct resource *r = ioapic_resources; + + if (!r) { + printk(KERN_ERR + "IO APIC resources could be not be allocated.\n"); + return -1; + } + + for (i = 0; i < nr_ioapics; i++) { + insert_resource(&iomem_resource, r); + r++; + } + + return 0; +} + +/* Insert the IO APIC resources after PCI initialization has occured to handle + * IO APICS that are mapped in on a BAR in PCI space. */ +late_initcall(ioapic_insert_resources); + diff -puN arch/x86/kernel/ioport_32.c~git-x86 arch/x86/kernel/ioport_32.c --- a/arch/x86/kernel/ioport_32.c~git-x86 +++ a/arch/x86/kernel/ioport_32.c @@ -16,49 +16,27 @@ #include /* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */ -static void set_bitmap(unsigned long *bitmap, unsigned int base, unsigned int extent, int new_value) +static void set_bitmap(unsigned long *bitmap, unsigned int base, + unsigned int extent, int new_value) { - unsigned long mask; - unsigned long *bitmap_base = bitmap + (base / BITS_PER_LONG); - unsigned int low_index = base & (BITS_PER_LONG-1); - int length = low_index + extent; - - if (low_index != 0) { - mask = (~0UL << low_index); - if (length < BITS_PER_LONG) - mask &= ~(~0UL << length); - if (new_value) - *bitmap_base++ |= mask; - else - *bitmap_base++ &= ~mask; - length -= BITS_PER_LONG; - } - - mask = (new_value ? ~0UL : 0UL); - while (length >= BITS_PER_LONG) { - *bitmap_base++ = mask; - length -= BITS_PER_LONG; - } + unsigned int i; - if (length > 0) { - mask = ~(~0UL << length); + for (i = base; i < base + extent; i++) { if (new_value) - *bitmap_base++ |= mask; + __set_bit(i, bitmap); else - *bitmap_base++ &= ~mask; + __clear_bit(i, bitmap); } } - /* * this changes the io permissions bitmap in the current task. 
*/ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) { - unsigned long i, max_long, bytes, bytes_updated; struct thread_struct * t = &current->thread; struct tss_struct * tss; - unsigned long *bitmap; + unsigned long i, max_long; if ((from + num <= from) || (from + num > IO_BITMAP_BITS)) return -EINVAL; @@ -71,7 +49,8 @@ asmlinkage long sys_ioperm(unsigned long * this is why we delay this operation until now: */ if (!t->io_bitmap_ptr) { - bitmap = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); + unsigned long *bitmap = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); + if (!bitmap) return -ENOMEM; @@ -100,10 +79,7 @@ asmlinkage long sys_ioperm(unsigned long if (t->io_bitmap_ptr[i] != ~0UL) max_long = i; - bytes = (max_long + 1) * sizeof(long); - bytes_updated = max(bytes, t->io_bitmap_max); - - t->io_bitmap_max = bytes; + t->io_bitmap_max = (max_long + 1) * sizeof(unsigned long); /* * Sets the lazy trigger so that the next I/O operation will @@ -124,17 +100,17 @@ asmlinkage long sys_ioperm(unsigned long * beyond the 0x3ff range: to get the full 65536 ports bitmapped * you'd need 8kB of bitmaps/process, which is a bit excessive. * - * Here we just change the eflags value on the stack: we allow + * Here we just change the flags value on the stack: we allow * only the super-user to do it. This depends on the stack-layout * on system-call entry - see also fork() and the signal handling * code. */ -asmlinkage long sys_iopl(unsigned long unused) +asmlinkage long sys_iopl(unsigned long regsp) { - volatile struct pt_regs * regs = (struct pt_regs *) &unused; - unsigned int level = regs->ebx; - unsigned int old = (regs->eflags >> 12) & 3; + volatile struct pt_regs *regs = (struct pt_regs *)&regsp; + unsigned int level = regs->bx; + unsigned int old = (regs->flags >> 12) & 3; struct thread_struct *t = &current->thread; if (level > 3) @@ -144,8 +120,10 @@ asmlinkage long sys_iopl(unsigned long u if (!capable(CAP_SYS_RAWIO)) return -EPERM; } + t->iopl = level << 12; - regs->eflags = (regs->eflags & ~X86_EFLAGS_IOPL) | t->iopl; + regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) | t->iopl; set_iopl_mask(t->iopl); + return 0; } diff -puN arch/x86/kernel/ioport_64.c~git-x86 arch/x86/kernel/ioport_64.c --- a/arch/x86/kernel/ioport_64.c~git-x86 +++ a/arch/x86/kernel/ioport_64.c @@ -95,7 +95,7 @@ asmlinkage long sys_ioperm(unsigned long * beyond the 0x3ff range: to get the full 65536 ports bitmapped * you'd need 8kB of bitmaps/process, which is a bit excessive. * - * Here we just change the eflags value on the stack: we allow + * Here we just change the flags value on the stack: we allow * only the super-user to do it. This depends on the stack-layout * on system-call entry - see also fork() and the signal handling * code. 
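The sys_iopl() arithmetic here is worth spelling out: IOPL is the two-bit field at bits 12-13 of the flags register, so the old level is extracted with (flags >> 12) & 3 and a new one installed by clearing X86_EFLAGS_IOPL and or-ing in level << 12. A minimal stand-alone illustration (plain C, example values made up):

	#include <assert.h>

	#define X86_EFLAGS_IOPL 0x3000UL	/* IOPL field, bits 12-13 */

	static unsigned long set_iopl(unsigned long flags, unsigned int level)
	{
		/* clear the old two-bit field, then install the new level */
		return (flags & ~X86_EFLAGS_IOPL) | ((unsigned long)level << 12);
	}

	int main(void)
	{
		unsigned long flags = 0x246UL;	/* a typical flags value, IOPL 0 */

		assert(((flags >> 12) & 3) == 0);
		flags = set_iopl(flags, 3);
		assert(flags == 0x3246UL);
		assert(((flags >> 12) & 3) == 3);
		return 0;
	}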
@@ -103,7 +103,7 @@ asmlinkage long sys_ioperm(unsigned long asmlinkage long sys_iopl(unsigned int level, struct pt_regs *regs) { - unsigned int old = (regs->eflags >> 12) & 3; + unsigned int old = (regs->flags >> 12) & 3; if (level > 3) return -EINVAL; @@ -112,6 +112,6 @@ asmlinkage long sys_iopl(unsigned int le if (!capable(CAP_SYS_RAWIO)) return -EPERM; } - regs->eflags = (regs->eflags &~ X86_EFLAGS_IOPL) | (level << 12); + regs->flags = (regs->flags &~ X86_EFLAGS_IOPL) | (level << 12); return 0; } diff -puN arch/x86/kernel/irq_32.c~git-x86 arch/x86/kernel/irq_32.c --- a/arch/x86/kernel/irq_32.c~git-x86 +++ a/arch/x86/kernel/irq_32.c @@ -70,7 +70,7 @@ fastcall unsigned int do_IRQ(struct pt_r { struct pt_regs *old_regs; /* high bit used in ret_from_ code */ - int irq = ~regs->orig_eax; + int irq = ~regs->orig_ax; struct irq_desc *desc = irq_desc + irq; #ifdef CONFIG_4KSTACKS union irq_ctx *curctx, *irqctx; @@ -88,13 +88,13 @@ fastcall unsigned int do_IRQ(struct pt_r #ifdef CONFIG_DEBUG_STACKOVERFLOW /* Debugging check for stack overflow: is there less than 1KB free? */ { - long esp; + long sp; __asm__ __volatile__("andl %%esp,%0" : - "=r" (esp) : "0" (THREAD_SIZE - 1)); - if (unlikely(esp < (sizeof(struct thread_info) + STACK_WARN))) { + "=r" (sp) : "0" (THREAD_SIZE - 1)); + if (unlikely(sp < (sizeof(struct thread_info) + STACK_WARN))) { printk("do_IRQ: stack overflow: %ld\n", - esp - sizeof(struct thread_info)); + sp - sizeof(struct thread_info)); dump_stack(); } } @@ -112,7 +112,7 @@ fastcall unsigned int do_IRQ(struct pt_r * current stack (which is the irq stack already after all) */ if (curctx != irqctx) { - int arg1, arg2, ebx; + int arg1, arg2, bx; /* build the stack frame on the IRQ stack */ isp = (u32*) ((char*)irqctx + sizeof(*irqctx)); @@ -128,10 +128,10 @@ fastcall unsigned int do_IRQ(struct pt_r (curctx->tinfo.preempt_count & SOFTIRQ_MASK); asm volatile( - " xchgl %%ebx,%%esp \n" - " call *%%edi \n" - " movl %%ebx,%%esp \n" - : "=a" (arg1), "=d" (arg2), "=b" (ebx) + " xchgl %%ebx,%%esp \n" + " call *%%edi \n" + " movl %%ebx,%%esp \n" + : "=a" (arg1), "=d" (arg2), "=b" (bx) : "0" (irq), "1" (desc), "2" (isp), "D" (desc->handle_irq) : "memory", "cc" diff -puN arch/x86/kernel/irq_64.c~git-x86 arch/x86/kernel/irq_64.c --- a/arch/x86/kernel/irq_64.c~git-x86 +++ a/arch/x86/kernel/irq_64.c @@ -20,6 +20,26 @@ atomic_t irq_err_count; +/* + * 'what should we do if we get a hw irq event on an illegal vector'. + * each architecture has to answer this themselves. + */ +void ack_bad_irq(unsigned int irq) +{ + printk(KERN_WARNING "unexpected IRQ trap at vector %02x\n", irq); + /* + * Currently unexpected vectors happen only on SMP and APIC. + * We _must_ ack these because every local APIC has only N + * irq slots per priority level, and a 'hanging, unacked' IRQ + * holds up an irq slot - in excessive cases (when multiple + * unexpected vectors occur) that might lock up the APIC + * completely. + * But don't ack when the APIC is disabled. 
-AK + */ + if (!disable_apic) + ack_APIC_irq(); +} + #ifdef CONFIG_DEBUG_STACKOVERFLOW /* * Probabilistic stack overflow check: @@ -33,11 +53,11 @@ static inline void stack_overflow_check( u64 curbase = (u64)task_stack_page(current); static unsigned long warned = -60*HZ; - if (regs->rsp >= curbase && regs->rsp <= curbase + THREAD_SIZE && - regs->rsp < curbase + sizeof(struct thread_info) + 128 && + if (regs->sp >= curbase && regs->sp <= curbase + THREAD_SIZE && + regs->sp < curbase + sizeof(struct thread_info) + 128 && time_after(jiffies, warned + 60*HZ)) { - printk("do_IRQ: %s near stack overflow (cur:%Lx,rsp:%lx)\n", - current->comm, curbase, regs->rsp); + printk("do_IRQ: %s near stack overflow (cur:%Lx,sp:%lx)\n", + current->comm, curbase, regs->sp); show_stack(NULL,NULL); warned = jiffies; } @@ -142,7 +162,7 @@ asmlinkage unsigned int do_IRQ(struct pt struct pt_regs *old_regs = set_irq_regs(regs); /* high bit used in ret_from_ code */ - unsigned vector = ~regs->orig_rax; + unsigned vector = ~regs->orig_ax; unsigned irq; exit_idle(); diff -puN arch/x86/kernel/kprobes_32.c~git-x86 arch/x86/kernel/kprobes_32.c --- a/arch/x86/kernel/kprobes_32.c~git-x86 +++ a/arch/x86/kernel/kprobes_32.c @@ -212,27 +212,40 @@ static void __kprobes set_current_kprobe { __get_cpu_var(current_kprobe) = p; kcb->kprobe_saved_eflags = kcb->kprobe_old_eflags - = (regs->eflags & (TF_MASK | IF_MASK)); + = (regs->flags & (TF_MASK | IF_MASK)); if (is_IF_modifier(p->opcode)) kcb->kprobe_saved_eflags &= ~IF_MASK; } +static __always_inline void clear_btf(void) +{ + if (test_thread_flag(TIF_DEBUGCTLMSR)) + wrmsr(MSR_IA32_DEBUGCTLMSR, 0, 0); +} + +static __always_inline void restore_btf(void) +{ + if (test_thread_flag(TIF_DEBUGCTLMSR)) + wrmsr(MSR_IA32_DEBUGCTLMSR, current->thread.debugctlmsr, 0); +} + static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) { - regs->eflags |= TF_MASK; - regs->eflags &= ~IF_MASK; + clear_btf(); + regs->flags |= TF_MASK; + regs->flags &= ~IF_MASK; /*single step inline if the instruction is an int3*/ if (p->opcode == BREAKPOINT_INSTRUCTION) - regs->eip = (unsigned long)p->addr; + regs->ip = (unsigned long)p->addr; else - regs->eip = (unsigned long)p->ainsn.insn; + regs->ip = (unsigned long)p->ainsn.insn; } /* Called with kretprobe_lock held */ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) { - unsigned long *sara = (unsigned long *)&regs->esp; + unsigned long *sara = (unsigned long *)&regs->sp; ri->ret_addr = (kprobe_opcode_t *) *sara; @@ -251,7 +264,7 @@ static int __kprobes kprobe_handler(stru kprobe_opcode_t *addr; struct kprobe_ctlblk *kcb; - addr = (kprobe_opcode_t *)(regs->eip - sizeof(kprobe_opcode_t)); + addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t)); /* * We don't want to be preempted for the entire @@ -266,8 +279,8 @@ static int __kprobes kprobe_handler(stru if (p) { if (kcb->kprobe_status == KPROBE_HIT_SS && *p->ainsn.insn == BREAKPOINT_INSTRUCTION) { - regs->eflags &= ~TF_MASK; - regs->eflags |= kcb->kprobe_saved_eflags; + regs->flags &= ~TF_MASK; + regs->flags |= kcb->kprobe_saved_eflags; goto no_kprobe; } /* We have reentered the kprobe_handler(), since @@ -288,7 +301,7 @@ static int __kprobes kprobe_handler(stru * another cpu right after we hit, no further * handling of this interrupt is appropriate */ - regs->eip -= sizeof(kprobe_opcode_t); + regs->ip -= sizeof(kprobe_opcode_t); ret = 1; goto no_kprobe; } @@ -312,7 +325,7 @@ static int __kprobes kprobe_handler(stru * Back up over the (now missing) 
int3 and run * the original instruction. */ - regs->eip -= sizeof(kprobe_opcode_t); + regs->ip -= sizeof(kprobe_opcode_t); ret = 1; } /* Not one of ours: let kernel handle it */ @@ -331,7 +344,7 @@ ss_probe: if (p->ainsn.boostable == 1 && !p->post_handler){ /* Boost up -- we can execute copied instructions directly */ reset_current_kprobe(); - regs->eip = (unsigned long)p->ainsn.insn; + regs->ip = (unsigned long)p->ainsn.insn; preempt_enable_no_resched(); return 1; } @@ -355,7 +368,7 @@ no_kprobe: asm volatile ( ".global kretprobe_trampoline\n" "kretprobe_trampoline: \n" " pushf\n" - /* skip cs, eip, orig_eax */ + /* skip cs, ip, orig_ax */ " subl $12, %esp\n" " pushl %fs\n" " pushl %ds\n" @@ -369,10 +382,10 @@ no_kprobe: " pushl %ebx\n" " movl %esp, %eax\n" " call trampoline_handler\n" - /* move eflags to cs */ + /* move flags to cs */ " movl 52(%esp), %edx\n" " movl %edx, 48(%esp)\n" - /* save true return address on eflags */ + /* save true return address on flags */ " movl %eax, 52(%esp)\n" " popl %ebx\n" " popl %ecx\n" @@ -381,7 +394,7 @@ no_kprobe: " popl %edi\n" " popl %ebp\n" " popl %eax\n" - /* skip eip, orig_eax, es, ds, fs */ + /* skip ip, orig_ax, es, ds, fs */ " addl $20, %esp\n" " popf\n" " ret\n"); @@ -402,9 +415,9 @@ fastcall void *__kprobes trampoline_hand spin_lock_irqsave(&kretprobe_lock, flags); head = kretprobe_inst_table_head(current); /* fixup registers */ - regs->xcs = __KERNEL_CS | get_kernel_rpl(); - regs->eip = trampoline_address; - regs->orig_eax = 0xffffffff; + regs->cs = __KERNEL_CS | get_kernel_rpl(); + regs->ip = trampoline_address; + regs->orig_ax = 0xffffffff; /* * It is possible to have multiple instances associated with a given @@ -465,11 +478,11 @@ fastcall void *__kprobes trampoline_hand * interrupt. We have to fix up the stack as follows: * * 0) Except in the case of absolute or indirect jump or call instructions, - * the new eip is relative to the copied instruction. We need to make + * the new ip is relative to the copied instruction. We need to make * it relative to the original instruction. * * 1) If the single-stepped instruction was pushfl, then the TF and IF - * flags are set in the just-pushed eflags, and may need to be cleared. + * flags are set in the just-pushed flags, and may need to be cleared. * * 2) If the single-stepped instruction was a call, the return address * that is atop the stack is the address following the copied instruction. @@ -480,11 +493,11 @@ fastcall void *__kprobes trampoline_hand static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) { - unsigned long *tos = (unsigned long *)®s->esp; + unsigned long *tos = (unsigned long *)®s->sp; unsigned long copy_eip = (unsigned long)p->ainsn.insn; unsigned long orig_eip = (unsigned long)p->addr; - regs->eflags &= ~TF_MASK; + regs->flags &= ~TF_MASK; switch (p->ainsn.insn[0]) { case 0x9c: /* pushfl */ *tos &= ~(TF_MASK | IF_MASK); @@ -495,8 +508,8 @@ static void __kprobes resume_execution(s case 0xca: case 0xcb: case 0xcf: - case 0xea: /* jmp absolute -- eip is correct */ - /* eip is already adjusted, no more changes required */ + case 0xea: /* jmp absolute -- ip is correct */ + /* ip is already adjusted, no more changes required */ p->ainsn.boostable = 1; goto no_change; case 0xe8: /* call relative - Fix return addr */ @@ -509,14 +522,14 @@ static void __kprobes resume_execution(s if ((p->ainsn.insn[1] & 0x30) == 0x10) { /* * call absolute, indirect - * Fix return addr; eip is correct. + * Fix return addr; ip is correct. 
* But this is not boostable */ *tos = orig_eip + (*tos - copy_eip); goto no_change; } else if (((p->ainsn.insn[1] & 0x31) == 0x20) || /* jmp near, absolute indirect */ ((p->ainsn.insn[1] & 0x31) == 0x21)) { /* jmp far, absolute indirect */ - /* eip is correct. And this is boostable */ + /* ip is correct. And this is boostable */ p->ainsn.boostable = 1; goto no_change; } @@ -525,23 +538,25 @@ static void __kprobes resume_execution(s } if (p->ainsn.boostable == 0) { - if ((regs->eip > copy_eip) && - (regs->eip - copy_eip) + 5 < MAX_INSN_SIZE) { + if ((regs->ip > copy_eip) && + (regs->ip - copy_eip) + 5 < MAX_INSN_SIZE) { /* * These instructions can be executed directly if it * jumps back to correct address. */ - set_jmp_op((void *)regs->eip, - (void *)orig_eip + (regs->eip - copy_eip)); + set_jmp_op((void *)regs->ip, + (void *)orig_eip + (regs->ip - copy_eip)); p->ainsn.boostable = 1; } else { p->ainsn.boostable = -1; } } - regs->eip = orig_eip + (regs->eip - copy_eip); + regs->ip = orig_eip + (regs->ip - copy_eip); no_change: + restore_btf(); + return; } @@ -563,8 +578,8 @@ static int __kprobes post_kprobe_handler } resume_execution(cur, regs, kcb); - regs->eflags |= kcb->kprobe_saved_eflags; - trace_hardirqs_fixup_flags(regs->eflags); + regs->flags |= kcb->kprobe_saved_eflags; + trace_hardirqs_fixup_flags(regs->flags); /*Restore back the original saved kprobes variables and continue. */ if (kcb->kprobe_status == KPROBE_REENTER) { @@ -576,11 +591,11 @@ out: preempt_enable_no_resched(); /* - * if somebody else is singlestepping across a probe point, eflags + * if somebody else is singlestepping across a probe point, flags * will have TF set, in which case, continue the remaining processing * of do_debug, as if this is not a probe hit. */ - if (regs->eflags & TF_MASK) + if (regs->flags & TF_MASK) return 0; return 1; @@ -597,12 +612,12 @@ int __kprobes kprobe_fault_handler(struc /* * We are here because the instruction being single * stepped caused a page fault. We reset the current - * kprobe and the eip points back to the probe address + * kprobe and the ip points back to the probe address * and allow the page fault handler to continue as a * normal page fault. 
*/ - regs->eip = (unsigned long)cur->addr; - regs->eflags |= kcb->kprobe_old_eflags; + regs->ip = (unsigned long)cur->addr; + regs->flags |= kcb->kprobe_old_eflags; if (kcb->kprobe_status == KPROBE_REENTER) restore_previous_kprobe(kcb); else @@ -688,7 +703,7 @@ int __kprobes setjmp_pre_handler(struct struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); kcb->jprobe_saved_regs = *regs; - kcb->jprobe_saved_esp = &regs->esp; + kcb->jprobe_saved_esp = &regs->sp; addr = (unsigned long)(kcb->jprobe_saved_esp); /* @@ -700,9 +715,9 @@ int __kprobes setjmp_pre_handler(struct */ memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr, MIN_STACK_SIZE(addr)); - regs->eflags &= ~IF_MASK; + regs->flags &= ~IF_MASK; trace_hardirqs_off(); - regs->eip = (unsigned long)(jp->entry); + regs->ip = (unsigned long)(jp->entry); return 1; } @@ -721,17 +736,17 @@ void __kprobes jprobe_return(void) int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - u8 *addr = (u8 *) (regs->eip - 1); + u8 *addr = (u8 *) (regs->ip - 1); unsigned long stack_addr = (unsigned long)(kcb->jprobe_saved_esp); struct jprobe *jp = container_of(p, struct jprobe, kp); if ((addr > (u8 *) jprobe_return) && (addr < (u8 *) jprobe_return_end)) { - if (&regs->esp != kcb->jprobe_saved_esp) { + if (&regs->sp != kcb->jprobe_saved_esp) { struct pt_regs *saved_regs = container_of(kcb->jprobe_saved_esp, - struct pt_regs, esp); - printk("current esp %p does not match saved esp %p\n", - &regs->esp, kcb->jprobe_saved_esp); + struct pt_regs, sp); + printk("current sp %p does not match saved sp %p\n", + &regs->sp, kcb->jprobe_saved_esp); printk("Saved registers for jprobe %p\n", jp); show_registers(saved_regs); printk("Current registers\n"); diff -puN arch/x86/kernel/kprobes_64.c~git-x86 arch/x86/kernel/kprobes_64.c --- a/arch/x86/kernel/kprobes_64.c~git-x86 +++ a/arch/x86/kernel/kprobes_64.c @@ -251,27 +251,40 @@ static void __kprobes set_current_kprobe { __get_cpu_var(current_kprobe) = p; kcb->kprobe_saved_rflags = kcb->kprobe_old_rflags - = (regs->eflags & (TF_MASK | IF_MASK)); + = (regs->flags & (TF_MASK | IF_MASK)); if (is_IF_modifier(p->ainsn.insn)) kcb->kprobe_saved_rflags &= ~IF_MASK; } +static __always_inline void clear_btf(void) +{ + if (test_thread_flag(TIF_DEBUGCTLMSR)) + wrmsrl(MSR_IA32_DEBUGCTLMSR, 0); +} + +static __always_inline void restore_btf(void) +{ + if (test_thread_flag(TIF_DEBUGCTLMSR)) + wrmsrl(MSR_IA32_DEBUGCTLMSR, current->thread.debugctlmsr); +} + static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) { - regs->eflags |= TF_MASK; - regs->eflags &= ~IF_MASK; + clear_btf(); + regs->flags |= TF_MASK; + regs->flags &= ~IF_MASK; /*single step inline if the instruction is an int3*/ if (p->opcode == BREAKPOINT_INSTRUCTION) - regs->rip = (unsigned long)p->addr; + regs->ip = (unsigned long)p->addr; else - regs->rip = (unsigned long)p->ainsn.insn; + regs->ip = (unsigned long)p->ainsn.insn; } /* Called with kretprobe_lock held */ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) { - unsigned long *sara = (unsigned long *)regs->rsp; + unsigned long *sara = (unsigned long *)regs->sp; ri->ret_addr = (kprobe_opcode_t *) *sara; /* Replace the return addr with trampoline addr */ @@ -282,7 +295,7 @@ int __kprobes kprobe_handler(struct pt_r { struct kprobe *p; int ret = 0; - kprobe_opcode_t *addr = (kprobe_opcode_t *)(regs->rip - sizeof(kprobe_opcode_t)); 
struct kprobe_ctlblk *kcb; /* @@ -298,8 +311,8 @@ int __kprobes kprobe_handler(struct pt_r if (p) { if (kcb->kprobe_status == KPROBE_HIT_SS && *p->ainsn.insn == BREAKPOINT_INSTRUCTION) { - regs->eflags &= ~TF_MASK; - regs->eflags |= kcb->kprobe_saved_rflags; + regs->flags &= ~TF_MASK; + regs->flags |= kcb->kprobe_saved_rflags; goto no_kprobe; } else if (kcb->kprobe_status == KPROBE_HIT_SSDONE) { /* TODO: Provide re-entrancy from @@ -308,7 +321,7 @@ int __kprobes kprobe_handler(struct pt_r * the instruction of the new probe. */ arch_disarm_kprobe(p); - regs->rip = (unsigned long)p->addr; + regs->ip = (unsigned long)p->addr; reset_current_kprobe(); ret = 1; } else { @@ -332,7 +345,7 @@ int __kprobes kprobe_handler(struct pt_r * another cpu right after we hit, no further * handling of this interrupt is appropriate */ - regs->rip = (unsigned long)addr; + regs->ip = (unsigned long)addr; ret = 1; goto no_kprobe; } @@ -356,7 +369,7 @@ int __kprobes kprobe_handler(struct pt_r * Back up over the (now missing) int3 and run * the original instruction. */ - regs->rip = (unsigned long)addr; + regs->ip = (unsigned long)addr; ret = 1; } /* Not one of ours: let kernel handle it */ @@ -441,7 +454,7 @@ int __kprobes trampoline_probe_handler(s } kretprobe_assert(ri, orig_ret_address, trampoline_address); - regs->rip = orig_ret_address; + regs->ip = orig_ret_address; reset_current_kprobe(); spin_unlock_irqrestore(&kretprobe_lock, flags); @@ -471,11 +484,11 @@ int __kprobes trampoline_probe_handler(s * interrupt. We have to fix up the stack as follows: * * 0) Except in the case of absolute or indirect jump or call instructions, - * the new rip is relative to the copied instruction. We need to make + * the new ip is relative to the copied instruction. We need to make * it relative to the original instruction. * * 1) If the single-stepped instruction was pushfl, then the TF and IF - * flags are set in the just-pushed eflags, and may need to be cleared. + * flags are set in the just-pushed flags, and may need to be cleared. * * 2) If the single-stepped instruction was a call, the return address * that is atop the stack is the address following the copied instruction. @@ -484,7 +497,7 @@ int __kprobes trampoline_probe_handler(s static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) { - unsigned long *tos = (unsigned long *)regs->rsp; + unsigned long *tos = (unsigned long *)regs->sp; unsigned long next_rip = 0; unsigned long copy_rip = (unsigned long)p->ainsn.insn; unsigned long orig_rip = (unsigned long)p->addr; @@ -503,8 +516,8 @@ static void __kprobes resume_execution(s case 0xcb: case 0xc2: case 0xca: - regs->eflags &= ~TF_MASK; - /* rip is already adjusted, no more changes required*/ + regs->flags &= ~TF_MASK; + /* ip is already adjusted, no more changes required*/ return; case 0xe8: /* call relative - Fix return addr */ *tos = orig_rip + (*tos - copy_rip); @@ -512,28 +525,30 @@ static void __kprobes resume_execution(s case 0xff: if ((insn[1] & 0x30) == 0x10) { /* call absolute, indirect */ - /* Fix return addr; rip is correct. */ - next_rip = regs->rip; + /* Fix return addr; ip is correct. */ + next_rip = regs->ip; *tos = orig_rip + (*tos - copy_rip); } else if (((insn[1] & 0x31) == 0x20) || /* jmp near, absolute indirect */ ((insn[1] & 0x31) == 0x21)) { /* jmp far, absolute indirect */ - /* rip is correct. */ - next_rip = regs->rip; + /* ip is correct. 
*/ + next_rip = regs->ip; } break; - case 0xea: /* jmp absolute -- rip is correct */ - next_rip = regs->rip; + case 0xea: /* jmp absolute -- ip is correct */ + next_rip = regs->ip; break; default: break; } - regs->eflags &= ~TF_MASK; + regs->flags &= ~TF_MASK; if (next_rip) { - regs->rip = next_rip; + regs->ip = next_rip; } else { - regs->rip = orig_rip + (regs->rip - copy_rip); + regs->ip = orig_rip + (regs->ip - copy_rip); } + + restore_btf(); } int __kprobes post_kprobe_handler(struct pt_regs *regs) @@ -550,8 +565,8 @@ int __kprobes post_kprobe_handler(struct } resume_execution(cur, regs, kcb); - regs->eflags |= kcb->kprobe_saved_rflags; - trace_hardirqs_fixup_flags(regs->eflags); + regs->flags |= kcb->kprobe_saved_rflags; + trace_hardirqs_fixup_flags(regs->flags); /* Restore the original saved kprobes variables and continue. */ if (kcb->kprobe_status == KPROBE_REENTER) { @@ -563,11 +578,11 @@ out: preempt_enable_no_resched(); /* - * if somebody else is singlestepping across a probe point, eflags + * if somebody else is singlestepping across a probe point, flags * will have TF set, in which case, continue the remaining processing * of do_debug, as if this is not a probe hit. */ - if (regs->eflags & TF_MASK) + if (regs->flags & TF_MASK) return 0; return 1; @@ -585,12 +600,12 @@ int __kprobes kprobe_fault_handler(struc /* * We are here because the instruction being single * stepped caused a page fault. We reset the current - * kprobe and the rip points back to the probe address + * kprobe and the ip points back to the probe address * and allow the page fault handler to continue as a * normal page fault. */ - regs->rip = (unsigned long)cur->addr; - regs->eflags |= kcb->kprobe_old_rflags; + regs->ip = (unsigned long)cur->addr; + regs->flags |= kcb->kprobe_old_rflags; if (kcb->kprobe_status == KPROBE_REENTER) restore_previous_kprobe(kcb); else @@ -620,9 +635,9 @@ int __kprobes kprobe_fault_handler(struc * In case the user-specified fault handler returned * zero, try to fix up. 
*/ - fixup = search_exception_tables(regs->rip); + fixup = search_exception_tables(regs->ip); if (fixup) { - regs->rip = fixup->fixup; + regs->ip = fixup->fixup; return 1; } @@ -679,7 +694,7 @@ int __kprobes setjmp_pre_handler(struct struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); kcb->jprobe_saved_regs = *regs; - kcb->jprobe_saved_rsp = (long *) regs->rsp; + kcb->jprobe_saved_rsp = (long *) regs->sp; addr = (unsigned long)(kcb->jprobe_saved_rsp); /* * As Linus pointed out, gcc assumes that the callee @@ -690,9 +705,9 @@ int __kprobes setjmp_pre_handler(struct */ memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr, MIN_STACK_SIZE(addr)); - regs->eflags &= ~IF_MASK; + regs->flags &= ~IF_MASK; trace_hardirqs_off(); - regs->rip = (unsigned long)(jp->entry); + regs->ip = (unsigned long)(jp->entry); return 1; } @@ -711,17 +726,17 @@ void __kprobes jprobe_return(void) int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - u8 *addr = (u8 *) (regs->rip - 1); + u8 *addr = (u8 *) (regs->ip - 1); unsigned long stack_addr = (unsigned long)(kcb->jprobe_saved_rsp); struct jprobe *jp = container_of(p, struct jprobe, kp); if ((addr > (u8 *) jprobe_return) && (addr < (u8 *) jprobe_return_end)) { - if ((long *)regs->rsp != kcb->jprobe_saved_rsp) { + if ((long *)regs->sp != kcb->jprobe_saved_rsp) { struct pt_regs *saved_regs = container_of(kcb->jprobe_saved_rsp, - struct pt_regs, rsp); - printk("current rsp %p does not match saved rsp %p\n", - (long *)regs->rsp, kcb->jprobe_saved_rsp); + struct pt_regs, sp); + printk("current sp %p does not match saved sp %p\n", + (long *)regs->sp, kcb->jprobe_saved_rsp); printk("Saved registers for jprobe %p\n", jp); show_registers(saved_regs); printk("Current registers\n"); diff -puN /dev/null arch/x86/kernel/ldt.c --- /dev/null +++ a/arch/x86/kernel/ldt.c @@ -0,0 +1,264 @@ +/* + * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds + * Copyright (C) 1999 Ingo Molnar + * Copyright (C) 2002 Andi Kleen + * + * This handles calls from both 32bit and 64bit mode. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#ifdef CONFIG_SMP +static void flush_ldt(void *null) +{ + if (current->active_mm) + load_LDT(&current->active_mm->context); +} +#endif + +static int alloc_ldt(mm_context_t *pc, int mincount, int reload) +{ + void *oldldt, *newldt; + int oldsize; + + if (mincount <= pc->size) + return 0; + oldsize = pc->size; + mincount = (mincount + 511) & (~511); + if (mincount * LDT_ENTRY_SIZE > PAGE_SIZE) + newldt = vmalloc(mincount * LDT_ENTRY_SIZE); + else + newldt = kmalloc(mincount * LDT_ENTRY_SIZE, GFP_KERNEL); + + if (!newldt) + return -ENOMEM; + + if (oldsize) + memcpy(newldt, pc->ldt, oldsize * LDT_ENTRY_SIZE); + oldldt = pc->ldt; + memset(newldt + oldsize * LDT_ENTRY_SIZE, 0, + (mincount - oldsize) * LDT_ENTRY_SIZE); + +#ifdef CONFIG_X86_64 + /* CHECKME: Do we really need this ? 
*/ + wmb(); +#endif + pc->ldt = newldt; + wmb(); + pc->size = mincount; + wmb(); + + if (reload) { +#ifdef CONFIG_SMP + cpumask_t mask; + + preempt_disable(); + load_LDT(pc); + mask = cpumask_of_cpu(smp_processor_id()); + if (!cpus_equal(current->mm->cpu_vm_mask, mask)) + smp_call_function(flush_ldt, NULL, 1, 1); + preempt_enable(); +#else + load_LDT(pc); +#endif + } + if (oldsize) { + if (oldsize * LDT_ENTRY_SIZE > PAGE_SIZE) + vfree(oldldt); + else + kfree(oldldt); + } + return 0; +} + +static inline int copy_ldt(mm_context_t *new, mm_context_t *old) +{ + int err = alloc_ldt(new, old->size, 0); + + if (err < 0) + return err; + memcpy(new->ldt, old->ldt, old->size * LDT_ENTRY_SIZE); + return 0; +} + +/* + * we do not have to muck with descriptors here, that is + * done in switch_mm() as needed. + */ +int init_new_context(struct task_struct *tsk, struct mm_struct *mm) +{ + struct mm_struct *old_mm; + int retval = 0; + + mutex_init(&mm->context.lock); + mm->context.size = 0; + old_mm = current->mm; + if (old_mm && old_mm->context.size > 0) { + mutex_lock(&old_mm->context.lock); + retval = copy_ldt(&mm->context, &old_mm->context); + mutex_unlock(&old_mm->context.lock); + } + return retval; +} + +/* + * No need to lock the MM as we are the last user + * + * 64bit: Don't touch the LDT register - we're already in the next thread. + */ +void destroy_context(struct mm_struct *mm) +{ + if (mm->context.size) { +#ifdef CONFIG_X86_32 + /* CHECKME: Can this ever happen ? */ + if (mm == current->active_mm) + clear_LDT(); +#endif + if (mm->context.size * LDT_ENTRY_SIZE > PAGE_SIZE) + vfree(mm->context.ldt); + else + kfree(mm->context.ldt); + mm->context.size = 0; + } +} + +static int read_ldt(void __user *ptr, unsigned long bytecount) +{ + int err; + unsigned long size; + struct mm_struct *mm = current->mm; + + if (!mm->context.size) + return 0; + if (bytecount > LDT_ENTRY_SIZE * LDT_ENTRIES) + bytecount = LDT_ENTRY_SIZE * LDT_ENTRIES; + + mutex_lock(&mm->context.lock); + size = mm->context.size * LDT_ENTRY_SIZE; + if (size > bytecount) + size = bytecount; + + err = 0; + if (copy_to_user(ptr, mm->context.ldt, size)) + err = -EFAULT; + mutex_unlock(&mm->context.lock); + if (err < 0) + goto error_return; + if (size != bytecount) { + /* zero-fill the rest */ + if (clear_user(ptr + size, bytecount - size) != 0) { + err = -EFAULT; + goto error_return; + } + } + return bytecount; +error_return: + return err; +} + +static int read_default_ldt(void __user *ptr, unsigned long bytecount) +{ + /* CHECKME: Can we use _one_ random number ? 
*/ +#ifdef CONFIG_X86_32 + unsigned long size = 5 * sizeof(struct desc_struct); +#else + unsigned long size = 128; +#endif + if (bytecount > size) + bytecount = size; + if (clear_user(ptr, bytecount)) + return -EFAULT; + return bytecount; +} + +static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) +{ + struct mm_struct *mm = current->mm; + __u32 entry_1, entry_2; + int error; + struct user_desc ldt_info; + + error = -EINVAL; + if (bytecount != sizeof(ldt_info)) + goto out; + error = -EFAULT; + if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info))) + goto out; + + error = -EINVAL; + if (ldt_info.entry_number >= LDT_ENTRIES) + goto out; + if (ldt_info.contents == 3) { + if (oldmode) + goto out; + if (ldt_info.seg_not_present == 0) + goto out; + } + + mutex_lock(&mm->context.lock); + if (ldt_info.entry_number >= mm->context.size) { + error = alloc_ldt(&current->mm->context, + ldt_info.entry_number + 1, 1); + if (error < 0) + goto out_unlock; + } + + /* Allow LDTs to be cleared by the user. */ + if (ldt_info.base_addr == 0 && ldt_info.limit == 0) { + if (oldmode || LDT_empty(&ldt_info)) { + entry_1 = 0; + entry_2 = 0; + goto install; + } + } + + entry_1 = LDT_entry_a(&ldt_info); + entry_2 = LDT_entry_b(&ldt_info); + if (oldmode) + entry_2 &= ~(1 << 20); + + /* Install the new entry ... */ +install: + write_ldt_entry(mm->context.ldt, ldt_info.entry_number, entry_1, + entry_2); + error = 0; + +out_unlock: + mutex_unlock(&mm->context.lock); +out: + return error; +} + +asmlinkage int sys_modify_ldt(int func, void __user *ptr, + unsigned long bytecount) +{ + int ret = -ENOSYS; + + switch (func) { + case 0: + ret = read_ldt(ptr, bytecount); + break; + case 1: + ret = write_ldt(ptr, bytecount, 1); + break; + case 2: + ret = read_default_ldt(ptr, bytecount); + break; + case 0x11: + ret = write_ldt(ptr, bytecount, 0); + break; + } + return ret; +} diff -puN arch/x86/kernel/ldt_32.c~git-x86 /dev/null --- a/arch/x86/kernel/ldt_32.c +++ /dev/null @@ -1,248 +0,0 @@ -/* - * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds - * Copyright (C) 1999 Ingo Molnar - */ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#ifdef CONFIG_SMP /* avoids "defined but not used" warnig */ -static void flush_ldt(void *null) -{ - if (current->active_mm) - load_LDT(&current->active_mm->context); -} -#endif - -static int alloc_ldt(mm_context_t *pc, int mincount, int reload) -{ - void *oldldt; - void *newldt; - int oldsize; - - if (mincount <= pc->size) - return 0; - oldsize = pc->size; - mincount = (mincount+511)&(~511); - if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE) - newldt = vmalloc(mincount*LDT_ENTRY_SIZE); - else - newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL); - - if (!newldt) - return -ENOMEM; - - if (oldsize) - memcpy(newldt, pc->ldt, oldsize*LDT_ENTRY_SIZE); - oldldt = pc->ldt; - memset(newldt+oldsize*LDT_ENTRY_SIZE, 0, (mincount-oldsize)*LDT_ENTRY_SIZE); - pc->ldt = newldt; - wmb(); - pc->size = mincount; - wmb(); - - if (reload) { -#ifdef CONFIG_SMP - cpumask_t mask; - preempt_disable(); - load_LDT(pc); - mask = cpumask_of_cpu(smp_processor_id()); - if (!cpus_equal(current->mm->cpu_vm_mask, mask)) - smp_call_function(flush_ldt, NULL, 1, 1); - preempt_enable(); -#else - load_LDT(pc); -#endif - } - if (oldsize) { - if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE) - vfree(oldldt); - else - kfree(oldldt); - } - return 0; -} - -static inline int copy_ldt(mm_context_t *new, mm_context_t *old) -{ - int err = alloc_ldt(new, 
old->size, 0); - if (err < 0) - return err; - memcpy(new->ldt, old->ldt, old->size*LDT_ENTRY_SIZE); - return 0; -} - -/* - * we do not have to muck with descriptors here, that is - * done in switch_mm() as needed. - */ -int init_new_context(struct task_struct *tsk, struct mm_struct *mm) -{ - struct mm_struct * old_mm; - int retval = 0; - - mutex_init(&mm->context.lock); - mm->context.size = 0; - old_mm = current->mm; - if (old_mm && old_mm->context.size > 0) { - mutex_lock(&old_mm->context.lock); - retval = copy_ldt(&mm->context, &old_mm->context); - mutex_unlock(&old_mm->context.lock); - } - return retval; -} - -/* - * No need to lock the MM as we are the last user - */ -void destroy_context(struct mm_struct *mm) -{ - if (mm->context.size) { - if (mm == current->active_mm) - clear_LDT(); - if (mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE) - vfree(mm->context.ldt); - else - kfree(mm->context.ldt); - mm->context.size = 0; - } -} - -static int read_ldt(void __user * ptr, unsigned long bytecount) -{ - int err; - unsigned long size; - struct mm_struct * mm = current->mm; - - if (!mm->context.size) - return 0; - if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES) - bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES; - - mutex_lock(&mm->context.lock); - size = mm->context.size*LDT_ENTRY_SIZE; - if (size > bytecount) - size = bytecount; - - err = 0; - if (copy_to_user(ptr, mm->context.ldt, size)) - err = -EFAULT; - mutex_unlock(&mm->context.lock); - if (err < 0) - goto error_return; - if (size != bytecount) { - /* zero-fill the rest */ - if (clear_user(ptr+size, bytecount-size) != 0) { - err = -EFAULT; - goto error_return; - } - } - return bytecount; -error_return: - return err; -} - -static int read_default_ldt(void __user * ptr, unsigned long bytecount) -{ - int err; - unsigned long size; - - err = 0; - size = 5*sizeof(struct desc_struct); - if (size > bytecount) - size = bytecount; - - err = size; - if (clear_user(ptr, size)) - err = -EFAULT; - - return err; -} - -static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode) -{ - struct mm_struct * mm = current->mm; - __u32 entry_1, entry_2; - int error; - struct user_desc ldt_info; - - error = -EINVAL; - if (bytecount != sizeof(ldt_info)) - goto out; - error = -EFAULT; - if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info))) - goto out; - - error = -EINVAL; - if (ldt_info.entry_number >= LDT_ENTRIES) - goto out; - if (ldt_info.contents == 3) { - if (oldmode) - goto out; - if (ldt_info.seg_not_present == 0) - goto out; - } - - mutex_lock(&mm->context.lock); - if (ldt_info.entry_number >= mm->context.size) { - error = alloc_ldt(&current->mm->context, ldt_info.entry_number+1, 1); - if (error < 0) - goto out_unlock; - } - - /* Allow LDTs to be cleared by the user. */ - if (ldt_info.base_addr == 0 && ldt_info.limit == 0) { - if (oldmode || LDT_empty(&ldt_info)) { - entry_1 = 0; - entry_2 = 0; - goto install; - } - } - - entry_1 = LDT_entry_a(&ldt_info); - entry_2 = LDT_entry_b(&ldt_info); - if (oldmode) - entry_2 &= ~(1 << 20); - - /* Install the new entry ... 
*/ -install: - write_ldt_entry(mm->context.ldt, ldt_info.entry_number, entry_1, entry_2); - error = 0; - -out_unlock: - mutex_unlock(&mm->context.lock); -out: - return error; -} - -asmlinkage int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount) -{ - int ret = -ENOSYS; - - switch (func) { - case 0: - ret = read_ldt(ptr, bytecount); - break; - case 1: - ret = write_ldt(ptr, bytecount, 1); - break; - case 2: - ret = read_default_ldt(ptr, bytecount); - break; - case 0x11: - ret = write_ldt(ptr, bytecount, 0); - break; - } - return ret; -} diff -puN arch/x86/kernel/ldt_64.c~git-x86 /dev/null --- a/arch/x86/kernel/ldt_64.c +++ /dev/null @@ -1,250 +0,0 @@ -/* - * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds - * Copyright (C) 1999 Ingo Molnar - * Copyright (C) 2002 Andi Kleen - * - * This handles calls from both 32bit and 64bit mode. - */ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#ifdef CONFIG_SMP /* avoids "defined but not used" warnig */ -static void flush_ldt(void *null) -{ - if (current->active_mm) - load_LDT(&current->active_mm->context); -} -#endif - -static int alloc_ldt(mm_context_t *pc, unsigned mincount, int reload) -{ - void *oldldt; - void *newldt; - unsigned oldsize; - - if (mincount <= (unsigned)pc->size) - return 0; - oldsize = pc->size; - mincount = (mincount+511)&(~511); - if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE) - newldt = vmalloc(mincount*LDT_ENTRY_SIZE); - else - newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL); - - if (!newldt) - return -ENOMEM; - - if (oldsize) - memcpy(newldt, pc->ldt, oldsize*LDT_ENTRY_SIZE); - oldldt = pc->ldt; - memset(newldt+oldsize*LDT_ENTRY_SIZE, 0, (mincount-oldsize)*LDT_ENTRY_SIZE); - wmb(); - pc->ldt = newldt; - wmb(); - pc->size = mincount; - wmb(); - if (reload) { -#ifdef CONFIG_SMP - cpumask_t mask; - - preempt_disable(); - mask = cpumask_of_cpu(smp_processor_id()); - load_LDT(pc); - if (!cpus_equal(current->mm->cpu_vm_mask, mask)) - smp_call_function(flush_ldt, NULL, 1, 1); - preempt_enable(); -#else - load_LDT(pc); -#endif - } - if (oldsize) { - if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE) - vfree(oldldt); - else - kfree(oldldt); - } - return 0; -} - -static inline int copy_ldt(mm_context_t *new, mm_context_t *old) -{ - int err = alloc_ldt(new, old->size, 0); - if (err < 0) - return err; - memcpy(new->ldt, old->ldt, old->size*LDT_ENTRY_SIZE); - return 0; -} - -/* - * we do not have to muck with descriptors here, that is - * done in switch_mm() as needed. - */ -int init_new_context(struct task_struct *tsk, struct mm_struct *mm) -{ - struct mm_struct * old_mm; - int retval = 0; - - mutex_init(&mm->context.lock); - mm->context.size = 0; - old_mm = current->mm; - if (old_mm && old_mm->context.size > 0) { - mutex_lock(&old_mm->context.lock); - retval = copy_ldt(&mm->context, &old_mm->context); - mutex_unlock(&old_mm->context.lock); - } - return retval; -} - -/* - * - * Don't touch the LDT register - we're already in the next thread. 
- */ -void destroy_context(struct mm_struct *mm) -{ - if (mm->context.size) { - if ((unsigned)mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE) - vfree(mm->context.ldt); - else - kfree(mm->context.ldt); - mm->context.size = 0; - } -} - -static int read_ldt(void __user * ptr, unsigned long bytecount) -{ - int err; - unsigned long size; - struct mm_struct * mm = current->mm; - - if (!mm->context.size) - return 0; - if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES) - bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES; - - mutex_lock(&mm->context.lock); - size = mm->context.size*LDT_ENTRY_SIZE; - if (size > bytecount) - size = bytecount; - - err = 0; - if (copy_to_user(ptr, mm->context.ldt, size)) - err = -EFAULT; - mutex_unlock(&mm->context.lock); - if (err < 0) - goto error_return; - if (size != bytecount) { - /* zero-fill the rest */ - if (clear_user(ptr+size, bytecount-size) != 0) { - err = -EFAULT; - goto error_return; - } - } - return bytecount; -error_return: - return err; -} - -static int read_default_ldt(void __user * ptr, unsigned long bytecount) -{ - /* Arbitrary number */ - /* x86-64 default LDT is all zeros */ - if (bytecount > 128) - bytecount = 128; - if (clear_user(ptr, bytecount)) - return -EFAULT; - return bytecount; -} - -static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode) -{ - struct task_struct *me = current; - struct mm_struct * mm = me->mm; - __u32 entry_1, entry_2, *lp; - int error; - struct user_desc ldt_info; - - error = -EINVAL; - - if (bytecount != sizeof(ldt_info)) - goto out; - error = -EFAULT; - if (copy_from_user(&ldt_info, ptr, bytecount)) - goto out; - - error = -EINVAL; - if (ldt_info.entry_number >= LDT_ENTRIES) - goto out; - if (ldt_info.contents == 3) { - if (oldmode) - goto out; - if (ldt_info.seg_not_present == 0) - goto out; - } - - mutex_lock(&mm->context.lock); - if (ldt_info.entry_number >= (unsigned)mm->context.size) { - error = alloc_ldt(&current->mm->context, ldt_info.entry_number+1, 1); - if (error < 0) - goto out_unlock; - } - - lp = (__u32 *) ((ldt_info.entry_number << 3) + (char *) mm->context.ldt); - - /* Allow LDTs to be cleared by the user. */ - if (ldt_info.base_addr == 0 && ldt_info.limit == 0) { - if (oldmode || LDT_empty(&ldt_info)) { - entry_1 = 0; - entry_2 = 0; - goto install; - } - } - - entry_1 = LDT_entry_a(&ldt_info); - entry_2 = LDT_entry_b(&ldt_info); - if (oldmode) - entry_2 &= ~(1 << 20); - - /* Install the new entry ... 
*/ -install: - *lp = entry_1; - *(lp+1) = entry_2; - error = 0; - -out_unlock: - mutex_unlock(&mm->context.lock); -out: - return error; -} - -asmlinkage int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount) -{ - int ret = -ENOSYS; - - switch (func) { - case 0: - ret = read_ldt(ptr, bytecount); - break; - case 1: - ret = write_ldt(ptr, bytecount, 1); - break; - case 2: - ret = read_default_ldt(ptr, bytecount); - break; - case 0x11: - ret = write_ldt(ptr, bytecount, 0); - break; - } - return ret; -} diff -puN arch/x86/kernel/machine_kexec_64.c~git-x86 arch/x86/kernel/machine_kexec_64.c --- a/arch/x86/kernel/machine_kexec_64.c~git-x86 +++ a/arch/x86/kernel/machine_kexec_64.c @@ -234,10 +234,5 @@ NORET_TYPE void machine_kexec(struct kim void arch_crash_save_vmcoreinfo(void) { VMCOREINFO_SYMBOL(init_level4_pgt); - -#ifdef CONFIG_ARCH_DISCONTIGMEM_ENABLE - VMCOREINFO_SYMBOL(node_data); - VMCOREINFO_LENGTH(node_data, MAX_NUMNODES); -#endif } diff -puN arch/x86/kernel/mpparse_32.c~git-x86 arch/x86/kernel/mpparse_32.c --- a/arch/x86/kernel/mpparse_32.c~git-x86 +++ a/arch/x86/kernel/mpparse_32.c @@ -258,7 +258,7 @@ static void __init MP_ioapic_info (struc if (!(m->mpc_flags & MPC_APIC_USABLE)) return; - printk(KERN_INFO "I/O APIC #%d Version %d at 0x%lX.\n", + printk(KERN_INFO "I/O APIC #%d Version %d at 0x%X.\n", m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr); if (nr_ioapics >= MAX_IO_APICS) { printk(KERN_CRIT "Max # of I/O APICs (%d) exceeded (found %d).\n", @@ -405,9 +405,9 @@ static int __init smp_read_mpc(struct mp mps_oem_check(mpc, oem, str); - printk("APIC at: 0x%lX\n",mpc->mpc_lapic); + printk("APIC at: 0x%X\n", mpc->mpc_lapic); - /* + /* * Save the local APIC address (it might be non-default) -- but only * if we're not using ACPI. */ @@ -918,14 +918,14 @@ void __init mp_register_ioapic(u8 id, u3 */ mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid; mp_ioapic_routing[idx].gsi_base = gsi_base; - mp_ioapic_routing[idx].gsi_end = gsi_base + + mp_ioapic_routing[idx].gsi_end = gsi_base + io_apic_get_redir_entries(idx); - printk("IOAPIC[%d]: apic_id %d, version %d, address 0x%lx, " - "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid, - mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr, - mp_ioapic_routing[idx].gsi_base, - mp_ioapic_routing[idx].gsi_end); + printk("IOAPIC[%d]: apic_id %d, version %d, address 0x%x, " + "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid, + mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr, + mp_ioapic_routing[idx].gsi_base, + mp_ioapic_routing[idx].gsi_end); } void __init @@ -1041,13 +1041,14 @@ void __init mp_config_acpi_legacy_irqs ( } #define MAX_GSI_NUM 4096 +#define IRQ_COMPRESSION_START 64 int mp_register_gsi(u32 gsi, int triggering, int polarity) { int ioapic = -1; int ioapic_pin = 0; int idx, bit = 0; - static int pci_irq = 16; + static int pci_irq = IRQ_COMPRESSION_START; /* * Mapping between Global System Interrups, which * represent all possible interrupts, and IRQs * assigned to actual devices. */ @@ -1086,12 +1087,16 @@ int mp_register_gsi(u32 gsi, int trigger if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) { Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n", mp_ioapic_routing[ioapic].apic_id, ioapic_pin); - return gsi_to_irq[gsi]; + return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]); } mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit); - if (triggering == ACPI_LEVEL_SENSITIVE) { + /* + * For GSI >= 64, use IRQ compression + */ + if ((gsi >= IRQ_COMPRESSION_START) + && (triggering == ACPI_LEVEL_SENSITIVE)) { /* * For PCI devices assign IRQs in order, avoiding gaps * due to unused I/O APIC pins. 
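The unified ldt.c above keeps the modify_ldt(2) ABI that both deleted files implemented: func 0 is read_ldt(), func 2 is read_default_ldt(), and func 1/0x11 are write_ldt() in old and new mode. A minimal user-space sketch of that interface, for illustration only: it assumes a Linux/x86 host, calls syscall(2) directly since glibc provides no modify_ldt wrapper, and takes struct user_desc and LDT_ENTRY_SIZE from <asm/ldt.h>.

#include <asm/ldt.h>	/* struct user_desc, LDT_ENTRY_SIZE */
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	struct user_desc ud;
	unsigned char buf[16 * LDT_ENTRY_SIZE];
	long n;

	/*
	 * base_addr == 0, limit == 0, read_exec_only == 1 and
	 * seg_not_present == 1 is exactly LDT_empty(), so write_ldt()
	 * installs a null entry -- allocating this mm's LDT on the way
	 * via alloc_ldt(entry_number + 1).
	 */
	memset(&ud, 0, sizeof(ud));
	ud.entry_number    = 0;
	ud.read_exec_only  = 1;
	ud.seg_not_present = 1;

	/* func 0x11: write_ldt() with oldmode == 0 */
	if (syscall(SYS_modify_ldt, 0x11, &ud, sizeof(ud)))
		perror("modify_ldt(0x11)");

	/* func 0: read_ldt(); bytes past context.size come back zero-filled */
	n = syscall(SYS_modify_ldt, 0, buf, sizeof(buf));
	printf("read_ldt returned %ld bytes\n", n);
	return 0;
}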
diff -puN arch/x86/kernel/nmi_32.c~git-x86 arch/x86/kernel/nmi_32.c --- a/arch/x86/kernel/nmi_32.c~git-x86 +++ a/arch/x86/kernel/nmi_32.c @@ -52,13 +52,13 @@ static int unknown_nmi_panic_callback(st static int endflag __initdata = 0; +#ifdef CONFIG_SMP /* The performance counters used by NMI_LOCAL_APIC don't trigger when * the CPU is idle. To make sure the NMI watchdog really ticks on all * CPUs during the test make them busy. */ static __init void nmi_cpu_busy(void *data) { -#ifdef CONFIG_SMP local_irq_enable_in_hardirq(); /* Intentionally don't use cpu_relax here. This is to make sure that the performance counter really ticks, @@ -68,8 +68,8 @@ static __init void nmi_cpu_busy(void *da care if they get somewhat less cycles. */ while (endflag == 0) mb(); -#endif } +#endif static int __init check_nmi_watchdog(void) { @@ -88,11 +88,13 @@ static int __init check_nmi_watchdog(voi printk(KERN_INFO "Testing NMI watchdog ... "); +#ifdef CONFIG_SMP if (nmi_watchdog == NMI_LOCAL_APIC) smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0); +#endif for_each_possible_cpu(cpu) - prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count; + prev_nmi_count[cpu] = nmi_count(cpu); local_irq_enable(); mdelay((20*1000)/nmi_hz); // wait 20 ticks @@ -179,7 +181,6 @@ static int lapic_nmi_resume(struct sys_d return 0; } - static struct sysdev_class nmi_sysclass = { set_kset_name("lapic_nmi"), .resume = lapic_nmi_resume, @@ -242,10 +243,10 @@ void acpi_nmi_disable(void) on_each_cpu(__acpi_nmi_disable, NULL, 0, 1); } -void setup_apic_nmi_watchdog (void *unused) +void setup_apic_nmi_watchdog(void *unused) { if (__get_cpu_var(wd_enabled)) - return; + return; /* cheap hack to support suspend/resume */ /* if cpu0 is not active neither should the other cpus */ @@ -334,7 +335,7 @@ __kprobes int nmi_watchdog_tick(struct p unsigned int sum; int touched = 0; int cpu = smp_processor_id(); - int rc=0; + int rc = 0; /* check for other users first */ if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) diff -puN arch/x86/kernel/nmi_64.c~git-x86 arch/x86/kernel/nmi_64.c --- a/arch/x86/kernel/nmi_64.c~git-x86 +++ a/arch/x86/kernel/nmi_64.c @@ -39,7 +39,7 @@ static cpumask_t backtrace_mask = CPU_MA * 0: the lapic NMI watchdog is disabled, but can be enabled */ atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ -int panic_on_timeout; +static int panic_on_timeout; unsigned int nmi_watchdog = NMI_DEFAULT; static unsigned int nmi_hz = HZ; @@ -78,22 +78,22 @@ static __init void nmi_cpu_busy(void *da } #endif -int __init check_nmi_watchdog (void) +int __init check_nmi_watchdog(void) { - int *counts; + int *prev_nmi_count; int cpu; - if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DISABLED)) + if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DISABLED)) return 0; if (!atomic_read(&nmi_active)) return 0; - counts = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL); - if (!counts) + prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL); + if (!prev_nmi_count) return -1; - printk(KERN_INFO "testing NMI watchdog ... "); + printk(KERN_INFO "Testing NMI watchdog ... 
"); #ifdef CONFIG_SMP if (nmi_watchdog == NMI_LOCAL_APIC) @@ -101,30 +101,29 @@ int __init check_nmi_watchdog (void) #endif for (cpu = 0; cpu < NR_CPUS; cpu++) - counts[cpu] = cpu_pda(cpu)->__nmi_count; + prev_nmi_count[cpu] = cpu_pda(cpu)->__nmi_count; local_irq_enable(); mdelay((20*1000)/nmi_hz); // wait 20 ticks for_each_online_cpu(cpu) { if (!per_cpu(wd_enabled, cpu)) continue; - if (cpu_pda(cpu)->__nmi_count - counts[cpu] <= 5) { + if (cpu_pda(cpu)->__nmi_count - prev_nmi_count[cpu] <= 5) { printk(KERN_WARNING "WARNING: CPU#%d: NMI " "appears to be stuck (%d->%d)!\n", - cpu, - counts[cpu], - cpu_pda(cpu)->__nmi_count); + cpu, + prev_nmi_count[cpu], + cpu_pda(cpu)->__nmi_count); per_cpu(wd_enabled, cpu) = 0; atomic_dec(&nmi_active); } } + endflag = 1; if (!atomic_read(&nmi_active)) { - kfree(counts); + kfree(prev_nmi_count); atomic_set(&nmi_active, -1); - endflag = 1; return -1; } - endflag = 1; printk("OK.\n"); /* now that we know it works we can reduce NMI frequency to @@ -132,11 +131,11 @@ int __init check_nmi_watchdog (void) if (nmi_watchdog == NMI_LOCAL_APIC) nmi_hz = lapic_adjust_nmi_hz(1); - kfree(counts); + kfree(prev_nmi_count); return 0; } -int __init setup_nmi_watchdog(char *str) +static int __init setup_nmi_watchdog(char *str) { int nmi; @@ -159,34 +158,6 @@ int __init setup_nmi_watchdog(char *str) __setup("nmi_watchdog=", setup_nmi_watchdog); - -static void __acpi_nmi_disable(void *__unused) -{ - apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED); -} - -/* - * Disable timer based NMIs on all CPUs: - */ -void acpi_nmi_disable(void) -{ - if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) - on_each_cpu(__acpi_nmi_disable, NULL, 0, 1); -} - -static void __acpi_nmi_enable(void *__unused) -{ - apic_write(APIC_LVT0, APIC_DM_NMI); -} - -/* - * Enable timer based NMIs on all CPUs: - */ -void acpi_nmi_enable(void) -{ - if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) - on_each_cpu(__acpi_nmi_enable, NULL, 0, 1); -} #ifdef CONFIG_PM static int nmi_pm_active; /* nmi_active before suspend */ @@ -217,7 +188,7 @@ static struct sysdev_class nmi_sysclass }; static struct sys_device device_lapic_nmi = { - .id = 0, + .id = 0, .cls = &nmi_sysclass, }; @@ -231,7 +202,7 @@ static int __init init_lapic_nmi_sysfs(v if (nmi_watchdog != NMI_LOCAL_APIC) return 0; - if ( atomic_read(&nmi_active) < 0 ) + if (atomic_read(&nmi_active) < 0) return 0; error = sysdev_class_register(&nmi_sysclass); @@ -244,9 +215,37 @@ late_initcall(init_lapic_nmi_sysfs); #endif /* CONFIG_PM */ +static void __acpi_nmi_enable(void *__unused) +{ + apic_write(APIC_LVT0, APIC_DM_NMI); +} + +/* + * Enable timer based NMIs on all CPUs: + */ +void acpi_nmi_enable(void) +{ + if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) + on_each_cpu(__acpi_nmi_enable, NULL, 0, 1); +} + +static void __acpi_nmi_disable(void *__unused) +{ + apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED); +} + +/* + * Disable timer based NMIs on all CPUs: + */ +void acpi_nmi_disable(void) +{ + if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) + on_each_cpu(__acpi_nmi_disable, NULL, 0, 1); +} + void setup_apic_nmi_watchdog(void *unused) { - if (__get_cpu_var(wd_enabled) == 1) + if (__get_cpu_var(wd_enabled)) return; /* cheap hack to support suspend/resume */ @@ -311,8 +310,9 @@ void touch_nmi_watchdog(void) } } - touch_softlockup_watchdog(); + touch_softlockup_watchdog(); } +EXPORT_SYMBOL(touch_nmi_watchdog); int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) { @@ -479,4 +479,3 @@ void 
__trigger_all_cpu_backtrace(void) EXPORT_SYMBOL(nmi_active); EXPORT_SYMBOL(nmi_watchdog); -EXPORT_SYMBOL(touch_nmi_watchdog); diff -puN arch/x86/kernel/paravirt_32.c~git-x86 arch/x86/kernel/paravirt_32.c --- a/arch/x86/kernel/paravirt_32.c~git-x86 +++ a/arch/x86/kernel/paravirt_32.c @@ -60,7 +60,7 @@ DEF_NATIVE(pv_irq_ops, irq_enable, "sti" DEF_NATIVE(pv_irq_ops, restore_fl, "push %eax; popf"); DEF_NATIVE(pv_irq_ops, save_fl, "pushf; pop %eax"); DEF_NATIVE(pv_cpu_ops, iret, "iret"); -DEF_NATIVE(pv_cpu_ops, irq_enable_sysexit, "sti; sysexit"); +DEF_NATIVE(pv_cpu_ops, irq_enable_syscall_ret, "sti; sysexit"); DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax"); DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3"); DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax"); @@ -88,7 +88,7 @@ static unsigned native_patch(u8 type, u1 SITE(pv_irq_ops, restore_fl); SITE(pv_irq_ops, save_fl); SITE(pv_cpu_ops, iret); - SITE(pv_cpu_ops, irq_enable_sysexit); + SITE(pv_cpu_ops, irq_enable_syscall_ret); SITE(pv_mmu_ops, read_cr2); SITE(pv_mmu_ops, read_cr3); SITE(pv_mmu_ops, write_cr3); @@ -186,7 +186,7 @@ unsigned paravirt_patch_default(u8 type, /* If the operation is a nop, then nop the callsite */ ret = paravirt_patch_nop(); else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) || - type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit)) + type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_syscall_ret)) /* If operation requires a jmp, then jmp */ ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len); else @@ -237,7 +237,7 @@ static void native_flush_tlb_single(unsi /* These are in entry.S */ extern void native_iret(void); -extern void native_irq_enable_sysexit(void); +extern void native_irq_enable_syscall_ret(void); static int __init print_banner(void) { @@ -382,9 +382,9 @@ struct pv_cpu_ops pv_cpu_ops = { .write_ldt_entry = write_dt_entry, .write_gdt_entry = write_dt_entry, .write_idt_entry = write_dt_entry, - .load_esp0 = native_load_esp0, + .load_sp0 = native_load_sp0, - .irq_enable_sysexit = native_irq_enable_sysexit, + .irq_enable_syscall_ret = native_irq_enable_syscall_ret, .iret = native_iret, .set_iopl_mask = native_set_iopl_mask, diff -puN arch/x86/kernel/pci-calgary_64.c~git-x86 arch/x86/kernel/pci-calgary_64.c --- a/arch/x86/kernel/pci-calgary_64.c~git-x86 +++ a/arch/x86/kernel/pci-calgary_64.c @@ -30,7 +30,6 @@ #include #include #include -#include #include #include #include @@ -183,7 +182,7 @@ static struct calgary_bus_info bus_info[ /* enable this to stress test the chip's TCE cache */ #ifdef CONFIG_IOMMU_DEBUG -int debugging __read_mostly = 1; +static int debugging = 1; static inline unsigned long verify_bit_range(unsigned long* bitmap, int expected, unsigned long start, unsigned long end) @@ -202,7 +201,7 @@ static inline unsigned long verify_bit_r return ~0UL; } #else /* debugging is disabled */ -int debugging __read_mostly = 0; +static int debugging; static inline unsigned long verify_bit_range(unsigned long* bitmap, int expected, unsigned long start, unsigned long end) diff -puN arch/x86/kernel/pci-dma_64.c~git-x86 arch/x86/kernel/pci-dma_64.c --- a/arch/x86/kernel/pci-dma_64.c~git-x86 +++ a/arch/x86/kernel/pci-dma_64.c @@ -13,7 +13,6 @@ #include int iommu_merge __read_mostly = 0; -EXPORT_SYMBOL(iommu_merge); dma_addr_t bad_dma_address __read_mostly; EXPORT_SYMBOL(bad_dma_address); @@ -230,7 +229,7 @@ EXPORT_SYMBOL(dma_set_mask); * See for the iommu kernel parameter * documentation. 
*/ -__init int iommu_setup(char *p) +static __init int iommu_setup(char *p) { iommu_merge = 1; diff -puN arch/x86/kernel/pci-gart_64.c~git-x86 arch/x86/kernel/pci-gart_64.c --- a/arch/x86/kernel/pci-gart_64.c~git-x86 +++ a/arch/x86/kernel/pci-gart_64.c @@ -1,12 +1,12 @@ /* * Dynamic DMA mapping support for AMD Hammer. - * + * * Use the integrated AGP GART in the Hammer northbridge as an IOMMU for PCI. * This allows to use PCI devices that only support 32bit addresses on systems - * with more than 4GB. + * with more than 4GB. * * See Documentation/DMA-mapping.txt for the interface specification. - * + * * Copyright 2002 Andi Kleen, SuSE Labs. * Subject to the GNU General Public License v2 only. */ @@ -37,23 +37,26 @@ #include static unsigned long iommu_bus_base; /* GART remapping area (physical) */ -static unsigned long iommu_size; /* size of remapping area bytes */ +static unsigned long iommu_size; /* size of remapping area bytes */ static unsigned long iommu_pages; /* .. and in pages */ -static u32 *iommu_gatt_base; /* Remapping table */ +static u32 *iommu_gatt_base; /* Remapping table */ -/* If this is disabled the IOMMU will use an optimized flushing strategy - of only flushing when an mapping is reused. With it true the GART is flushed - for every mapping. Problem is that doing the lazy flush seems to trigger - bugs with some popular PCI cards, in particular 3ware (but has been also - also seen with Qlogic at least). */ +/* + * If this is disabled the IOMMU will use an optimized flushing strategy + * of only flushing when an mapping is reused. With it true the GART is + * flushed for every mapping. Problem is that doing the lazy flush seems + * to trigger bugs with some popular PCI cards, in particular 3ware (but + * has been also also seen with Qlogic at least). + */ int iommu_fullflush = 1; -/* Allocation bitmap for the remapping area */ +/* Allocation bitmap for the remapping area: */ static DEFINE_SPINLOCK(iommu_bitmap_lock); -static unsigned long *iommu_gart_bitmap; /* guarded by iommu_bitmap_lock */ +/* Guarded by iommu_bitmap_lock: */ +static unsigned long *iommu_gart_bitmap; -static u32 gart_unmapped_entry; +static u32 gart_unmapped_entry; #define GPTE_VALID 1 #define GPTE_COHERENT 2 @@ -61,10 +64,10 @@ static u32 gart_unmapped_entry; (((x) & 0xfffff000) | (((x) >> 32) << 4) | GPTE_VALID | GPTE_COHERENT) #define GPTE_DECODE(x) (((x) & 0xfffff000) | (((u64)(x) & 0xff0) << 28)) -#define to_pages(addr,size) \ +#define to_pages(addr, size) \ (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT) -#define EMERGENCY_PAGES 32 /* = 128KB */ +#define EMERGENCY_PAGES 32 /* = 128KB */ #ifdef CONFIG_AGP #define AGPEXTERN extern @@ -77,130 +80,152 @@ AGPEXTERN int agp_memory_reserved; AGPEXTERN __u32 *agp_gatt_table; static unsigned long next_bit; /* protected by iommu_bitmap_lock */ -static int need_flush; /* global flush state. set for each gart wrap */ +static int need_flush; /* global flush state. 
set for each gart wrap */ -static unsigned long alloc_iommu(int size) -{ +static unsigned long alloc_iommu(int size) +{ unsigned long offset, flags; - spin_lock_irqsave(&iommu_bitmap_lock, flags); - offset = find_next_zero_string(iommu_gart_bitmap,next_bit,iommu_pages,size); + spin_lock_irqsave(&iommu_bitmap_lock, flags); + offset = find_next_zero_string(iommu_gart_bitmap, next_bit, + iommu_pages, size); if (offset == -1) { need_flush = 1; - offset = find_next_zero_string(iommu_gart_bitmap,0,iommu_pages,size); + offset = find_next_zero_string(iommu_gart_bitmap, 0, + iommu_pages, size); } - if (offset != -1) { - set_bit_string(iommu_gart_bitmap, offset, size); - next_bit = offset+size; - if (next_bit >= iommu_pages) { + if (offset != -1) { + set_bit_string(iommu_gart_bitmap, offset, size); + next_bit = offset+size; + if (next_bit >= iommu_pages) { next_bit = 0; need_flush = 1; - } - } + } + } if (iommu_fullflush) need_flush = 1; - spin_unlock_irqrestore(&iommu_bitmap_lock, flags); + spin_unlock_irqrestore(&iommu_bitmap_lock, flags); + return offset; -} +} static void free_iommu(unsigned long offset, int size) -{ +{ unsigned long flags; + spin_lock_irqsave(&iommu_bitmap_lock, flags); __clear_bit_string(iommu_gart_bitmap, offset, size); spin_unlock_irqrestore(&iommu_bitmap_lock, flags); -} +} -/* +/* * Use global flush state to avoid races with multiple flushers. */ static void flush_gart(void) -{ +{ unsigned long flags; + spin_lock_irqsave(&iommu_bitmap_lock, flags); if (need_flush) { k8_flush_garts(); need_flush = 0; - } + } spin_unlock_irqrestore(&iommu_bitmap_lock, flags); -} +} #ifdef CONFIG_IOMMU_LEAK -#define SET_LEAK(x) if (iommu_leak_tab) \ - iommu_leak_tab[x] = __builtin_return_address(0); -#define CLEAR_LEAK(x) if (iommu_leak_tab) \ - iommu_leak_tab[x] = NULL; +#define SET_LEAK(x) \ + do { \ + if (iommu_leak_tab) \ + iommu_leak_tab[x] = __builtin_return_address(0);\ + } while (0) + +#define CLEAR_LEAK(x) \ + do { \ + if (iommu_leak_tab) \ + iommu_leak_tab[x] = NULL; \ + } while (0) /* Debugging aid for drivers that don't free their IOMMU tables */ -static void **iommu_leak_tab; +static void **iommu_leak_tab; static int leak_trace; static int iommu_leak_pages = 20; + static void dump_leak(void) { int i; - static int dump; - if (dump || !iommu_leak_tab) return; + static int dump; + + if (dump || !iommu_leak_tab) + return; dump = 1; - show_stack(NULL,NULL); - /* Very crude. dump some from the end of the table too */ - printk("Dumping %d pages from end of IOMMU:\n", iommu_leak_pages); - for (i = 0; i < iommu_leak_pages; i+=2) { - printk("%lu: ", iommu_pages-i); + show_stack(NULL, NULL); + + /* Very crude. dump some from the end of the table too */ + printk(KERN_DEBUG "Dumping %d pages from end of IOMMU:\n", + iommu_leak_pages); + for (i = 0; i < iommu_leak_pages; i += 2) { + printk(KERN_DEBUG "%lu: ", iommu_pages-i); printk_address((unsigned long) iommu_leak_tab[iommu_pages-i]); - printk("%c", (i+1)%2 == 0 ? '\n' : ' '); - } - printk("\n"); + printk(KERN_CONT "%c", (i+1)%2 == 0 ? '\n' : ' '); + } + printk(KERN_DEBUG "\n"); } #else -#define SET_LEAK(x) -#define CLEAR_LEAK(x) +# define SET_LEAK(x) +# define CLEAR_LEAK(x) #endif static void iommu_full(struct device *dev, size_t size, int dir) { - /* + /* * Ran out of IOMMU space for this operation. This is very bad. * Unfortunately the drivers cannot handle this operation properly. 
- * Return some non mapped prereserved space in the aperture and + * Return some non mapped prereserved space in the aperture and * let the Northbridge deal with it. This will result in garbage * in the IO operation. When the size exceeds the prereserved space - * memory corruption will occur or random memory will be DMAed + * memory corruption will occur or random memory will be DMAed * out. Hopefully no network devices use single mappings that big. - */ - - printk(KERN_ERR - "PCI-DMA: Out of IOMMU space for %lu bytes at device %s\n", - size, dev->bus_id); + */ + + printk(KERN_ERR + "PCI-DMA: Out of IOMMU space for %lu bytes at device %s\n", + size, dev->bus_id); if (size > PAGE_SIZE*EMERGENCY_PAGES) { if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL) panic("PCI-DMA: Memory would be corrupted\n"); - if (dir == PCI_DMA_TODEVICE || dir == PCI_DMA_BIDIRECTIONAL) - panic(KERN_ERR "PCI-DMA: Random memory would be DMAed\n"); - } - + if (dir == PCI_DMA_TODEVICE || dir == PCI_DMA_BIDIRECTIONAL) + panic(KERN_ERR + "PCI-DMA: Random memory would be DMAed\n"); + } #ifdef CONFIG_IOMMU_LEAK - dump_leak(); + dump_leak(); #endif -} +} -static inline int need_iommu(struct device *dev, unsigned long addr, size_t size) -{ +static inline int +need_iommu(struct device *dev, unsigned long addr, size_t size) +{ u64 mask = *dev->dma_mask; int high = addr + size > mask; int mmu = high; - if (force_iommu) - mmu = 1; - return mmu; + + if (force_iommu) + mmu = 1; + + return mmu; } -static inline int nonforced_iommu(struct device *dev, unsigned long addr, size_t size) -{ +static inline int +nonforced_iommu(struct device *dev, unsigned long addr, size_t size) +{ u64 mask = *dev->dma_mask; int high = addr + size > mask; int mmu = high; - return mmu; + + return mmu; } /* Map a single continuous physical area into the IOMMU. @@ -208,13 +233,14 @@ static inline int nonforced_iommu(struct */ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, size_t size, int dir) -{ +{ unsigned long npages = to_pages(phys_mem, size); unsigned long iommu_page = alloc_iommu(npages); int i; + if (iommu_page == -1) { if (!nonforced_iommu(dev, phys_mem, size)) - return phys_mem; + return phys_mem; if (panic_on_overflow) panic("dma_map_area overflow %lu bytes\n", size); iommu_full(dev, size, dir); @@ -229,35 +255,39 @@ static dma_addr_t dma_map_area(struct de return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK); } -static dma_addr_t gart_map_simple(struct device *dev, char *buf, - size_t size, int dir) +static dma_addr_t +gart_map_simple(struct device *dev, char *buf, size_t size, int dir) { dma_addr_t map = dma_map_area(dev, virt_to_bus(buf), size, dir); + flush_gart(); + return map; } /* Map a single area into the IOMMU */ -static dma_addr_t gart_map_single(struct device *dev, void *addr, size_t size, int dir) +static dma_addr_t +gart_map_single(struct device *dev, void *addr, size_t size, int dir) { unsigned long phys_mem, bus; if (!dev) dev = &fallback_dev; - phys_mem = virt_to_phys(addr); + phys_mem = virt_to_phys(addr); if (!need_iommu(dev, phys_mem, size)) - return phys_mem; + return phys_mem; bus = gart_map_simple(dev, addr, size, dir); - return bus; + + return bus; } /* * Free a DMA mapping. 
*/ static void gart_unmap_single(struct device *dev, dma_addr_t dma_addr, - size_t size, int direction) + size_t size, int direction) { unsigned long iommu_page; int npages; @@ -266,6 +296,7 @@ static void gart_unmap_single(struct dev if (dma_addr < iommu_bus_base + EMERGENCY_PAGES*PAGE_SIZE || dma_addr >= iommu_bus_base + iommu_size) return; + iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT; npages = to_pages(dma_addr, size); for (i = 0; i < npages; i++) { @@ -278,7 +309,8 @@ static void gart_unmap_single(struct dev /* * Wrapper for pci_unmap_single working with scatterlists. */ -static void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) +static void +gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) { struct scatterlist *s; int i; @@ -303,12 +335,13 @@ static int dma_map_sg_nonforce(struct de for_each_sg(sg, s, nents, i) { unsigned long addr = sg_phys(s); - if (nonforced_iommu(dev, addr, s->length)) { + + if (nonforced_iommu(dev, addr, s->length)) { addr = dma_map_area(dev, addr, s->length, dir); - if (addr == bad_dma_address) { - if (i > 0) + if (addr == bad_dma_address) { + if (i > 0) gart_unmap_sg(dev, sg, i, dir); - nents = 0; + nents = 0; sg[0].dma_length = 0; break; } @@ -317,15 +350,16 @@ static int dma_map_sg_nonforce(struct de s->dma_length = s->length; } flush_gart(); + return nents; } /* Map multiple scatterlist entries continuous into the first. */ static int __dma_map_cont(struct scatterlist *start, int nelems, - struct scatterlist *sout, unsigned long pages) + struct scatterlist *sout, unsigned long pages) { unsigned long iommu_start = alloc_iommu(pages); - unsigned long iommu_page = iommu_start; + unsigned long iommu_page = iommu_start; struct scatterlist *s; int i; @@ -335,32 +369,33 @@ static int __dma_map_cont(struct scatter for_each_sg(start, s, nelems, i) { unsigned long pages, addr; unsigned long phys_addr = s->dma_address; - + BUG_ON(s != start && s->offset); if (s == start) { sout->dma_address = iommu_bus_base; sout->dma_address += iommu_page*PAGE_SIZE + s->offset; sout->dma_length = s->length; - } else { - sout->dma_length += s->length; + } else { + sout->dma_length += s->length; } addr = phys_addr; - pages = to_pages(s->offset, s->length); - while (pages--) { - iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr); + pages = to_pages(s->offset, s->length); + while (pages--) { + iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr); SET_LEAK(iommu_page); addr += PAGE_SIZE; iommu_page++; } - } - BUG_ON(iommu_page - iommu_start != pages); + } + BUG_ON(iommu_page - iommu_start != pages); + return 0; } -static inline int dma_map_cont(struct scatterlist *start, int nelems, - struct scatterlist *sout, - unsigned long pages, int need) +static inline int +dma_map_cont(struct scatterlist *start, int nelems, struct scatterlist *sout, + unsigned long pages, int need) { if (!need) { BUG_ON(nelems != 1); @@ -370,22 +405,19 @@ static inline int dma_map_cont(struct sc } return __dma_map_cont(start, nelems, sout, pages); } - + /* * DMA map all entries in a scatterlist. - * Merge chunks that have page aligned sizes into a continuous mapping. + * Merge chunks that have page aligned sizes into a continuous mapping. 
*/ -static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, - int dir) +static int +gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) { - int i; - int out; - int start; - unsigned long pages = 0; - int need = 0, nextneed; struct scatterlist *s, *ps, *start_sg, *sgmap; + int need = 0, nextneed, i, out, start; + unsigned long pages = 0; - if (nents == 0) + if (nents == 0) return 0; if (!dev) @@ -397,15 +429,19 @@ static int gart_map_sg(struct device *de ps = NULL; /* shut up gcc */ for_each_sg(sg, s, nents, i) { dma_addr_t addr = sg_phys(s); + s->dma_address = addr; - BUG_ON(s->length == 0); + BUG_ON(s->length == 0); - nextneed = need_iommu(dev, addr, s->length); + nextneed = need_iommu(dev, addr, s->length); /* Handle the previous not yet processed entries */ if (i > start) { - /* Can only merge when the last chunk ends on a page - boundary and the new one doesn't have an offset. */ + /* + * Can only merge when the last chunk ends on a + * page boundary and the new one doesn't have an + * offset. + */ if (!iommu_merge || !nextneed || !need || s->offset || (ps->offset + ps->length) % PAGE_SIZE) { if (dma_map_cont(start_sg, i - start, sgmap, @@ -436,6 +472,7 @@ static int gart_map_sg(struct device *de error: flush_gart(); gart_unmap_sg(dev, sg, out, dir); + /* When it was forced or merged try again in a dumb way */ if (force_iommu || iommu_merge) { out = dma_map_sg_nonforce(dev, sg, nents, dir); @@ -444,64 +481,68 @@ error: } if (panic_on_overflow) panic("dma_map_sg: overflow on %lu pages\n", pages); + iommu_full(dev, pages << PAGE_SHIFT, dir); for_each_sg(sg, s, nents, i) s->dma_address = bad_dma_address; return 0; -} +} static int no_agp; static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) -{ - unsigned long a; - if (!iommu_size) { - iommu_size = aper_size; - if (!no_agp) - iommu_size /= 2; - } +{ + unsigned long a; - a = aper + iommu_size; + if (!iommu_size) { + iommu_size = aper_size; + if (!no_agp) + iommu_size /= 2; + } + + a = aper + iommu_size; iommu_size -= round_up(a, LARGE_PAGE_SIZE) - a; - if (iommu_size < 64*1024*1024) + if (iommu_size < 64*1024*1024) { printk(KERN_WARNING - "PCI-DMA: Warning: Small IOMMU %luMB. Consider increasing the AGP aperture in BIOS\n",iommu_size>>20); - + "PCI-DMA: Warning: Small IOMMU %luMB." + " Consider increasing the AGP aperture in BIOS\n", + iommu_size >> 20); + } + return iommu_size; -} +} -static __init unsigned read_aperture(struct pci_dev *dev, u32 *size) -{ - unsigned aper_size = 0, aper_base_32; +static __init unsigned read_aperture(struct pci_dev *dev, u32 *size) +{ + unsigned aper_size = 0, aper_base_32, aper_order; u64 aper_base; - unsigned aper_order; - pci_read_config_dword(dev, 0x94, &aper_base_32); + pci_read_config_dword(dev, 0x94, &aper_base_32); pci_read_config_dword(dev, 0x90, &aper_order); - aper_order = (aper_order >> 1) & 7; + aper_order = (aper_order >> 1) & 7; - aper_base = aper_base_32 & 0x7fff; + aper_base = aper_base_32 & 0x7fff; aper_base <<= 25; - aper_size = (32 * 1024 * 1024) << aper_order; - if (aper_base + aper_size > 0x100000000UL || !aper_size) + aper_size = (32 * 1024 * 1024) << aper_order; + if (aper_base + aper_size > 0x100000000UL || !aper_size) aper_base = 0; *size = aper_size; return aper_base; -} +} -/* +/* * Private Northbridge GATT initialization in case we cannot use the - * AGP driver for some reason. + * AGP driver for some reason. 
*/ static __init int init_k8_gatt(struct agp_kern_info *info) -{ +{ + unsigned aper_size, gatt_size, new_aper_size; + unsigned aper_base, new_aper_base; struct pci_dev *dev; void *gatt; - unsigned aper_base, new_aper_base; - unsigned aper_size, gatt_size, new_aper_size; int i; printk(KERN_INFO "PCI-DMA: Disabling AGP.\n"); @@ -509,75 +550,77 @@ static __init int init_k8_gatt(struct ag dev = NULL; for (i = 0; i < num_k8_northbridges; i++) { dev = k8_northbridges[i]; - new_aper_base = read_aperture(dev, &new_aper_size); - if (!new_aper_base) - goto nommu; - - if (!aper_base) { + new_aper_base = read_aperture(dev, &new_aper_size); + if (!new_aper_base) + goto nommu; + + if (!aper_base) { aper_size = new_aper_size; aper_base = new_aper_base; - } - if (aper_size != new_aper_size || aper_base != new_aper_base) + } + if (aper_size != new_aper_size || aper_base != new_aper_base) goto nommu; } if (!aper_base) - goto nommu; + goto nommu; info->aper_base = aper_base; - info->aper_size = aper_size>>20; + info->aper_size = aper_size >> 20; - gatt_size = (aper_size >> PAGE_SHIFT) * sizeof(u32); - gatt = (void *)__get_free_pages(GFP_KERNEL, get_order(gatt_size)); - if (!gatt) + gatt_size = (aper_size >> PAGE_SHIFT) * sizeof(u32); + gatt = (void *)__get_free_pages(GFP_KERNEL, get_order(gatt_size)); + if (!gatt) panic("Cannot allocate GATT table"); - if (change_page_attr_addr((unsigned long)gatt, gatt_size >> PAGE_SHIFT, PAGE_KERNEL_NOCACHE)) + if (change_page_attr_addr((unsigned long)gatt, gatt_size >> PAGE_SHIFT, + PAGE_KERNEL_NOCACHE)) panic("Could not set GART PTEs to uncacheable pages"); global_flush_tlb(); - memset(gatt, 0, gatt_size); + memset(gatt, 0, gatt_size); agp_gatt_table = gatt; for (i = 0; i < num_k8_northbridges; i++) { - u32 ctl; - u32 gatt_reg; + u32 gatt_reg; + u32 ctl; dev = k8_northbridges[i]; - gatt_reg = __pa(gatt) >> 12; - gatt_reg <<= 4; + gatt_reg = __pa(gatt) >> 12; + gatt_reg <<= 4; pci_write_config_dword(dev, 0x98, gatt_reg); - pci_read_config_dword(dev, 0x90, &ctl); + pci_read_config_dword(dev, 0x90, &ctl); ctl |= 1; ctl &= ~((1<<4) | (1<<5)); - pci_write_config_dword(dev, 0x90, ctl); + pci_write_config_dword(dev, 0x90, ctl); } flush_gart(); - - printk("PCI-DMA: aperture base @ %x size %u KB\n",aper_base, aper_size>>10); + + printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n", + aper_base, aper_size>>10); return 0; nommu: - /* Should not happen anymore */ + /* Should not happen anymore */ printk(KERN_ERR "PCI-DMA: More than 4GB of RAM and no IOMMU\n" KERN_ERR "PCI-DMA: 32bit PCI IO may malfunction.\n"); - return -1; -} + return -1; +} extern int agp_amd64_init(void); static const struct dma_mapping_ops gart_dma_ops = { - .mapping_error = NULL, - .map_single = gart_map_single, - .map_simple = gart_map_simple, - .unmap_single = gart_unmap_single, - .sync_single_for_cpu = NULL, - .sync_single_for_device = NULL, - .sync_single_range_for_cpu = NULL, - .sync_single_range_for_device = NULL, - .sync_sg_for_cpu = NULL, - .sync_sg_for_device = NULL, - .map_sg = gart_map_sg, - .unmap_sg = gart_unmap_sg, + .mapping_error = NULL, + .map_single = gart_map_single, + .map_simple = gart_map_simple, + .unmap_single = gart_unmap_single, + .sync_single_for_cpu = NULL, + .sync_single_for_device = NULL, + .sync_single_range_for_cpu = NULL, + .sync_single_range_for_device = NULL, + .sync_sg_for_cpu = NULL, + .sync_sg_for_device = NULL, + .map_sg = gart_map_sg, + .unmap_sg = gart_unmap_sg, }; void gart_iommu_shutdown(void) @@ -588,23 +631,23 @@ void gart_iommu_shutdown(void) if (no_agp && 
(dma_ops != &gart_dma_ops)) return; - for (i = 0; i < num_k8_northbridges; i++) { - u32 ctl; + for (i = 0; i < num_k8_northbridges; i++) { + u32 ctl; - dev = k8_northbridges[i]; - pci_read_config_dword(dev, 0x90, &ctl); + dev = k8_northbridges[i]; + pci_read_config_dword(dev, 0x90, &ctl); - ctl &= ~1; + ctl &= ~1; - pci_write_config_dword(dev, 0x90, ctl); - } + pci_write_config_dword(dev, 0x90, ctl); + } } void __init gart_iommu_init(void) -{ +{ struct agp_kern_info info; - unsigned long aper_size; unsigned long iommu_start; + unsigned long aper_size; unsigned long scratch; long i; @@ -614,14 +657,14 @@ void __init gart_iommu_init(void) } #ifndef CONFIG_AGP_AMD64 - no_agp = 1; + no_agp = 1; #else /* Makefile puts PCI initialization via subsys_initcall first. */ /* Add other K8 AGP bridge drivers here */ - no_agp = no_agp || - (agp_amd64_init() < 0) || + no_agp = no_agp || + (agp_amd64_init() < 0) || (agp_copy_info(agp_bridge, &info) < 0); -#endif +#endif if (swiotlb) return; @@ -643,77 +686,78 @@ void __init gart_iommu_init(void) } printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n"); - aper_size = info.aper_size * 1024 * 1024; - iommu_size = check_iommu_size(info.aper_base, aper_size); - iommu_pages = iommu_size >> PAGE_SHIFT; - - iommu_gart_bitmap = (void*)__get_free_pages(GFP_KERNEL, - get_order(iommu_pages/8)); - if (!iommu_gart_bitmap) - panic("Cannot allocate iommu bitmap\n"); + aper_size = info.aper_size * 1024 * 1024; + iommu_size = check_iommu_size(info.aper_base, aper_size); + iommu_pages = iommu_size >> PAGE_SHIFT; + + iommu_gart_bitmap = (void *) __get_free_pages(GFP_KERNEL, + get_order(iommu_pages/8)); + if (!iommu_gart_bitmap) + panic("Cannot allocate iommu bitmap\n"); memset(iommu_gart_bitmap, 0, iommu_pages/8); #ifdef CONFIG_IOMMU_LEAK - if (leak_trace) { - iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL, + if (leak_trace) { + iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL, get_order(iommu_pages*sizeof(void *))); - if (iommu_leak_tab) - memset(iommu_leak_tab, 0, iommu_pages * 8); + if (iommu_leak_tab) + memset(iommu_leak_tab, 0, iommu_pages * 8); else - printk("PCI-DMA: Cannot allocate leak trace area\n"); - } + printk(KERN_DEBUG + "PCI-DMA: Cannot allocate leak trace area\n"); + } #endif - /* + /* * Out of IOMMU space handling. - * Reserve some invalid pages at the beginning of the GART. - */ - set_bit_string(iommu_gart_bitmap, 0, EMERGENCY_PAGES); + * Reserve some invalid pages at the beginning of the GART. + */ + set_bit_string(iommu_gart_bitmap, 0, EMERGENCY_PAGES); - agp_memory_reserved = iommu_size; + agp_memory_reserved = iommu_size; printk(KERN_INFO "PCI-DMA: Reserving %luMB of IOMMU area in the AGP aperture\n", - iommu_size>>20); + iommu_size >> 20); - iommu_start = aper_size - iommu_size; - iommu_bus_base = info.aper_base + iommu_start; + iommu_start = aper_size - iommu_size; + iommu_bus_base = info.aper_base + iommu_start; bad_dma_address = iommu_bus_base; iommu_gatt_base = agp_gatt_table + (iommu_start>>PAGE_SHIFT); - /* + /* * Unmap the IOMMU part of the GART. The alias of the page is * always mapped with cache enabled and there is no full cache * coherency across the GART remapping. The unmapping avoids * automatic prefetches from the CPU allocating cache lines in * there. All CPU accesses are done via the direct mapping to * the backing memory. The GART address is only used by PCI - * devices. + * devices. 
*/ clear_kernel_mapping((unsigned long)__va(iommu_bus_base), iommu_size); - /* - * Try to workaround a bug (thanks to BenH) - * Set unmapped entries to a scratch page instead of 0. + /* + * Try to workaround a bug (thanks to BenH) + * Set unmapped entries to a scratch page instead of 0. * Any prefetches that hit unmapped entries won't get an bus abort * then. */ - scratch = get_zeroed_page(GFP_KERNEL); - if (!scratch) + scratch = get_zeroed_page(GFP_KERNEL); + if (!scratch) panic("Cannot allocate iommu scratch page"); gart_unmapped_entry = GPTE_ENCODE(__pa(scratch)); - for (i = EMERGENCY_PAGES; i < iommu_pages; i++) + for (i = EMERGENCY_PAGES; i < iommu_pages; i++) iommu_gatt_base[i] = gart_unmapped_entry; flush_gart(); dma_ops = &gart_dma_ops; -} +} void __init gart_parse_options(char *p) { int arg; #ifdef CONFIG_IOMMU_LEAK - if (!strncmp(p,"leak",4)) { + if (!strncmp(p, "leak", 4)) { leak_trace = 1; p += 4; if (*p == '=') ++p; @@ -723,18 +767,18 @@ void __init gart_parse_options(char *p) #endif if (isdigit(*p) && get_option(&p, &arg)) iommu_size = arg; - if (!strncmp(p, "fullflush",8)) + if (!strncmp(p, "fullflush", 8)) iommu_fullflush = 1; - if (!strncmp(p, "nofullflush",11)) + if (!strncmp(p, "nofullflush", 11)) iommu_fullflush = 0; - if (!strncmp(p,"noagp",5)) + if (!strncmp(p, "noagp", 5)) no_agp = 1; - if (!strncmp(p, "noaperture",10)) + if (!strncmp(p, "noaperture", 10)) fix_aperture = 0; /* duplicated from pci-dma.c */ - if (!strncmp(p,"force",5)) + if (!strncmp(p, "force", 5)) gart_iommu_aperture_allowed = 1; - if (!strncmp(p,"allowed",7)) + if (!strncmp(p, "allowed", 7)) gart_iommu_aperture_allowed = 1; if (!strncmp(p, "memaper", 7)) { fallback_aper_force = 1; diff -puN arch/x86/kernel/pci-swiotlb_64.c~git-x86 arch/x86/kernel/pci-swiotlb_64.c --- a/arch/x86/kernel/pci-swiotlb_64.c~git-x86 +++ a/arch/x86/kernel/pci-swiotlb_64.c @@ -10,7 +10,6 @@ #include int swiotlb __read_mostly; -EXPORT_SYMBOL(swiotlb); const struct dma_mapping_ops swiotlb_dma_ops = { .mapping_error = swiotlb_dma_mapping_error, diff -puN arch/x86/kernel/pmtimer_64.c~git-x86 arch/x86/kernel/pmtimer_64.c --- a/arch/x86/kernel/pmtimer_64.c~git-x86 +++ a/arch/x86/kernel/pmtimer_64.c @@ -19,13 +19,13 @@ #include #include #include +#include + #include #include #include #include -#define ACPI_PM_MASK 0xFFFFFF /* limit it to 24 bits */ - static inline u32 cyc2us(u32 cycles) { /* The Power Management Timer ticks at 3.579545 ticks per microsecond. 
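The restyled alloc_iommu()/free_iommu() above form a next-fit bitmap allocator with deferred flushing: the search resumes at next_bit, the GART is flushed only once the cursor wraps back to 0 (or on every allocation when iommu_fullflush is set), and freeing never flushes, so a freed entry is never handed out again without an intervening flush of the stale translations. A self-contained model of that policy follows; the names (find_zero_run, MODEL_PAGES) and sizes are illustrative rather than the kernel's helpers, and the spinlock is omitted.

#include <stdbool.h>
#include <stdio.h>

#define MODEL_PAGES 64		/* stands in for iommu_pages */

static bool used[MODEL_PAGES];	/* stands in for iommu_gart_bitmap */
static int next_bit;		/* next-fit cursor */
static bool need_flush;		/* set on wrap, consumed by the flush step */

/* find 'size' consecutive free pages at or after 'from'; -1 if none */
static int find_zero_run(int from, int size)
{
	for (int i = from; i + size <= MODEL_PAGES; i++) {
		int j;
		for (j = 0; j < size && !used[i + j]; j++)
			;
		if (j == size)
			return i;
		i += j;		/* restart just past the busy page we hit */
	}
	return -1;
}

static int alloc_iommu_model(int size)
{
	int offset = find_zero_run(next_bit, size);

	if (offset == -1) {	/* nothing above the cursor: wrap and flush */
		need_flush = true;
		offset = find_zero_run(0, size);
	}
	if (offset != -1) {
		for (int i = 0; i < size; i++)
			used[offset + i] = true;
		next_bit = offset + size;
		if (next_bit >= MODEL_PAGES) {
			next_bit = 0;
			need_flush = true;
		}
	}
	return offset;
}

static void free_iommu_model(int offset, int size)
{
	/* freeing never flushes; the flush is deferred to the next wrap */
	for (int i = 0; i < size; i++)
		used[offset + i] = false;
}

int main(void)
{
	int a = alloc_iommu_model(48);	/* fills pages 0..47, cursor at 48 */
	free_iommu_model(a, 48);	/* space is free, cursor unchanged */
	int b = alloc_iommu_model(32);	/* nothing fits above 48: wrap + flush */
	printf("a=%d b=%d need_flush=%d\n", a, b, need_flush);
	return 0;
}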
diff -puN arch/x86/kernel/process_32.c~git-x86 arch/x86/kernel/process_32.c --- a/arch/x86/kernel/process_32.c~git-x86 +++ a/arch/x86/kernel/process_32.c @@ -55,6 +55,7 @@ #include #include +#include asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); @@ -74,7 +75,7 @@ EXPORT_PER_CPU_SYMBOL(cpu_number); */ unsigned long thread_saved_pc(struct task_struct *tsk) { - return ((unsigned long *)tsk->thread.esp)[3]; + return ((unsigned long *)tsk->thread.sp)[3]; } /* @@ -113,10 +114,19 @@ void default_idle(void) smp_mb(); local_irq_disable(); - if (!need_resched()) + if (!need_resched()) { + ktime_t t0, t1; + u64 t0n, t1n; + + t0 = ktime_get(); + t0n = ktime_to_ns(t0); safe_halt(); /* enables interrupts racelessly */ - else - local_irq_enable(); + local_irq_disable(); + t1 = ktime_get(); + t1n = ktime_to_ns(t1); + sched_clock_idle_wakeup_event(t1n - t0n); + } + local_irq_enable(); current_thread_info()->status |= TS_POLLING; } else { /* loop is done by the caller */ @@ -132,7 +142,7 @@ EXPORT_SYMBOL(default_idle); * to poll the ->work.need_resched flag instead of waiting for the * cross-CPU IPI to arrive. Use this option with caution. */ -static void poll_idle (void) +static void poll_idle(void) { cpu_relax(); } @@ -244,13 +254,13 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait); * New with Core Duo processors, MWAIT can take some hints based on CPU * capability. */ -void mwait_idle_with_hints(unsigned long eax, unsigned long ecx) +void mwait_idle_with_hints(unsigned long ax, unsigned long cx) { if (!need_resched()) { __monitor((void *)&current_thread_info()->flags, 0, 0); smp_mb(); if (!need_resched()) - __mwait(eax, ecx); + __mwait(ax, cx); } } @@ -299,15 +309,15 @@ void __show_registers(struct pt_regs *re { unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; unsigned long d0, d1, d2, d3, d6, d7; - unsigned long esp; + unsigned long sp; unsigned short ss, gs; if (user_mode_vm(regs)) { - esp = regs->esp; - ss = regs->xss & 0xffff; + sp = regs->sp; + ss = regs->ss & 0xffff; savesegment(gs, gs); } else { - esp = (unsigned long) (&regs->esp); + sp = (unsigned long) (&regs->sp); savesegment(ss, ss); savesegment(gs, gs); } @@ -320,17 +330,17 @@ void __show_registers(struct pt_regs *re init_utsname()->version); printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n", - 0xffff & regs->xcs, regs->eip, regs->eflags, + 0xffff & regs->cs, regs->ip, regs->flags, smp_processor_id()); - print_symbol("EIP is at %s\n", regs->eip); + print_symbol("EIP is at %s\n", regs->ip); printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", - regs->eax, regs->ebx, regs->ecx, regs->edx); + regs->ax, regs->bx, regs->cx, regs->dx); printk("ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n", - regs->esi, regs->edi, regs->ebp, esp); + regs->si, regs->di, regs->bp, sp); printk(" DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n", - regs->xds & 0xffff, regs->xes & 0xffff, - regs->xfs & 0xffff, gs, ss); + regs->ds & 0xffff, regs->es & 0xffff, + regs->fs & 0xffff, gs, ss); if (!all) return; @@ -358,12 +368,12 @@ void __show_registers(struct pt_regs *re void show_regs(struct pt_regs *regs) { __show_registers(regs, 1); - show_trace(NULL, regs, &regs->esp); + show_trace(NULL, regs, &regs->sp); } /* - * This gets run with %ebx containing the - * function to call, and %edx containing + * This gets run with %bx containing the + * function to call, and %dx containing * the "args". 
*/ extern void kernel_thread_helper(void); @@ -377,16 +387,16 @@ int kernel_thread(int (*fn)(void *), voi memset(&regs, 0, sizeof(regs)); - regs.ebx = (unsigned long) fn; - regs.edx = (unsigned long) arg; + regs.bx = (unsigned long) fn; + regs.dx = (unsigned long) arg; - regs.xds = __USER_DS; - regs.xes = __USER_DS; - regs.xfs = __KERNEL_PERCPU; - regs.orig_eax = -1; - regs.eip = (unsigned long) kernel_thread_helper; - regs.xcs = __KERNEL_CS | get_kernel_rpl(); - regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2; + regs.ds = __USER_DS; + regs.es = __USER_DS; + regs.fs = __KERNEL_PERCPU; + regs.orig_ax = -1; + regs.ip = (unsigned long) kernel_thread_helper; + regs.cs = __KERNEL_CS | get_kernel_rpl(); + regs.flags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2; /* Ok, create the new process.. */ return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL); @@ -424,7 +434,12 @@ void flush_thread(void) { struct task_struct *tsk = current; - memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8); + tsk->thread.debugreg0 = 0; + tsk->thread.debugreg1 = 0; + tsk->thread.debugreg2 = 0; + tsk->thread.debugreg3 = 0; + tsk->thread.debugreg6 = 0; + tsk->thread.debugreg7 = 0; memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); clear_tsk_thread_flag(tsk, TIF_DEBUG); /* @@ -449,7 +464,7 @@ void prepare_to_copy(struct task_struct unlazy_fpu(tsk); } -int copy_thread(int nr, unsigned long clone_flags, unsigned long esp, +int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, unsigned long unused, struct task_struct * p, struct pt_regs * regs) { @@ -459,15 +474,15 @@ int copy_thread(int nr, unsigned long cl childregs = task_pt_regs(p); *childregs = *regs; - childregs->eax = 0; - childregs->esp = esp; + childregs->ax = 0; + childregs->sp = sp; - p->thread.esp = (unsigned long) childregs; - p->thread.esp0 = (unsigned long) (childregs+1); + p->thread.sp = (unsigned long) childregs; + p->thread.sp0 = (unsigned long) (childregs+1); - p->thread.eip = (unsigned long) ret_from_fork; + p->thread.ip = (unsigned long) ret_from_fork; - savesegment(gs,p->thread.gs); + savesegment(gs, p->thread.gs); tsk = current; if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { @@ -480,32 +495,15 @@ int copy_thread(int nr, unsigned long cl set_tsk_thread_flag(p, TIF_IO_BITMAP); } + err = 0; + /* * Set a new TLS for the child thread? */ - if (clone_flags & CLONE_SETTLS) { - struct desc_struct *desc; - struct user_desc info; - int idx; - - err = -EFAULT; - if (copy_from_user(&info, (void __user *)childregs->esi, sizeof(info))) - goto out; - err = -EINVAL; - if (LDT_empty(&info)) - goto out; - - idx = info.entry_number; - if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) - goto out; - - desc = p->thread.tls_array + idx - GDT_ENTRY_TLS_MIN; - desc->a = LDT_entry_a(&info); - desc->b = LDT_entry_b(&info); - } + if (clone_flags & CLONE_SETTLS) + err = do_set_thread_area(p, -1, + (struct user_desc __user *)childregs->si, 0); - err = 0; - out: if (err && p->thread.io_bitmap_ptr) { kfree(p->thread.io_bitmap_ptr); p->thread.io_bitmap_max = 0; @@ -518,54 +516,60 @@ int copy_thread(int nr, unsigned long cl */ void dump_thread(struct pt_regs * regs, struct user * dump) { - int i; + u16 gs; /* changed the size calculations - should hopefully work better. 
lbt */ dump->magic = CMAGIC; dump->start_code = 0; - dump->start_stack = regs->esp & ~(PAGE_SIZE - 1); + dump->start_stack = regs->sp & ~(PAGE_SIZE - 1); dump->u_tsize = ((unsigned long) current->mm->end_code) >> PAGE_SHIFT; dump->u_dsize = ((unsigned long) (current->mm->brk + (PAGE_SIZE-1))) >> PAGE_SHIFT; dump->u_dsize -= dump->u_tsize; dump->u_ssize = 0; - for (i = 0; i < 8; i++) - dump->u_debugreg[i] = current->thread.debugreg[i]; + dump->u_debugreg[0] = current->thread.debugreg0; + dump->u_debugreg[1] = current->thread.debugreg1; + dump->u_debugreg[2] = current->thread.debugreg2; + dump->u_debugreg[3] = current->thread.debugreg3; + dump->u_debugreg[4] = 0; + dump->u_debugreg[5] = 0; + dump->u_debugreg[6] = current->thread.debugreg6; + dump->u_debugreg[7] = current->thread.debugreg7; if (dump->start_stack < TASK_SIZE) dump->u_ssize = ((unsigned long) (TASK_SIZE - dump->start_stack)) >> PAGE_SHIFT; - dump->regs.ebx = regs->ebx; - dump->regs.ecx = regs->ecx; - dump->regs.edx = regs->edx; - dump->regs.esi = regs->esi; - dump->regs.edi = regs->edi; - dump->regs.ebp = regs->ebp; - dump->regs.eax = regs->eax; - dump->regs.ds = regs->xds; - dump->regs.es = regs->xes; - dump->regs.fs = regs->xfs; - savesegment(gs,dump->regs.gs); - dump->regs.orig_eax = regs->orig_eax; - dump->regs.eip = regs->eip; - dump->regs.cs = regs->xcs; - dump->regs.eflags = regs->eflags; - dump->regs.esp = regs->esp; - dump->regs.ss = regs->xss; + dump->regs.bx = regs->bx; + dump->regs.cx = regs->cx; + dump->regs.dx = regs->dx; + dump->regs.si = regs->si; + dump->regs.di = regs->di; + dump->regs.bp = regs->bp; + dump->regs.ax = regs->ax; + dump->regs.ds = (u16)regs->ds; + dump->regs.es = (u16)regs->es; + dump->regs.fs = (u16)regs->fs; + savesegment(gs,gs); + dump->regs.orig_ax = regs->orig_ax; + dump->regs.ip = regs->ip; + dump->regs.cs = (u16)regs->cs; + dump->regs.flags = regs->flags; + dump->regs.sp = regs->sp; + dump->regs.ss = (u16)regs->ss; dump->u_fpvalid = dump_fpu (regs, &dump->i387); } EXPORT_SYMBOL(dump_thread); -/* +/* * Capture the user space registers if the task is not running (in user space) */ int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs) { struct pt_regs ptregs = *task_pt_regs(tsk); - ptregs.xcs &= 0xffff; - ptregs.xds &= 0xffff; - ptregs.xes &= 0xffff; - ptregs.xss &= 0xffff; + ptregs.cs &= 0xffff; + ptregs.ds &= 0xffff; + ptregs.es &= 0xffff; + ptregs.ss &= 0xffff; elf_core_copy_regs(regs, &ptregs); @@ -598,18 +602,32 @@ static noinline void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, struct tss_struct *tss) { - struct thread_struct *next; + struct thread_struct *prev, *next; + unsigned long debugctl; + prev = &prev_p->thread; next = &next_p->thread; + debugctl = prev->debugctlmsr; + if (next->ds_area_msr != prev->ds_area_msr) { + /* we clear debugctl to make sure DS + * is not in use when we change it */ + debugctl = 0; + wrmsrl(MSR_IA32_DEBUGCTLMSR, 0); + wrmsr(MSR_IA32_DS_AREA, next->ds_area_msr, 0); + } + + if (next->debugctlmsr != debugctl) + wrmsr(MSR_IA32_DEBUGCTLMSR, next->debugctlmsr, 0); + if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { - set_debugreg(next->debugreg[0], 0); - set_debugreg(next->debugreg[1], 1); - set_debugreg(next->debugreg[2], 2); - set_debugreg(next->debugreg[3], 3); + set_debugreg(next->debugreg0, 0); + set_debugreg(next->debugreg1, 1); + set_debugreg(next->debugreg2, 2); + set_debugreg(next->debugreg3, 3); /* no 4 and 5 */ - set_debugreg(next->debugreg[6], 6); - set_debugreg(next->debugreg[7], 7); + 
set_debugreg(next->debugreg6, 6); + set_debugreg(next->debugreg7, 7); } #ifdef CONFIG_SECCOMP @@ -623,6 +641,13 @@ __switch_to_xtra(struct task_struct *pre } #endif + if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) + ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); + + if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) + ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); + + if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { /* * Disable the bitmap via an invalid offset. We still cache @@ -676,7 +701,7 @@ __switch_to_xtra(struct task_struct *pre * More important, however, is the fact that this allows us much * more flexibility. * - * The return value (in %eax) will be the "prev" task after + * The return value (in %ax) will be the "prev" task after * the task-switch, and shows up in ret_from_fork in entry.S, * for example. */ @@ -699,7 +724,7 @@ struct task_struct fastcall * __switch_t /* * Reload esp0. */ - load_esp0(tss, next); + load_sp0(tss, next); /* * Save away %gs. No need to save %fs, as it was saved on the @@ -763,7 +788,7 @@ struct task_struct fastcall * __switch_t asmlinkage int sys_fork(struct pt_regs regs) { - return do_fork(SIGCHLD, regs.esp, ®s, 0, NULL, NULL); + return do_fork(SIGCHLD, regs.sp, ®s, 0, NULL, NULL); } asmlinkage int sys_clone(struct pt_regs regs) @@ -772,12 +797,12 @@ asmlinkage int sys_clone(struct pt_regs unsigned long newsp; int __user *parent_tidptr, *child_tidptr; - clone_flags = regs.ebx; - newsp = regs.ecx; - parent_tidptr = (int __user *)regs.edx; - child_tidptr = (int __user *)regs.edi; + clone_flags = regs.bx; + newsp = regs.cx; + parent_tidptr = (int __user *)regs.dx; + child_tidptr = (int __user *)regs.di; if (!newsp) - newsp = regs.esp; + newsp = regs.sp; return do_fork(clone_flags, newsp, ®s, 0, parent_tidptr, child_tidptr); } @@ -793,7 +818,7 @@ asmlinkage int sys_clone(struct pt_regs */ asmlinkage int sys_vfork(struct pt_regs regs) { - return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.esp, ®s, 0, NULL, NULL); + return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.sp, ®s, 0, NULL, NULL); } /* @@ -804,18 +829,15 @@ asmlinkage int sys_execve(struct pt_regs int error; char * filename; - filename = getname((char __user *) regs.ebx); + filename = getname((char __user *) regs.bx); error = PTR_ERR(filename); if (IS_ERR(filename)) goto out; error = do_execve(filename, - (char __user * __user *) regs.ecx, - (char __user * __user *) regs.edx, + (char __user * __user *) regs.cx, + (char __user * __user *) regs.dx, ®s); if (error == 0) { - task_lock(current); - current->ptrace &= ~PT_DTRACE; - task_unlock(current); /* Make sure we don't return using sysenter.. */ set_thread_flag(TIF_IRET); } @@ -829,145 +851,37 @@ out: unsigned long get_wchan(struct task_struct *p) { - unsigned long ebp, esp, eip; + unsigned long bp, sp, ip; unsigned long stack_page; int count = 0; if (!p || p == current || p->state == TASK_RUNNING) return 0; stack_page = (unsigned long)task_stack_page(p); - esp = p->thread.esp; - if (!stack_page || esp < stack_page || esp > top_esp+stack_page) + sp = p->thread.sp; + if (!stack_page || sp < stack_page || sp > top_esp+stack_page) return 0; - /* include/asm-i386/system.h:switch_to() pushes ebp last. */ - ebp = *(unsigned long *) esp; + /* include/asm-i386/system.h:switch_to() pushes bp last. 
*/ + bp = *(unsigned long *) sp; do { - if (ebp < stack_page || ebp > top_ebp+stack_page) + if (bp < stack_page || bp > top_ebp+stack_page) return 0; - eip = *(unsigned long *) (ebp+4); - if (!in_sched_functions(eip)) - return eip; - ebp = *(unsigned long *) ebp; + ip = *(unsigned long *) (bp+4); + if (!in_sched_functions(ip)) + return ip; + bp = *(unsigned long *) bp; } while (count++ < 16); return 0; } -/* - * sys_alloc_thread_area: get a yet unused TLS descriptor index. - */ -static int get_free_idx(void) -{ - struct thread_struct *t = ¤t->thread; - int idx; - - for (idx = 0; idx < GDT_ENTRY_TLS_ENTRIES; idx++) - if (desc_empty(t->tls_array + idx)) - return idx + GDT_ENTRY_TLS_MIN; - return -ESRCH; -} - -/* - * Set a given TLS descriptor: - */ -asmlinkage int sys_set_thread_area(struct user_desc __user *u_info) -{ - struct thread_struct *t = ¤t->thread; - struct user_desc info; - struct desc_struct *desc; - int cpu, idx; - - if (copy_from_user(&info, u_info, sizeof(info))) - return -EFAULT; - idx = info.entry_number; - - /* - * index -1 means the kernel should try to find and - * allocate an empty descriptor: - */ - if (idx == -1) { - idx = get_free_idx(); - if (idx < 0) - return idx; - if (put_user(idx, &u_info->entry_number)) - return -EFAULT; - } - - if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) - return -EINVAL; - - desc = t->tls_array + idx - GDT_ENTRY_TLS_MIN; - - /* - * We must not get preempted while modifying the TLS. - */ - cpu = get_cpu(); - - if (LDT_empty(&info)) { - desc->a = 0; - desc->b = 0; - } else { - desc->a = LDT_entry_a(&info); - desc->b = LDT_entry_b(&info); - } - load_TLS(t, cpu); - - put_cpu(); - - return 0; -} - -/* - * Get the current Thread-Local Storage area: - */ - -#define GET_BASE(desc) ( \ - (((desc)->a >> 16) & 0x0000ffff) | \ - (((desc)->b << 16) & 0x00ff0000) | \ - ( (desc)->b & 0xff000000) ) - -#define GET_LIMIT(desc) ( \ - ((desc)->a & 0x0ffff) | \ - ((desc)->b & 0xf0000) ) - -#define GET_32BIT(desc) (((desc)->b >> 22) & 1) -#define GET_CONTENTS(desc) (((desc)->b >> 10) & 3) -#define GET_WRITABLE(desc) (((desc)->b >> 9) & 1) -#define GET_LIMIT_PAGES(desc) (((desc)->b >> 23) & 1) -#define GET_PRESENT(desc) (((desc)->b >> 15) & 1) -#define GET_USEABLE(desc) (((desc)->b >> 20) & 1) - -asmlinkage int sys_get_thread_area(struct user_desc __user *u_info) -{ - struct user_desc info; - struct desc_struct *desc; - int idx; - - if (get_user(idx, &u_info->entry_number)) - return -EFAULT; - if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) - return -EINVAL; - - memset(&info, 0, sizeof(info)); - - desc = current->thread.tls_array + idx - GDT_ENTRY_TLS_MIN; - - info.entry_number = idx; - info.base_addr = GET_BASE(desc); - info.limit = GET_LIMIT(desc); - info.seg_32bit = GET_32BIT(desc); - info.contents = GET_CONTENTS(desc); - info.read_exec_only = !GET_WRITABLE(desc); - info.limit_in_pages = GET_LIMIT_PAGES(desc); - info.seg_not_present = !GET_PRESENT(desc); - info.useable = GET_USEABLE(desc); - - if (copy_to_user(u_info, &info, sizeof(info))) - return -EFAULT; - return 0; -} - unsigned long arch_align_stack(unsigned long sp) { if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) sp -= get_random_int() % 8192; return sp & ~0xf; } + +unsigned long arch_randomize_brk(struct mm_struct *mm) +{ + unsigned long range_end = mm->brk + 0x02000000; + return randomize_range(mm->brk, range_end, 0) ? 
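[Illustration] get_wchan() above walks the sleeping task's saved frame pointers: each frame stores the caller's frame pointer at *bp and the return address at bp+4 (bp+8 in the 64-bit variant further down). The same walk can be done on one's own stack in userspace on a frame-pointer build; a minimal sketch, assuming -O0 -fno-omit-frame-pointer and x86 frame layout:

#include <stdio.h>

/* Walk our own frame-pointer chain, the way get_wchan() walks a
 * sleeping task's: fp[0] is the caller's frame, fp[1] the return address. */
static void walk_frames(void)
{
	void **fp = __builtin_frame_address(0);
	int depth = 0;

	while (fp && depth++ < 16) {
		printf("frame %d: return address %p\n", depth, fp[1]);
		/* caller frames live at higher addresses; stop on nonsense */
		if ((void **)fp[0] <= fp)
			break;
		fp = (void **)fp[0];
	}
}

static void leaf(void) { walk_frames(); }
static void mid(void)  { leaf(); }

int main(void)
{
	mid();
	return 0;
}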
: mm->brk; +} diff -puN arch/x86/kernel/process_64.c~git-x86 arch/x86/kernel/process_64.c --- a/arch/x86/kernel/process_64.c~git-x86 +++ a/arch/x86/kernel/process_64.c @@ -3,7 +3,7 @@ * * Pentium III FXSR, SSE support * Gareth Hughes , May 2000 - * + * * X86-64 port * Andi Kleen. * @@ -19,19 +19,19 @@ #include #include #include +#include #include #include -#include #include #include #include #include -#include #include #include +#include #include +#include #include -#include #include #include #include @@ -72,13 +72,6 @@ void idle_notifier_register(struct notif { atomic_notifier_chain_register(&idle_notifier, n); } -EXPORT_SYMBOL_GPL(idle_notifier_register); - -void idle_notifier_unregister(struct notifier_block *n) -{ - atomic_notifier_chain_unregister(&idle_notifier, n); -} -EXPORT_SYMBOL(idle_notifier_unregister); void enter_idle(void) { @@ -106,7 +99,7 @@ void exit_idle(void) * We use this if we don't have any better * idle routine.. */ -static void default_idle(void) +void default_idle(void) { current_thread_info()->status &= ~TS_POLLING; /* @@ -116,9 +109,16 @@ static void default_idle(void) smp_mb(); local_irq_disable(); if (!need_resched()) { - /* Enables interrupts one instruction before HLT. - x86 special cases this so there is no race. */ - safe_halt(); + ktime_t t0, t1; + u64 t0n, t1n; + + t0 = ktime_get(); + t0n = ktime_to_ns(t0); + safe_halt(); /* enables interrupts racelessly */ + local_irq_disable(); + t1 = ktime_get(); + t1n = ktime_to_ns(t1); + sched_clock_idle_wakeup_event(t1n - t0n); } else local_irq_enable(); current_thread_info()->status |= TS_POLLING; @@ -129,43 +129,12 @@ static void default_idle(void) * to poll the ->need_resched flag instead of waiting for the * cross-CPU IPI to arrive. Use this option with caution. */ -static void poll_idle (void) +static void poll_idle(void) { local_irq_enable(); cpu_relax(); } -void cpu_idle_wait(void) -{ - unsigned int cpu, this_cpu = get_cpu(); - cpumask_t map, tmp = current->cpus_allowed; - - set_cpus_allowed(current, cpumask_of_cpu(this_cpu)); - put_cpu(); - - cpus_clear(map); - for_each_online_cpu(cpu) { - per_cpu(cpu_idle_state, cpu) = 1; - cpu_set(cpu, map); - } - - __get_cpu_var(cpu_idle_state) = 0; - - wmb(); - do { - ssleep(1); - for_each_online_cpu(cpu) { - if (cpu_isset(cpu, map) && - !per_cpu(cpu_idle_state, cpu)) - cpu_clear(cpu, map); - } - cpus_and(map, map, cpu_online_map); - } while (!cpus_empty(map)); - - set_cpus_allowed(current, tmp); -} -EXPORT_SYMBOL_GPL(cpu_idle_wait); - #ifdef CONFIG_HOTPLUG_CPU DECLARE_PER_CPU(int, cpu_state); @@ -196,7 +165,7 @@ static inline void play_dead(void) * low exit latency (ie sit in a loop waiting for * somebody to say that they'd like to reschedule) */ -void cpu_idle (void) +void cpu_idle(void) { current_thread_info()->status |= TS_POLLING; /* endless idle loop with no priority at all */ @@ -236,6 +205,36 @@ void cpu_idle (void) } } +void cpu_idle_wait(void) +{ + unsigned int cpu, this_cpu = get_cpu(); + cpumask_t map, tmp = current->cpus_allowed; + + set_cpus_allowed(current, cpumask_of_cpu(this_cpu)); + put_cpu(); + + cpus_clear(map); + for_each_online_cpu(cpu) { + per_cpu(cpu_idle_state, cpu) = 1; + cpu_set(cpu, map); + } + + __get_cpu_var(cpu_idle_state) = 0; + + wmb(); + do { + ssleep(1); + for_each_online_cpu(cpu) { + if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu)) + cpu_clear(cpu, map); + } + cpus_and(map, map, cpu_online_map); + } while (!cpus_empty(map)); + + set_cpus_allowed(current, tmp); +} +EXPORT_SYMBOL_GPL(cpu_idle_wait); + /* * This uses new 
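[Illustration] The default_idle() change above brackets the halt with ktime_get() and feeds the slept interval to sched_clock_idle_wakeup_event(), so the scheduler clock can account for time spent with the CPU halted. The same bracketing pattern, shown in userspace with CLOCK_MONOTONIC and a sleep standing in for safe_halt() (illustrative only):

#include <stdio.h>
#include <time.h>
#include <unistd.h>

static unsigned long long ts_ns(const struct timespec *ts)
{
	return (unsigned long long)ts->tv_sec * 1000000000ull + ts->tv_nsec;
}

int main(void)
{
	struct timespec t0, t1;

	clock_gettime(CLOCK_MONOTONIC, &t0);
	usleep(10000);			/* stands in for safe_halt() */
	clock_gettime(CLOCK_MONOTONIC, &t1);

	/* the kernel reports this delta via sched_clock_idle_wakeup_event() */
	printf("idle period: %llu ns\n", ts_ns(&t1) - ts_ns(&t0));
	return 0;
}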
MONITOR/MWAIT instructions on P4 processors with PNI, * which can obviate IPI to trigger checking of need_resched. @@ -246,13 +245,13 @@ void cpu_idle (void) * New with Core Duo processors, MWAIT can take some hints based on CPU * capability. */ -void mwait_idle_with_hints(unsigned long eax, unsigned long ecx) +void mwait_idle_with_hints(unsigned long ax, unsigned long cx) { if (!need_resched()) { __monitor((void *)¤t_thread_info()->flags, 0, 0); smp_mb(); if (!need_resched()) - __mwait(eax, ecx); + __mwait(ax, cx); } } @@ -289,7 +288,7 @@ void __cpuinit select_idle_routine(const } } -static int __init idle_setup (char *str) +static int __init idle_setup(char *str) { if (!strcmp(str, "poll")) { printk("using polling idle threads.\n"); @@ -304,13 +303,13 @@ static int __init idle_setup (char *str) } early_param("idle", idle_setup); -/* Prints also some state that isn't saved in the pt_regs */ +/* Prints also some state that isn't saved in the pt_regs */ void __show_regs(struct pt_regs * regs) { unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs; unsigned long d0, d1, d2, d3, d6, d7; - unsigned int fsindex,gsindex; - unsigned int ds,cs,es; + unsigned int fsindex, gsindex; + unsigned int ds, cs, es; printk("\n"); print_modules(); @@ -319,16 +318,16 @@ void __show_regs(struct pt_regs * regs) init_utsname()->release, (int)strcspn(init_utsname()->version, " "), init_utsname()->version); - printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip); - printk_address(regs->rip); - printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp, - regs->eflags); + printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip); + printk_address(regs->ip); + printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->sp, + regs->flags); printk("RAX: %016lx RBX: %016lx RCX: %016lx\n", - regs->rax, regs->rbx, regs->rcx); + regs->ax, regs->bx, regs->cx); printk("RDX: %016lx RSI: %016lx RDI: %016lx\n", - regs->rdx, regs->rsi, regs->rdi); + regs->dx, regs->si, regs->di); printk("RBP: %016lx R08: %016lx R09: %016lx\n", - regs->rbp, regs->r8, regs->r9); + regs->bp, regs->r8, regs->r9); printk("R10: %016lx R11: %016lx R12: %016lx\n", regs->r10, regs->r11, regs->r12); printk("R13: %016lx R14: %016lx R15: %016lx\n", @@ -379,7 +378,7 @@ void exit_thread(void) struct task_struct *me = current; struct thread_struct *t = &me->thread; - if (me->thread.io_bitmap_ptr) { + if (me->thread.io_bitmap_ptr) { struct tss_struct *tss = &per_cpu(init_tss, get_cpu()); kfree(t->io_bitmap_ptr); @@ -415,7 +414,7 @@ void flush_thread(void) tsk->thread.debugreg3 = 0; tsk->thread.debugreg6 = 0; tsk->thread.debugreg7 = 0; - memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); + memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); /* * Forget coprocessor state.. 
*/ @@ -438,7 +437,7 @@ void release_thread(struct task_struct * static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr) { - struct user_desc ud = { + struct user_desc ud = { .base_addr = addr, .limit = 0xfffff, .seg_32bit = 1, @@ -447,17 +446,13 @@ static inline void set_32bit_tls(struct }; struct n_desc_struct *desc = (void *)t->thread.tls_array; desc += tls; - desc->a = LDT_entry_a(&ud); - desc->b = LDT_entry_b(&ud); + desc->a = LDT_entry_a(&ud); + desc->b = LDT_entry_b(&ud); } static inline u32 read_32bit_tls(struct task_struct *t, int tls) { - struct desc_struct *desc = (void *)t->thread.tls_array; - desc += tls; - return desc->base0 | - (((u32)desc->base1) << 16) | - (((u32)desc->base2) << 24); + return get_desc_base(&t->thread.tls_array[tls]); } /* @@ -469,7 +464,7 @@ void prepare_to_copy(struct task_struct unlazy_fpu(tsk); } -int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp, +int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, unsigned long unused, struct task_struct * p, struct pt_regs * regs) { @@ -481,14 +476,14 @@ int copy_thread(int nr, unsigned long cl (THREAD_SIZE + task_stack_page(p))) - 1; *childregs = *regs; - childregs->rax = 0; - childregs->rsp = rsp; - if (rsp == ~0UL) - childregs->rsp = (unsigned long)childregs; - - p->thread.rsp = (unsigned long) childregs; - p->thread.rsp0 = (unsigned long) (childregs+1); - p->thread.userrsp = me->thread.userrsp; + childregs->ax = 0; + childregs->sp = sp; + if (sp == ~0UL) + childregs->sp = (unsigned long)childregs; + + p->thread.sp = (unsigned long) childregs; + p->thread.sp0 = (unsigned long) (childregs+1); + p->thread.usersp = me->thread.usersp; set_tsk_thread_flag(p, TIF_FORK); @@ -509,7 +504,7 @@ int copy_thread(int nr, unsigned long cl memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr, IO_BITMAP_BYTES); set_tsk_thread_flag(p, TIF_IO_BITMAP); - } + } /* * Set a new TLS for the child thread? 
@@ -517,7 +512,8 @@ int copy_thread(int nr, unsigned long cl if (clone_flags & CLONE_SETTLS) { #ifdef CONFIG_IA32_EMULATION if (test_thread_flag(TIF_IA32)) - err = ia32_child_tls(p, childregs); + err = do_set_thread_area(p, -1, + (struct user_desc __user *)childregs->si, 0); else #endif err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8); @@ -536,17 +532,48 @@ out: /* * This special macro can be used to load a debugging register */ -#define loaddebug(thread,r) set_debugreg(thread->debugreg ## r, r) +#define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r) + +/* + * Capture the user space registers if the task is not running (in user space) + */ +int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs) +{ + struct pt_regs *pp, ptregs; + + pp = task_pt_regs(tsk); + + ptregs = *pp; + ptregs.cs &= 0xffff; + ptregs.ss &= 0xffff; + + elf_core_copy_regs(regs, &ptregs); + + return 1; +} static inline void __switch_to_xtra(struct task_struct *prev_p, - struct task_struct *next_p, - struct tss_struct *tss) + struct task_struct *next_p, + struct tss_struct *tss) { struct thread_struct *prev, *next; + unsigned long debugctl; prev = &prev_p->thread, next = &next_p->thread; + debugctl = prev->debugctlmsr; + if (next->ds_area_msr != prev->ds_area_msr) { + /* we clear debugctl to make sure DS + * is not in use when we change it */ + debugctl = 0; + wrmsrl(MSR_IA32_DEBUGCTLMSR, 0); + wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr); + } + + if (next->debugctlmsr != debugctl) + wrmsrl(MSR_IA32_DEBUGCTLMSR, next->debugctlmsr); + if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { loaddebug(next, 0); loaddebug(next, 1); @@ -570,12 +597,22 @@ static inline void __switch_to_xtra(stru */ memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); } + + /* + * Last branch recording recofiguration of trace hardware and + * disentangling of trace data per task. + */ + if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) + ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); + + if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) + ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); } /* * switch_to(x,y) should switch tasks from x to y. * - * This could still be optimized: + * This could still be optimized: * - fold all the options into a flag word and test it with a single test. * - could test fs/gs bitsliced * @@ -586,7 +623,7 @@ __switch_to(struct task_struct *prev_p, { struct thread_struct *prev = &prev_p->thread, *next = &next_p->thread; - int cpu = smp_processor_id(); + int cpu = smp_processor_id(); struct tss_struct *tss = &per_cpu(init_tss, cpu); /* we're going to use this soon, after a few expensive things */ @@ -596,7 +633,7 @@ __switch_to(struct task_struct *prev_p, /* * Reload esp0, LDT and the page table pointer: */ - tss->rsp0 = next->rsp0; + tss->sp0 = next->sp0; /* * Switch DS and ES. @@ -655,8 +692,8 @@ __switch_to(struct task_struct *prev_p, /* * Switch the PDA and FPU contexts. 
@@ -517,7 +512,8 @@ int copy_thread(int nr, unsigned long cl if (clone_flags & CLONE_SETTLS) { #ifdef CONFIG_IA32_EMULATION if (test_thread_flag(TIF_IA32)) - err = ia32_child_tls(p, childregs); + err = do_set_thread_area(p, -1, + (struct user_desc __user *)childregs->si, 0); else #endif err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8); @@ -536,17 +532,48 @@ out: /* * This special macro can be used to load a debugging register */ -#define loaddebug(thread,r) set_debugreg(thread->debugreg ## r, r) +#define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r) + +/* + * Capture the user space registers if the task is not running (in user space) + */ +int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs) +{ + struct pt_regs *pp, ptregs; + + pp = task_pt_regs(tsk); + + ptregs = *pp; + ptregs.cs &= 0xffff; + ptregs.ss &= 0xffff; + + elf_core_copy_regs(regs, &ptregs); + + return 1; +} static inline void __switch_to_xtra(struct task_struct *prev_p, - struct task_struct *next_p, - struct tss_struct *tss) + struct task_struct *next_p, + struct tss_struct *tss) { struct thread_struct *prev, *next; + unsigned long debugctl; prev = &prev_p->thread, next = &next_p->thread; + debugctl = prev->debugctlmsr; + if (next->ds_area_msr != prev->ds_area_msr) { + /* we clear debugctl to make sure DS + * is not in use when we change it */ + debugctl = 0; + wrmsrl(MSR_IA32_DEBUGCTLMSR, 0); + wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr); + } + + if (next->debugctlmsr != debugctl) + wrmsrl(MSR_IA32_DEBUGCTLMSR, next->debugctlmsr); + if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { loaddebug(next, 0); loaddebug(next, 1); @@ -570,12 +597,22 @@ static inline void __switch_to_xtra(stru */ memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); } + + /* + * Last branch recording reconfiguration of trace hardware and + * disentangling of trace data per task. + */ + if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) + ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); + + if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) + ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); } /* * switch_to(x,y) should switch tasks from x to y. * - * This could still be optimized: + * This could still be optimized: * - fold all the options into a flag word and test it with a single test. * - could test fs/gs bitsliced * @@ -586,7 +623,7 @@ __switch_to(struct task_struct *prev_p, { struct thread_struct *prev = &prev_p->thread, *next = &next_p->thread; - int cpu = smp_processor_id(); + int cpu = smp_processor_id(); struct tss_struct *tss = &per_cpu(init_tss, cpu); /* we're going to use this soon, after a few expensive things */ @@ -596,7 +633,7 @@ __switch_to(struct task_struct *prev_p, /* * Reload esp0, LDT and the page table pointer: */ - tss->rsp0 = next->rsp0; + tss->sp0 = next->sp0; /* * Switch DS and ES. 
*/ - prev->userrsp = read_pda(oldrsp); - write_pda(oldrsp, next->userrsp); + prev->usersp = read_pda(oldrsp); + write_pda(oldrsp, next->usersp); write_pda(pcurrent, next_p); write_pda(kernelstack, @@ -673,8 +710,8 @@ __switch_to(struct task_struct *prev_p, /* * Now maybe reload the debug registers and handle I/O bitmaps */ - if (unlikely((task_thread_info(next_p)->flags & _TIF_WORK_CTXSW)) - || test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) + if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT || + task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV)) __switch_to_xtra(prev_p, next_p, tss); /* If the task has used fpu the last 5 timeslices, just do a full @@ -689,7 +726,7 @@ __switch_to(struct task_struct *prev_p, /* * sys_execve() executes a new program. */ -asmlinkage +asmlinkage long sys_execve(char __user *name, char __user * __user *argv, char __user * __user *envp, struct pt_regs regs) { @@ -701,11 +738,6 @@ long sys_execve(char __user *name, char if (IS_ERR(filename)) return error; error = do_execve(filename, argv, envp, ®s); - if (error == 0) { - task_lock(current); - current->ptrace &= ~PT_DTRACE; - task_unlock(current); - } putname(filename); return error; } @@ -715,18 +747,18 @@ void set_personality_64bit(void) /* inherit personality from parent */ /* Make sure to be in 64bit mode */ - clear_thread_flag(TIF_IA32); + clear_thread_flag(TIF_IA32); /* TBD: overwrites user setup. Should have two bits. But 64bit processes have always behaved this way, so it's not too bad. The main problem is just that - 32bit childs are affected again. */ + 32bit childs are affected again. */ current->personality &= ~READ_IMPLIES_EXEC; } asmlinkage long sys_fork(struct pt_regs *regs) { - return do_fork(SIGCHLD, regs->rsp, regs, 0, NULL, NULL); + return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL); } asmlinkage long @@ -734,7 +766,7 @@ sys_clone(unsigned long clone_flags, uns void __user *parent_tid, void __user *child_tid, struct pt_regs *regs) { if (!newsp) - newsp = regs->rsp; + newsp = regs->sp; return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid); } @@ -750,29 +782,29 @@ sys_clone(unsigned long clone_flags, uns */ asmlinkage long sys_vfork(struct pt_regs *regs) { - return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->rsp, regs, 0, + return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0, NULL, NULL); } unsigned long get_wchan(struct task_struct *p) { unsigned long stack; - u64 fp,rip; + u64 fp,ip; int count = 0; if (!p || p == current || p->state==TASK_RUNNING) return 0; stack = (unsigned long)task_stack_page(p); - if (p->thread.rsp < stack || p->thread.rsp > stack+THREAD_SIZE) + if (p->thread.sp < stack || p->thread.sp > stack+THREAD_SIZE) return 0; - fp = *(u64 *)(p->thread.rsp); + fp = *(u64 *)(p->thread.sp); do { if (fp < (unsigned long)stack || fp > (unsigned long)stack+THREAD_SIZE) return 0; - rip = *(u64 *)(fp+8); - if (!in_sched_functions(rip)) - return rip; + ip = *(u64 *)(fp+8); + if (!in_sched_functions(ip)) + return ip; fp = *(u64 *)fp; } while (count++ < 16); return 0; @@ -813,19 +845,19 @@ long do_arch_prctl(struct task_struct *t /* Not strictly needed for fs, but do it for symmetry with gs */ if (addr >= TASK_SIZE_OF(task)) - return -EPERM; + return -EPERM; cpu = get_cpu(); - /* handle small bases via the GDT because that's faster to + /* handle small bases via the GDT because that's faster to switch. 
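[Illustration] do_arch_prctl() above prefers a GDT slot for bases that fit in 32 bits, because reloading a selector on context switch is cheaper than an MSR write, and falls back to the FS/GS base MSRs for large bases. The same entry points are reachable from userspace via the arch_prctl syscall; a minimal 64-bit-only example reading the current FS base:

#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <asm/prctl.h>

int main(void)
{
	unsigned long fsbase = 0;

	/* ARCH_GET_FS writes the base back through the addr argument */
	if (syscall(SYS_arch_prctl, ARCH_GET_FS, &fsbase) == 0)
		printf("fs base = %#lx\n", fsbase);
	return 0;
}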
*/ - if (addr <= 0xffffffff) { + if (addr <= 0xffffffff) { set_32bit_tls(task, FS_TLS, addr); - if (doit) { - load_TLS(&task->thread, cpu); + if (doit) { + load_TLS(&task->thread, cpu); asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL)); } task->thread.fsindex = FS_TLS_SEL; task->thread.fs = 0; - } else { + } else { task->thread.fsindex = 0; task->thread.fs = addr; if (doit) { @@ -837,24 +869,24 @@ long do_arch_prctl(struct task_struct *t } put_cpu(); break; - case ARCH_GET_FS: { - unsigned long base; + case ARCH_GET_FS: { + unsigned long base; if (task->thread.fsindex == FS_TLS_SEL) base = read_32bit_tls(task, FS_TLS); else if (doit) rdmsrl(MSR_FS_BASE, base); else base = task->thread.fs; - ret = put_user(base, (unsigned long __user *)addr); - break; + ret = put_user(base, (unsigned long __user *)addr); + break; } - case ARCH_GET_GS: { + case ARCH_GET_GS: { unsigned long base; unsigned gsindex; if (task->thread.gsindex == GS_TLS_SEL) base = read_32bit_tls(task, GS_TLS); else if (doit) { - asm("movl %%gs,%0" : "=r" (gsindex)); + asm("movl %%gs,%0" : "=r" (gsindex)); if (gsindex) rdmsrl(MSR_KERNEL_GS_BASE, base); else @@ -862,39 +894,21 @@ long do_arch_prctl(struct task_struct *t } else base = task->thread.gs; - ret = put_user(base, (unsigned long __user *)addr); + ret = put_user(base, (unsigned long __user *)addr); break; } default: ret = -EINVAL; break; - } + } - return ret; -} + return ret; +} long sys_arch_prctl(int code, unsigned long addr) { return do_arch_prctl(current, code, addr); -} - -/* - * Capture the user space registers if the task is not running (in user space) - */ -int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs) -{ - struct pt_regs *pp, ptregs; - - pp = task_pt_regs(tsk); - - ptregs = *pp; - ptregs.cs &= 0xffff; - ptregs.ss &= 0xffff; - - elf_core_copy_regs(regs, &ptregs); - - return 1; } unsigned long arch_align_stack(unsigned long sp) @@ -903,3 +917,10 @@ unsigned long arch_align_stack(unsigned sp -= get_random_int() % 8192; return sp & ~0xf; } + +unsigned long arch_randomize_brk(struct mm_struct *mm) +{ + unsigned long range_end = mm->brk + 0x02000000; + return randomize_range(mm->brk, range_end, 0) ? : mm->brk; +} + diff -puN /dev/null arch/x86/kernel/ptrace.c --- /dev/null +++ a/arch/x86/kernel/ptrace.c @@ -0,0 +1,1349 @@ +/* By Ross Biro 1/23/92 */ +/* + * Pentium III FXSR, SSE support + * Gareth Hughes , May 2000 + * + * BTS tracing + * Markus Metzger , Dec 2007 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/* + * The maximal size of a BTS buffer per traced task in number of BTS + * records. + */ +#define PTRACE_BTS_BUFFER_MAX 4000 + +/* + * does not yet catch signals sent when the child dies. + * in exit.c or in signal.c. + */ + +/* + * Determines which flags the user has access to [1 = access, 0 = no access]. + */ +#define FLAG_MASK_32 ((unsigned long) \ + (X86_EFLAGS_CF | X86_EFLAGS_PF | \ + X86_EFLAGS_AF | X86_EFLAGS_ZF | \ + X86_EFLAGS_SF | X86_EFLAGS_TF | \ + X86_EFLAGS_DF | X86_EFLAGS_OF | \ + X86_EFLAGS_RF | X86_EFLAGS_AC)) + +/* + * Determines whether a value may be installed in a segment register. 
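[Illustration] arch_randomize_brk(), added for both 32- and 64-bit above, picks the initial heap break from a 32 MB window (0x02000000 bytes) above the static brk; the GNU `?:` extension falls back to the unrandomized brk when randomize_range() reports failure by returning 0. The effect is observable from userspace; run this a few times on a kernel with VA-space randomization enabled and the address varies:

#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* initial program break; randomized per-exec by arch_randomize_brk() */
	printf("brk = %p\n", sbrk(0));
	return 0;
}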
+ */ +static inline bool invalid_selector(u16 value) +{ + return unlikely(value != 0 && (value & SEGMENT_RPL_MASK) != USER_RPL); +} + +#ifdef CONFIG_X86_32 + +#define FLAG_MASK FLAG_MASK_32 + +static long *pt_regs_access(struct pt_regs *regs, unsigned long regno) +{ + BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0); + regno >>= 2; + if (regno > FS) + --regno; + return ®s->bx + regno; +} + +static u16 get_segment_reg(struct task_struct *task, unsigned long offset) +{ + /* + * Returning the value truncates it to 16 bits. + */ + unsigned int retval; + if (offset != offsetof(struct user_regs_struct, gs)) + retval = *pt_regs_access(task_pt_regs(task), offset); + else { + retval = task->thread.gs; + if (task == current) + savesegment(gs, retval); + } + return retval; +} + +static int set_segment_reg(struct task_struct *task, + unsigned long offset, u16 value) +{ + /* + * The value argument was already truncated to 16 bits. + */ + if (invalid_selector(value)) + return -EIO; + + if (offset != offsetof(struct user_regs_struct, gs)) + *pt_regs_access(task_pt_regs(task), offset) = value; + else { + task->thread.gs = value; + if (task == current) + /* + * The user-mode %gs is not affected by + * kernel entry, so we must update the CPU. + */ + loadsegment(gs, value); + } + + return 0; +} + +static unsigned long debugreg_addr_limit(struct task_struct *task) +{ + return TASK_SIZE - 3; +} + +#else /* CONFIG_X86_64 */ + +#define FLAG_MASK (FLAG_MASK_32 | X86_EFLAGS_NT) + +static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long offset) +{ + BUILD_BUG_ON(offsetof(struct pt_regs, r15) != 0); + return ®s->r15 + (offset / sizeof(regs->r15)); +} + +static u16 get_segment_reg(struct task_struct *task, unsigned long offset) +{ + /* + * Returning the value truncates it to 16 bits. + */ + unsigned int seg; + + switch (offset) { + case offsetof(struct user_regs_struct, fs): + if (task == current) { + /* Older gas can't assemble movq %?s,%r?? */ + asm("movl %%fs,%0" : "=r" (seg)); + return seg; + } + return task->thread.fsindex; + case offsetof(struct user_regs_struct, gs): + if (task == current) { + asm("movl %%gs,%0" : "=r" (seg)); + return seg; + } + return task->thread.gsindex; + case offsetof(struct user_regs_struct, ds): + if (task == current) { + asm("movl %%ds,%0" : "=r" (seg)); + return seg; + } + return task->thread.ds; + case offsetof(struct user_regs_struct, es): + if (task == current) { + asm("movl %%es,%0" : "=r" (seg)); + return seg; + } + return task->thread.es; + + case offsetof(struct user_regs_struct, cs): + case offsetof(struct user_regs_struct, ss): + break; + } + return *pt_regs_access(task_pt_regs(task), offset); +} + +static int set_segment_reg(struct task_struct *task, + unsigned long offset, u16 value) +{ + /* + * The value argument was already truncated to 16 bits. + */ + if (invalid_selector(value)) + return -EIO; + + switch (offset) { + case offsetof(struct user_regs_struct,fs): + /* + * If this is setting fs as for normal 64-bit use but + * setting fs_base has implicitly changed it, leave it. + */ + if ((value == FS_TLS_SEL && task->thread.fsindex == 0 && + task->thread.fs != 0) || + (value == 0 && task->thread.fsindex == FS_TLS_SEL && + task->thread.fs == 0)) + break; + task->thread.fsindex = value; + if (task == current) + loadsegment(fs, task->thread.fsindex); + break; + case offsetof(struct user_regs_struct,gs): + /* + * If this is setting gs as for normal 64-bit use but + * setting gs_base has implicitly changed it, leave it. 
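[Illustration] Both pt_regs_access() variants above turn a byte offset into struct user_regs_struct directly into a pt_regs slot, relying on the two layouts matching field for field; the BUILD_BUG_ON pins the first field at offset 0. A standalone model of offset-indexed register access, with C11 _Static_assert standing in for BUILD_BUG_ON:

#include <stdio.h>
#include <stddef.h>

struct regs { unsigned long bx, cx, dx, si, di, bp, ax; };

/* The trick: an offset into the user-visible layout indexes the kernel
 * layout directly, provided the first field sits at offset 0. */
_Static_assert(offsetof(struct regs, bx) == 0, "bx must be first");

static unsigned long *reg_at(struct regs *r, size_t offset)
{
	return &r->bx + offset / sizeof(unsigned long);
}

int main(void)
{
	struct regs r = { .ax = 42 };
	printf("ax = %lu\n", *reg_at(&r, offsetof(struct regs, ax)));
	return 0;
}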
+ */ + if ((value == GS_TLS_SEL && task->thread.gsindex == 0 && + task->thread.gs != 0) || + (value == 0 && task->thread.gsindex == GS_TLS_SEL && + task->thread.gs == 0)) + break; + task->thread.gsindex = value; + if (task == current) + load_gs_index(task->thread.gsindex); + break; + case offsetof(struct user_regs_struct,ds): + task->thread.ds = value; + if (task == current) + loadsegment(ds, task->thread.ds); + break; + case offsetof(struct user_regs_struct,es): + task->thread.es = value; + if (task == current) + loadsegment(es, task->thread.es); + break; + + /* + * Can't actually change these in 64-bit mode. + */ + case offsetof(struct user_regs_struct,cs): +#ifdef CONFIG_IA32_EMULATION + if (test_tsk_thread_flag(task, TIF_IA32)) + task_pt_regs(task)->cs = value; +#endif + break; + case offsetof(struct user_regs_struct,ss): +#ifdef CONFIG_IA32_EMULATION + if (test_tsk_thread_flag(task, TIF_IA32)) + task_pt_regs(task)->ss = value; +#endif + break; + } + + return 0; +} + +static unsigned long debugreg_addr_limit(struct task_struct *task) +{ +#ifdef CONFIG_IA32_EMULATION + if (test_tsk_thread_flag(task, TIF_IA32)) + return IA32_PAGE_OFFSET - 3; +#endif + return TASK_SIZE64 - 7; +} + +#endif /* CONFIG_X86_32 */ + +static unsigned long get_flags(struct task_struct *task) +{ + unsigned long retval = task_pt_regs(task)->flags; + + /* + * If the debugger set TF, hide it from the readout. + */ + if (test_tsk_thread_flag(task, TIF_FORCED_TF)) + retval &= ~X86_EFLAGS_TF; + + return retval; +} + +static int set_flags(struct task_struct *task, unsigned long value) +{ + struct pt_regs *regs = task_pt_regs(task); + + /* + * If the user value contains TF, mark that + * it was not "us" (the debugger) that set it. + * If not, make sure it stays set if we had. + */ + if (value & X86_EFLAGS_TF) + clear_tsk_thread_flag(task, TIF_FORCED_TF); + else if (test_tsk_thread_flag(task, TIF_FORCED_TF)) + value |= X86_EFLAGS_TF; + + regs->flags = (regs->flags & ~FLAG_MASK) | (value & FLAG_MASK); + + return 0; +} + +static int putreg(struct task_struct *child, + unsigned long offset, unsigned long value) +{ + switch (offset) { + case offsetof(struct user_regs_struct, cs): + case offsetof(struct user_regs_struct, ds): + case offsetof(struct user_regs_struct, es): + case offsetof(struct user_regs_struct, fs): + case offsetof(struct user_regs_struct, gs): + case offsetof(struct user_regs_struct, ss): + return set_segment_reg(child, offset, value); + + case offsetof(struct user_regs_struct, flags): + return set_flags(child, value); + +#ifdef CONFIG_X86_64 + case offsetof(struct user_regs_struct,fs_base): + if (value >= TASK_SIZE_OF(child)) + return -EIO; + /* + * When changing the segment base, use do_arch_prctl + * to set either thread.fs or thread.fsindex and the + * corresponding GDT slot. + */ + if (child->thread.fs != value) + return do_arch_prctl(child, ARCH_SET_FS, value); + return 0; + case offsetof(struct user_regs_struct,gs_base): + /* + * Exactly the same here as the %fs handling above. 
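[Illustration] get_flags()/set_flags() above cooperate to keep a kernel-owned trap flag invisible to the debugger: if TIF_FORCED_TF is set, reads hide TF, and writes that omit TF keep it set. A minimal model of that bookkeeping, with a boolean standing in for the thread flag:

#include <stdio.h>
#include <stdbool.h>

#define X86_EFLAGS_TF 0x100ul

struct tstate { unsigned long flags; bool forced_tf; };

static unsigned long model_get_flags(const struct tstate *t)
{
	/* hide TF from the readout if it was our doing */
	return t->forced_tf ? t->flags & ~X86_EFLAGS_TF : t->flags;
}

static void model_set_flags(struct tstate *t, unsigned long value)
{
	if (value & X86_EFLAGS_TF)
		t->forced_tf = false;	/* the user owns TF now */
	else if (t->forced_tf)
		value |= X86_EFLAGS_TF;	/* keep the forced TF set */
	t->flags = value;
}

int main(void)
{
	struct tstate t = { .flags = X86_EFLAGS_TF, .forced_tf = true };
	printf("visible flags: %#lx\n", model_get_flags(&t));	/* TF hidden */
	model_set_flags(&t, 0);
	printf("stored flags:  %#lx\n", t.flags);		/* TF preserved */
	return 0;
}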
+ */ + if (value >= TASK_SIZE_OF(child)) + return -EIO; + if (child->thread.gs != value) + return do_arch_prctl(child, ARCH_SET_GS, value); + return 0; +#endif + } + + *pt_regs_access(task_pt_regs(child), offset) = value; + return 0; +} + +static unsigned long getreg(struct task_struct *task, unsigned long offset) +{ + switch (offset) { + case offsetof(struct user_regs_struct, cs): + case offsetof(struct user_regs_struct, ds): + case offsetof(struct user_regs_struct, es): + case offsetof(struct user_regs_struct, fs): + case offsetof(struct user_regs_struct, gs): + case offsetof(struct user_regs_struct, ss): + return get_segment_reg(task, offset); + + case offsetof(struct user_regs_struct, flags): + return get_flags(task); + +#ifdef CONFIG_X86_64 + case offsetof(struct user_regs_struct, fs_base): { + /* + * do_arch_prctl may have used a GDT slot instead of + * the MSR. To userland, it appears the same either + * way, except the %fs segment selector might not be 0. + */ + unsigned int seg = task->thread.fsindex; + if (task->thread.fs != 0) + return task->thread.fs; + if (task == current) + asm("movl %%fs,%0" : "=r" (seg)); + if (seg != FS_TLS_SEL) + return 0; + return get_desc_base(&task->thread.tls_array[FS_TLS]); + } + case offsetof(struct user_regs_struct, gs_base): { + /* + * Exactly the same here as the %fs handling above. + */ + unsigned int seg = task->thread.gsindex; + if (task->thread.gs != 0) + return task->thread.gs; + if (task == current) + asm("movl %%gs,%0" : "=r" (seg)); + if (seg != GS_TLS_SEL) + return 0; + return get_desc_base(&task->thread.tls_array[GS_TLS]); + } +#endif + } + + return *pt_regs_access(task_pt_regs(task), offset); +} + +/* + * This function is trivial and will be inlined by the compiler. + * Having it separates the implementation details of debug + * registers from the interface details of ptrace. + */ +static unsigned long ptrace_get_debugreg(struct task_struct *child, int n) +{ + switch (n) { + case 0: return child->thread.debugreg0; + case 1: return child->thread.debugreg1; + case 2: return child->thread.debugreg2; + case 3: return child->thread.debugreg3; + case 6: return child->thread.debugreg6; + case 7: return child->thread.debugreg7; + } + return 0; +} + +static int ptrace_set_debugreg(struct task_struct *child, + int n, unsigned long data) +{ + int i; + + if (unlikely(n == 4 || n == 5)) + return -EIO; + + if (n < 4 && unlikely(data >= debugreg_addr_limit(child))) + return -EIO; + + switch (n) { + case 0: child->thread.debugreg0 = data; break; + case 1: child->thread.debugreg1 = data; break; + case 2: child->thread.debugreg2 = data; break; + case 3: child->thread.debugreg3 = data; break; + + case 6: + if ((data & ~0xffffffffUL) != 0) + return -EIO; + child->thread.debugreg6 = data; + break; + + case 7: + /* + * Sanity-check data. Take one half-byte at once with + * check = (val >> (16 + 4*i)) & 0xf. It contains the + * R/Wi and LENi bits; bits 0 and 1 are R/Wi, and bits + * 2 and 3 are LENi. Given a list of invalid values, + * we do mask |= 1 << invalid_value, so that + * (mask >> check) & 1 is a correct test for invalid + * values. + * + * R/Wi contains the type of the breakpoint / + * watchpoint, LENi contains the length of the watched + * data in the watchpoint case. + * + * The invalid values are: + * - LENi == 0x10 (undefined), so mask |= 0x0f00. [32-bit] + * - R/Wi == 0x10 (break on I/O reads or writes), so + * mask |= 0x4444. + * - R/Wi == 0x00 && LENi != 0x00, so we have mask |= + * 0x1110. 
+ * + * Finally, mask = 0x0f00 | 0x4444 | 0x1110 == 0x5f54. + * + * See the Intel Manual "System Programming Guide", + * 15.2.4 + * + * Note that LENi == 0x10 is defined on x86_64 in long + * mode (i.e. even for 32-bit userspace software, but + * 64-bit kernel), so the x86_64 mask value is 0x5454. + * See the AMD manual no. 24593 (AMD64 System Programming) + */ +#ifdef CONFIG_X86_32 +#define DR7_MASK 0x5f54 +#else +#define DR7_MASK 0x5554 +#endif + data &= ~DR_CONTROL_RESERVED; + for (i = 0; i < 4; i++) + if ((DR7_MASK >> ((data >> (16 + 4*i)) & 0xf)) & 1) + return -EIO; + child->thread.debugreg7 = data; + if (data) + set_tsk_thread_flag(child, TIF_DEBUG); + else + clear_tsk_thread_flag(child, TIF_DEBUG); + break; + } + + return 0; +} + +static int ptrace_bts_max_buffer_size(void) +{ + return PTRACE_BTS_BUFFER_MAX; +} + +static int ptrace_bts_get_buffer_size(struct task_struct *child) +{ + if (!child->thread.ds_area_msr) + return -ENXIO; + + return ds_get_bts_size((void *)child->thread.ds_area_msr); +} + +static int ptrace_bts_get_index(struct task_struct *child) +{ + if (!child->thread.ds_area_msr) + return -ENXIO; + + return ds_get_bts_index((void *)child->thread.ds_area_msr); +} + +static int ptrace_bts_read_record(struct task_struct *child, + long index, + struct bts_struct __user *out) +{ + struct bts_struct ret; + int retval; + + if (!child->thread.ds_area_msr) + return -ENXIO; + + retval = ds_read_bts((void *)child->thread.ds_area_msr, + index, &ret); + if (retval) + return retval; + + if (copy_to_user(out, &ret, sizeof(ret))) + return -EFAULT; + + return sizeof(ret); +} + +static int ptrace_bts_write_record(struct task_struct *child, + const struct bts_struct *in) +{ + int retval; + + if (!child->thread.ds_area_msr) + return -ENXIO; + + retval = ds_write_bts((void *)child->thread.ds_area_msr, in); + if (retval) + return retval; + + return sizeof(*in); +} + +static int ptrace_bts_config(struct task_struct *child, + unsigned long options) +{ + unsigned long debugctl_mask = ds_debugctl_mask(); + int retval; + + retval = ptrace_bts_get_buffer_size(child); + if (retval < 0) + return retval; + if (retval == 0) + return -ENXIO; + + if (options & PTRACE_BTS_O_TRACE_TASK) { + child->thread.debugctlmsr |= debugctl_mask; + set_tsk_thread_flag(child, TIF_DEBUGCTLMSR); + } else { + /* there is no way for us to check whether we 'own' + * the respective bits in the DEBUGCTL MSR, we're + * about to clear */ + child->thread.debugctlmsr &= ~debugctl_mask; + + if (!child->thread.debugctlmsr) + clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); + } + + if (options & PTRACE_BTS_O_TIMESTAMPS) + set_tsk_thread_flag(child, TIF_BTS_TRACE_TS); + else + clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); + + return 0; +} + +static int ptrace_bts_status(struct task_struct *child) +{ + unsigned long debugctl_mask = ds_debugctl_mask(); + int retval, status = 0; + + retval = ptrace_bts_get_buffer_size(child); + if (retval < 0) + return retval; + if (retval == 0) + return -ENXIO; + + if (ptrace_bts_get_buffer_size(child) <= 0) + return -ENXIO; + + if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) && + child->thread.debugctlmsr & debugctl_mask) + status |= PTRACE_BTS_O_TRACE_TASK; + if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS)) + status |= PTRACE_BTS_O_TIMESTAMPS; + + return status; +} + +static int ptrace_bts_allocate_bts(struct task_struct *child, + int size_in_records) +{ + int retval = 0; + void *ds; + + if (size_in_records < 0) + return -EINVAL; + + if (size_in_records > ptrace_bts_max_buffer_size()) + return 
-EINVAL; + + if (size_in_records == 0) { + /* freeing: leave ds NULL so ds_area_msr is cleared below */ + ds = NULL; + ptrace_bts_config(child, /* options = */ 0); + } else { + retval = ds_allocate(&ds, size_in_records); + if (retval) + return retval; + } + + if (child->thread.ds_area_msr) + ds_free((void **)&child->thread.ds_area_msr); + + child->thread.ds_area_msr = (unsigned long)ds; + if (child->thread.ds_area_msr) + set_tsk_thread_flag(child, TIF_DS_AREA_MSR); + else + clear_tsk_thread_flag(child, TIF_DS_AREA_MSR); + + return retval; +} + +void ptrace_bts_take_timestamp(struct task_struct *tsk, + enum bts_qualifier qualifier) +{ + struct bts_struct rec = { + .qualifier = qualifier, + .variant.timestamp = sched_clock() + }; + + if (ptrace_bts_get_buffer_size(tsk) <= 0) + return; + + ptrace_bts_write_record(tsk, &rec); +} + +/* + * Called by kernel/ptrace.c when detaching.. + * + * Make sure the single step bit is not set. + */ +void ptrace_disable(struct task_struct *child) +{ + user_disable_single_step(child); +#ifdef TIF_SYSCALL_EMU + clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); +#endif + ptrace_bts_config(child, /* options = */ 0); + if (child->thread.ds_area_msr) { + ds_free((void **)&child->thread.ds_area_msr); + clear_tsk_thread_flag(child, TIF_DS_AREA_MSR); + } +} + +long arch_ptrace(struct task_struct *child, long request, long addr, long data) +{ + int i, ret; + unsigned long __user *datap = (unsigned long __user *)data; + + switch (request) { + /* when I and D space are separate, these will need to be fixed. */ + case PTRACE_PEEKTEXT: /* read word at location addr. */ + case PTRACE_PEEKDATA: + ret = generic_ptrace_peekdata(child, addr, data); + break; + + /* read the word at location addr in the USER area. */ + case PTRACE_PEEKUSR: { + unsigned long tmp; + + ret = -EIO; + if ((addr & (sizeof(data) - 1)) || addr < 0 || + addr >= sizeof(struct user)) + break; + + tmp = 0; /* Default return condition */ + if (addr < sizeof(struct user_regs_struct)) + tmp = getreg(child, addr); + else if (addr >= offsetof(struct user, u_debugreg[0]) && + addr <= offsetof(struct user, u_debugreg[7])) { + addr -= offsetof(struct user, u_debugreg[0]); + tmp = ptrace_get_debugreg(child, addr / sizeof(data)); + } + ret = put_user(tmp, datap); + break; + } + + /* when I and D space are separate, this will have to be fixed. */ + case PTRACE_POKETEXT: /* write the word at location addr. */ + case PTRACE_POKEDATA: + ret = generic_ptrace_pokedata(child, addr, data); + break; + + case PTRACE_POKEUSR: /* write the word at location addr in the USER area */ + ret = -EIO; + if ((addr & (sizeof(data) - 1)) || addr < 0 || + addr >= sizeof(struct user)) + break; + + if (addr < sizeof(struct user_regs_struct)) + ret = putreg(child, addr, data); + else if (addr >= offsetof(struct user, u_debugreg[0]) && + addr <= offsetof(struct user, u_debugreg[7])) { + addr -= offsetof(struct user, u_debugreg[0]); + ret = ptrace_set_debugreg(child, + addr / sizeof(data), data); + } + break; + + case PTRACE_GETREGS: { /* Get all gp regs from the child. */ + if (!access_ok(VERIFY_WRITE, datap, sizeof(struct user_regs_struct))) { + ret = -EIO; + break; + } + for (i = 0; i < sizeof(struct user_regs_struct); i += sizeof(long)) { + __put_user(getreg(child, i), datap); + datap++; + } + ret = 0; + break; + } + + case PTRACE_SETREGS: { /* Set all gp regs in the child. 
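[Illustration] ptrace_set_debugreg() above validates each R/W+LEN nibble of DR7 with one mask lookup: every invalid nibble value has its bit set in DR7_MASK, so (mask >> nibble) & 1 flags it. A standalone check using the 32-bit mask value 0x5f54 from the code above:

#include <stdio.h>

#define DR7_MASK_32 0x5f54	/* invalid R/W+LEN nibbles, 32-bit rules */

/* Return 1 if any of DR7's four breakpoint control nibbles is invalid. */
static int dr7_invalid(unsigned long data)
{
	int i;

	for (i = 0; i < 4; i++)
		if ((DR7_MASK_32 >> ((data >> (16 + 4 * i)) & 0xf)) & 1)
			return 1;
	return 0;
}

int main(void)
{
	/* nibble 0x0: execute breakpoint, length field 0: valid */
	printf("%d\n", dr7_invalid(0x0ul << 16));
	/* nibble 0x2: R/W == 2 means break on I/O, rejected here */
	printf("%d\n", dr7_invalid(0x2ul << 16));
	return 0;
}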
*/ + unsigned long tmp; + if (!access_ok(VERIFY_READ, datap, sizeof(struct user_regs_struct))) { + ret = -EIO; + break; + } + for (i = 0; i < sizeof(struct user_regs_struct); i += sizeof(long)) { + __get_user(tmp, datap); + putreg(child, i, tmp); + datap++; + } + ret = 0; + break; + } + + case PTRACE_GETFPREGS: { /* Get the child FPU state. */ + if (!access_ok(VERIFY_WRITE, datap, + sizeof(struct user_i387_struct))) { + ret = -EIO; + break; + } + ret = 0; + if (!tsk_used_math(child)) + init_fpu(child); + get_fpregs((struct user_i387_struct __user *)data, child); + break; + } + + case PTRACE_SETFPREGS: { /* Set the child FPU state. */ + if (!access_ok(VERIFY_READ, datap, + sizeof(struct user_i387_struct))) { + ret = -EIO; + break; + } + set_stopped_child_used_math(child); + set_fpregs(child, (struct user_i387_struct __user *)data); + ret = 0; + break; + } + +#ifdef CONFIG_X86_32 + case PTRACE_GETFPXREGS: { /* Get the child extended FPU state. */ + if (!access_ok(VERIFY_WRITE, datap, + sizeof(struct user_fxsr_struct))) { + ret = -EIO; + break; + } + if (!tsk_used_math(child)) + init_fpu(child); + ret = get_fpxregs((struct user_fxsr_struct __user *)data, child); + break; + } + + case PTRACE_SETFPXREGS: { /* Set the child extended FPU state. */ + if (!access_ok(VERIFY_READ, datap, + sizeof(struct user_fxsr_struct))) { + ret = -EIO; + break; + } + set_stopped_child_used_math(child); + ret = set_fpxregs(child, (struct user_fxsr_struct __user *)data); + break; + } +#endif + +#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION + case PTRACE_GET_THREAD_AREA: + if (addr < 0) + return -EIO; + ret = do_get_thread_area(child, addr, + (struct user_desc __user *) data); + break; + + case PTRACE_SET_THREAD_AREA: + if (addr < 0) + return -EIO; + ret = do_set_thread_area(child, addr, + (struct user_desc __user *) data, 0); + break; +#endif + +#ifdef CONFIG_X86_64 + /* normal 64bit interface to access TLS data. + Works just like arch_prctl, except that the arguments + are reversed. 
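[Illustration] arch_ptrace() above services PTRACE_PEEKUSR by translating a struct user byte offset into either a register (via getreg) or a debug-register slot. From the tracer's side it is a single call; a minimal example reading a stopped child's stack pointer (x86 only, error handling trimmed):

#include <stdio.h>
#include <stddef.h>
#include <signal.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/ptrace.h>
#include <sys/user.h>
#include <sys/wait.h>

int main(void)
{
	pid_t pid = fork();

	if (pid == 0) {				/* child: stop for the tracer */
		ptrace(PTRACE_TRACEME, 0, NULL, NULL);
		raise(SIGSTOP);
		_exit(0);
	}

	waitpid(pid, NULL, 0);
#ifdef __x86_64__
	size_t off = offsetof(struct user_regs_struct, rsp);
#else
	size_t off = offsetof(struct user_regs_struct, esp);
#endif
	long sp = ptrace(PTRACE_PEEKUSER, pid, (void *)off, NULL);
	printf("child sp = %#lx\n", sp);
	ptrace(PTRACE_DETACH, pid, NULL, NULL);
	return 0;
}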
*/ + case PTRACE_ARCH_PRCTL: + ret = do_arch_prctl(child, data, addr); + break; +#endif + + case PTRACE_BTS_MAX_BUFFER_SIZE: + ret = ptrace_bts_max_buffer_size(); + break; + + case PTRACE_BTS_ALLOCATE_BUFFER: + ret = ptrace_bts_allocate_bts(child, data); + break; + + case PTRACE_BTS_GET_BUFFER_SIZE: + ret = ptrace_bts_get_buffer_size(child); + break; + + case PTRACE_BTS_GET_INDEX: + ret = ptrace_bts_get_index(child); + break; + + case PTRACE_BTS_READ_RECORD: + ret = ptrace_bts_read_record + (child, data, + (struct bts_struct __user *) addr); + break; + + case PTRACE_BTS_CONFIG: + ret = ptrace_bts_config(child, data); + break; + + case PTRACE_BTS_STATUS: + ret = ptrace_bts_status(child); + break; + + default: + ret = ptrace_request(child, request, addr, data); + break; + } + + return ret; +} + +#ifdef CONFIG_IA32_EMULATION + +#include +#include +#include +#include +#include + +#define R32(l,q) \ + case offsetof(struct user32, regs.l): \ + regs->q = value; break + +#define SEG32(rs) \ + case offsetof(struct user32, regs.rs): \ + return set_segment_reg(child, \ + offsetof(struct user_regs_struct, rs), \ + value); \ + break + +static int putreg32(struct task_struct *child, unsigned regno, u32 value) +{ + struct pt_regs *regs = task_pt_regs(child); + + switch (regno) { + + SEG32(cs); + SEG32(ds); + SEG32(es); + SEG32(fs); + SEG32(gs); + SEG32(ss); + + R32(ebx, bx); + R32(ecx, cx); + R32(edx, dx); + R32(edi, di); + R32(esi, si); + R32(ebp, bp); + R32(eax, ax); + R32(orig_eax, orig_ax); + R32(eip, ip); + R32(esp, sp); + + case offsetof(struct user32, regs.eflags): + return set_flags(child, value); + + case offsetof(struct user32, u_debugreg[0]) ... + offsetof(struct user32, u_debugreg[7]): + regno -= offsetof(struct user32, u_debugreg[0]); + return ptrace_set_debugreg(child, regno / 4, value); + + default: + if (regno > sizeof(struct user32) || (regno & 3)) + return -EIO; + + /* + * Other dummy fields in the virtual user structure + * are ignored + */ + break; + } + return 0; +} + +#undef R32 +#undef SEG32 + +#define R32(l,q) \ + case offsetof(struct user32, regs.l): \ + *val = regs->q; break + +#define SEG32(rs) \ + case offsetof(struct user32, regs.rs): \ + *val = get_segment_reg(child, \ + offsetof(struct user_regs_struct, rs)); \ + break + +static int getreg32(struct task_struct *child, unsigned regno, u32 *val) +{ + struct pt_regs *regs = task_pt_regs(child); + + switch (regno) { + + SEG32(ds); + SEG32(es); + SEG32(fs); + SEG32(gs); + + R32(cs, cs); + R32(ss, ss); + R32(ebx, bx); + R32(ecx, cx); + R32(edx, dx); + R32(edi, di); + R32(esi, si); + R32(ebp, bp); + R32(eax, ax); + R32(orig_eax, orig_ax); + R32(eip, ip); + R32(esp, sp); + + case offsetof(struct user32, regs.eflags): + *val = get_flags(child); + break; + + case offsetof(struct user32, u_debugreg[0]) ... 
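[Illustration] The BTS requests dispatched above suggest a tracer-side call order: query the limit, allocate a record buffer, then enable tracing. A sketch of that order, inferred from the handlers; note the PTRACE_BTS_* requests, the option bits and struct bts_struct are all introduced by this very series and exist in no released headers, so this fragment is illustrative only and will not build against stock toolchains:

#include <sys/types.h>
#include <sys/ptrace.h>

static long bts_start(pid_t pid)
{
	long max = ptrace(PTRACE_BTS_MAX_BUFFER_SIZE, pid, 0, 0);

	if (max < 0)
		return max;

	/* sizes are in BTS records, not bytes; data carries the size */
	if (ptrace(PTRACE_BTS_ALLOCATE_BUFFER, pid, 0, max) < 0)
		return -1;

	/* trace the task and timestamp its scheduling events */
	return ptrace(PTRACE_BTS_CONFIG, pid, 0,
		      PTRACE_BTS_O_TRACE_TASK | PTRACE_BTS_O_TIMESTAMPS);
}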
+ offsetof(struct user32, u_debugreg[7]): + regno -= offsetof(struct user32, u_debugreg[0]); + *val = ptrace_get_debugreg(child, regno / 4); + break; + + default: + if (regno > sizeof(struct user32) || (regno & 3)) + return -EIO; + + /* + * Other dummy fields in the virtual user structure + * are ignored + */ + *val = 0; + break; + } + return 0; +} + +#undef R32 +#undef SEG32 + +static long ptrace32_siginfo(unsigned request, u32 pid, u32 addr, u32 data) +{ + siginfo_t __user *si = compat_alloc_user_space(sizeof(siginfo_t)); + compat_siginfo_t __user *si32 = compat_ptr(data); + siginfo_t ssi; + int ret; + + if (request == PTRACE_SETSIGINFO) { + memset(&ssi, 0, sizeof(siginfo_t)); + ret = copy_siginfo_from_user32(&ssi, si32); + if (ret) + return ret; + if (copy_to_user(si, &ssi, sizeof(siginfo_t))) + return -EFAULT; + } + ret = sys_ptrace(request, pid, addr, (unsigned long)si); + if (ret) + return ret; + if (request == PTRACE_GETSIGINFO) { + if (copy_from_user(&ssi, si, sizeof(siginfo_t))) + return -EFAULT; + ret = copy_siginfo_to_user32(si32, &ssi); + } + return ret; +} + +asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data) +{ + struct task_struct *child; + struct pt_regs *childregs; + void __user *datap = compat_ptr(data); + int ret; + __u32 val; + + switch (request) { + case PTRACE_TRACEME: + case PTRACE_ATTACH: + case PTRACE_KILL: + case PTRACE_CONT: + case PTRACE_SINGLESTEP: + case PTRACE_SINGLEBLOCK: + case PTRACE_DETACH: + case PTRACE_SYSCALL: + case PTRACE_OLDSETOPTIONS: + case PTRACE_SETOPTIONS: + case PTRACE_SET_THREAD_AREA: + case PTRACE_GET_THREAD_AREA: + case PTRACE_BTS_MAX_BUFFER_SIZE: + case PTRACE_BTS_ALLOCATE_BUFFER: + case PTRACE_BTS_GET_BUFFER_SIZE: + case PTRACE_BTS_GET_INDEX: + case PTRACE_BTS_READ_RECORD: + case PTRACE_BTS_CONFIG: + case PTRACE_BTS_STATUS: + return sys_ptrace(request, pid, addr, data); + + default: + return -EINVAL; + + case PTRACE_PEEKTEXT: + case PTRACE_PEEKDATA: + case PTRACE_POKEDATA: + case PTRACE_POKETEXT: + case PTRACE_POKEUSR: + case PTRACE_PEEKUSR: + case PTRACE_GETREGS: + case PTRACE_SETREGS: + case PTRACE_SETFPREGS: + case PTRACE_GETFPREGS: + case PTRACE_SETFPXREGS: + case PTRACE_GETFPXREGS: + case PTRACE_GETEVENTMSG: + break; + + case PTRACE_SETSIGINFO: + case PTRACE_GETSIGINFO: + return ptrace32_siginfo(request, pid, addr, data); + } + + child = ptrace_get_task_struct(pid); + if (IS_ERR(child)) + return PTR_ERR(child); + + ret = ptrace_check_attach(child, request == PTRACE_KILL); + if (ret < 0) + goto out; + + childregs = task_pt_regs(child); + + switch (request) { + case PTRACE_PEEKDATA: + case PTRACE_PEEKTEXT: + ret = 0; + if (access_process_vm(child, addr, &val, sizeof(u32), 0) != + sizeof(u32)) + ret = -EIO; + else + ret = put_user(val, (unsigned int __user *)datap); + break; + + case PTRACE_POKEDATA: + case PTRACE_POKETEXT: + ret = 0; + if (access_process_vm(child, addr, &data, sizeof(u32), 1) != + sizeof(u32)) + ret = -EIO; + break; + + case PTRACE_PEEKUSR: + ret = getreg32(child, addr, &val); + if (ret == 0) + ret = put_user(val, (__u32 __user *)datap); + break; + + case PTRACE_POKEUSR: + ret = putreg32(child, addr, data); + break; + + case PTRACE_GETREGS: { /* Get all gp regs from the child. 
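[Illustration] putreg32()/getreg32() above map i386 register names onto the 64-bit pt_regs fields with case-generating macros. The same macro shape in a standalone model (two registers only; struct names are illustrative):

#include <stdio.h>
#include <stddef.h>

struct user32_regs { unsigned int ebx, ecx; };
struct kregs { unsigned long bx, cx; };

/* Each R32() expands to a case forwarding a 32-bit slot to the
 * corresponding 64-bit field; regs and value come from the caller. */
#define R32(l, q) \
	case offsetof(struct user32_regs, l): \
		regs->q = value; break

static int putreg32_model(struct kregs *regs, unsigned regno, unsigned value)
{
	switch (regno) {
	R32(ebx, bx);
	R32(ecx, cx);
	default:
		return -1;
	}
	return 0;
}

int main(void)
{
	struct kregs r = { 0, 0 };
	putreg32_model(&r, offsetof(struct user32_regs, ecx), 7);
	printf("cx = %lu\n", r.cx);
	return 0;
}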
*/ + int i; + + if (!access_ok(VERIFY_WRITE, datap, 16*4)) { + ret = -EIO; + break; + } + ret = 0; + for (i = 0; i < sizeof(struct user_regs_struct32); i += sizeof(__u32)) { + getreg32(child, i, &val); + ret |= __put_user(val, (u32 __user *)datap); + datap += sizeof(u32); + } + break; + } + + case PTRACE_SETREGS: { /* Set all gp regs in the child. */ + unsigned long tmp; + int i; + + if (!access_ok(VERIFY_READ, datap, 16*4)) { + ret = -EIO; + break; + } + ret = 0; + for (i = 0; i < sizeof(struct user_regs_struct32); i += sizeof(u32)) { + ret |= __get_user(tmp, (u32 __user *)datap); + putreg32(child, i, tmp); + datap += sizeof(u32); + } + break; + } + + case PTRACE_GETFPREGS: + ret = -EIO; + if (!access_ok(VERIFY_READ, compat_ptr(data), + sizeof(struct user_i387_struct))) + break; + save_i387_ia32(child, datap, childregs, 1); + ret = 0; + break; + + case PTRACE_SETFPREGS: + ret = -EIO; + if (!access_ok(VERIFY_WRITE, datap, + sizeof(struct user_i387_struct))) + break; + ret = 0; + /* don't check EFAULT to be bug-to-bug compatible to i386 */ + restore_i387_ia32(child, datap, 1); + break; + + case PTRACE_GETFPXREGS: { + struct user32_fxsr_struct __user *u = datap; + + init_fpu(child); + ret = -EIO; + if (!access_ok(VERIFY_WRITE, u, sizeof(*u))) + break; + ret = -EFAULT; + if (__copy_to_user(u, &child->thread.i387.fxsave, sizeof(*u))) + break; + ret = __put_user(childregs->cs, &u->fcs); + ret |= __put_user(child->thread.ds, &u->fos); + break; + } + case PTRACE_SETFPXREGS: { + struct user32_fxsr_struct __user *u = datap; + + unlazy_fpu(child); + ret = -EIO; + if (!access_ok(VERIFY_READ, u, sizeof(*u))) + break; + /* + * no checking to be bug-to-bug compatible with i386. + * but silence warning + */ + if (__copy_from_user(&child->thread.i387.fxsave, u, sizeof(*u))) + ; + set_stopped_child_used_math(child); + child->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask; + ret = 0; + break; + } + + case PTRACE_GETEVENTMSG: + ret = put_user(child->ptrace_message, + (unsigned int __user *)compat_ptr(data)); + break; + + default: + BUG(); + } + + out: + put_task_struct(child); + return ret; +} + +#endif /* CONFIG_IA32_EMULATION */ + +#ifdef CONFIG_X86_32 + +void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code) +{ + struct siginfo info; + + tsk->thread.trap_no = 1; + tsk->thread.error_code = error_code; + + memset(&info, 0, sizeof(info)); + info.si_signo = SIGTRAP; + info.si_code = TRAP_BRKPT; + + /* User-mode ip? */ + info.si_addr = user_mode_vm(regs) ? (void __user *) regs->ip : NULL; + + /* Send us the fake SIGTRAP */ + force_sig_info(SIGTRAP, &info, tsk); +} + +/* notification of system call entry/exit + * - triggered by current->work.syscall_trace + */ +__attribute__((regparm(3))) +int do_syscall_trace(struct pt_regs *regs, int entryexit) +{ + int is_sysemu = test_thread_flag(TIF_SYSCALL_EMU); + /* + * With TIF_SYSCALL_EMU set we want to ignore TIF_SINGLESTEP for syscall + * interception + */ + int is_singlestep = !is_sysemu && test_thread_flag(TIF_SINGLESTEP); + int ret = 0; + + /* do the secure computing check first */ + if (!entryexit) + secure_computing(regs->orig_ax); + + if (unlikely(current->audit_context)) { + if (entryexit) + audit_syscall_exit(AUDITSC_RESULT(regs->ax), + regs->ax); + /* Debug traps, when using PTRACE_SINGLESTEP, must be sent only + * on the syscall exit path. 
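[Illustration] send_sigtrap() above fakes a debug-trap siginfo: si_code is TRAP_BRKPT and si_addr carries the user-mode ip (or NULL for a kernel-mode trap). The tracee-side view of such a trap can be observed with a SIGTRAP handler; a minimal x86 example that triggers one with int3 (si_code values may vary by kernel, so the handler just prints them):

#include <stdio.h>
#include <stdlib.h>
#include <signal.h>
#include <string.h>

static void on_trap(int sig, siginfo_t *si, void *ctx)
{
	(void)sig; (void)ctx;
	printf("SIGTRAP: si_code=%d si_addr=%p\n", si->si_code, si->si_addr);
	exit(0);
}

int main(void)
{
	struct sigaction sa;

	memset(&sa, 0, sizeof(sa));
	sa.sa_sigaction = on_trap;
	sa.sa_flags = SA_SIGINFO;
	sigaction(SIGTRAP, &sa, NULL);

	asm volatile("int3");		/* raise a breakpoint trap */
	return 0;
}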
Normally, when TIF_SYSCALL_AUDIT is + * not used, entry.S will call us only on syscall exit, not + * entry; so when TIF_SYSCALL_AUDIT is used we must avoid + * calling send_sigtrap() on syscall entry. + * + * Note that when PTRACE_SYSEMU_SINGLESTEP is used, + * is_singlestep is false, despite its name, so we will still do + * the correct thing. + */ + else if (is_singlestep) + goto out; + } + + if (!(current->ptrace & PT_PTRACED)) + goto out; + + /* If a process stops on the 1st tracepoint with SYSCALL_TRACE + * and then is resumed with SYSEMU_SINGLESTEP, it will come in + * here. We have to check this and return */ + if (is_sysemu && entryexit) + return 0; + + /* Fake a debug trap */ + if (is_singlestep) + send_sigtrap(current, regs, 0); + + if (!test_thread_flag(TIF_SYSCALL_TRACE) && !is_sysemu) + goto out; + + /* the 0x80 provides a way for the tracing parent to distinguish + between a syscall stop and SIGTRAP delivery */ + /* Note that the debugger could change the result of test_thread_flag!*/ + ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) ? 0x80:0)); + + /* + * this isn't the same as continuing with a signal, but it will do + * for normal use. strace only continues with a signal if the + * stopping signal is not SIGTRAP. -brl + */ + if (current->exit_code) { + send_sig(current->exit_code, current, 1); + current->exit_code = 0; + } + ret = is_sysemu; +out: + if (unlikely(current->audit_context) && !entryexit) + audit_syscall_entry(AUDIT_ARCH_I386, regs->orig_ax, + regs->bx, regs->cx, regs->dx, regs->si); + if (ret == 0) + return 0; + + regs->orig_ax = -1; /* force skip of syscall restarting */ + if (unlikely(current->audit_context)) + audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); + return 1; +} + +#else /* CONFIG_X86_64 */ + +static void syscall_trace(struct pt_regs *regs) +{ + +#if 0 + printk("trace %s ip %lx sp %lx ax %d origrax %d caller %lx tiflags %x ptrace %x\n", + current->comm, + regs->ip, regs->sp, regs->ax, regs->orig_ax, __builtin_return_address(0), + current_thread_info()->flags, current->ptrace); +#endif + + ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) + ? 0x80 : 0)); + /* + * this isn't the same as continuing with a signal, but it will do + * for normal use. strace only continues with a signal if the + * stopping signal is not SIGTRAP. 
-brl + */ + if (current->exit_code) { + send_sig(current->exit_code, current, 1); + current->exit_code = 0; + } +} + +asmlinkage void syscall_trace_enter(struct pt_regs *regs) +{ + /* do the secure computing check first */ + secure_computing(regs->orig_ax); + + if (test_thread_flag(TIF_SYSCALL_TRACE) + && (current->ptrace & PT_PTRACED)) + syscall_trace(regs); + + if (unlikely(current->audit_context)) { + if (test_thread_flag(TIF_IA32)) { + audit_syscall_entry(AUDIT_ARCH_I386, + regs->orig_ax, + regs->bx, regs->cx, + regs->dx, regs->si); + } else { + audit_syscall_entry(AUDIT_ARCH_X86_64, + regs->orig_ax, + regs->di, regs->si, + regs->dx, regs->r10); + } + } +} + +asmlinkage void syscall_trace_leave(struct pt_regs *regs) +{ + if (unlikely(current->audit_context)) + audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); + + if ((test_thread_flag(TIF_SYSCALL_TRACE) + || test_thread_flag(TIF_SINGLESTEP)) + && (current->ptrace & PT_PTRACED)) + syscall_trace(regs); +} + +#endif /* CONFIG_X86_32 */ diff -puN arch/x86/kernel/ptrace_32.c~git-x86 /dev/null --- a/arch/x86/kernel/ptrace_32.c +++ /dev/null @@ -1,717 +0,0 @@ -/* By Ross Biro 1/23/92 */ -/* - * Pentium III FXSR, SSE support - * Gareth Hughes , May 2000 - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * does not yet catch signals sent when the child dies. - * in exit.c or in signal.c. - */ - -/* - * Determines which flags the user has access to [1 = access, 0 = no access]. - * Prohibits changing ID(21), VIP(20), VIF(19), VM(17), NT(14), IOPL(12-13), IF(9). - * Also masks reserved bits (31-22, 15, 5, 3, 1). - */ -#define FLAG_MASK 0x00050dd5 - -/* set's the trap flag. */ -#define TRAP_FLAG 0x100 - -/* - * Offset of eflags on child stack.. - */ -#define EFL_OFFSET offsetof(struct pt_regs, eflags) - -static inline struct pt_regs *get_child_regs(struct task_struct *task) -{ - void *stack_top = (void *)task->thread.esp0; - return stack_top - sizeof(struct pt_regs); -} - -/* - * This routine will get a word off of the processes privileged stack. - * the offset is bytes into the pt_regs structure on the stack. - * This routine assumes that all the privileged stacks are in our - * data space. - */ -static inline int get_stack_long(struct task_struct *task, int offset) -{ - unsigned char *stack; - - stack = (unsigned char *)task->thread.esp0 - sizeof(struct pt_regs); - stack += offset; - return (*((int *)stack)); -} - -/* - * This routine will put a word on the processes privileged stack. - * the offset is bytes into the pt_regs structure on the stack. - * This routine assumes that all the privileged stacks are in our - * data space. 
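[Illustration] Both the 32-bit and 64-bit trace paths above notify the tracer with SIGTRAP, OR-ing in 0x80 when PT_TRACESYSGOOD is set so the parent can tell syscall stops from genuine traps. The tracer side of that contract, as a minimal loop (signal forwarding omitted for brevity):

#include <stdio.h>
#include <signal.h>
#include <unistd.h>
#include <sys/ptrace.h>
#include <sys/wait.h>

int main(void)
{
	pid_t pid = fork();
	int status;

	if (pid == 0) {
		ptrace(PTRACE_TRACEME, 0, NULL, NULL);
		raise(SIGSTOP);
		write(1, "hi\n", 3);		/* a syscall to observe */
		_exit(0);
	}

	waitpid(pid, &status, 0);
	ptrace(PTRACE_SETOPTIONS, pid, NULL, (void *)PTRACE_O_TRACESYSGOOD);

	for (;;) {
		ptrace(PTRACE_SYSCALL, pid, NULL, NULL);
		waitpid(pid, &status, 0);
		if (WIFEXITED(status))
			break;
		/* 0x80 marks a syscall stop, not a genuine SIGTRAP */
		if (WSTOPSIG(status) == (SIGTRAP | 0x80))
			printf("syscall stop\n");
	}
	return 0;
}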
- */ -static inline int put_stack_long(struct task_struct *task, int offset, - unsigned long data) -{ - unsigned char * stack; - - stack = (unsigned char *)task->thread.esp0 - sizeof(struct pt_regs); - stack += offset; - *(unsigned long *) stack = data; - return 0; -} - -static int putreg(struct task_struct *child, - unsigned long regno, unsigned long value) -{ - switch (regno >> 2) { - case GS: - if (value && (value & 3) != 3) - return -EIO; - child->thread.gs = value; - return 0; - case DS: - case ES: - case FS: - if (value && (value & 3) != 3) - return -EIO; - value &= 0xffff; - break; - case SS: - case CS: - if ((value & 3) != 3) - return -EIO; - value &= 0xffff; - break; - case EFL: - value &= FLAG_MASK; - value |= get_stack_long(child, EFL_OFFSET) & ~FLAG_MASK; - break; - } - if (regno > FS*4) - regno -= 1*4; - put_stack_long(child, regno, value); - return 0; -} - -static unsigned long getreg(struct task_struct *child, - unsigned long regno) -{ - unsigned long retval = ~0UL; - - switch (regno >> 2) { - case GS: - retval = child->thread.gs; - break; - case DS: - case ES: - case FS: - case SS: - case CS: - retval = 0xffff; - /* fall through */ - default: - if (regno > FS*4) - regno -= 1*4; - retval &= get_stack_long(child, regno); - } - return retval; -} - -#define LDT_SEGMENT 4 - -static unsigned long convert_eip_to_linear(struct task_struct *child, struct pt_regs *regs) -{ - unsigned long addr, seg; - - addr = regs->eip; - seg = regs->xcs & 0xffff; - if (regs->eflags & VM_MASK) { - addr = (addr & 0xffff) + (seg << 4); - return addr; - } - - /* - * We'll assume that the code segments in the GDT - * are all zero-based. That is largely true: the - * TLS segments are used for data, and the PNPBIOS - * and APM bios ones we just ignore here. - */ - if (seg & LDT_SEGMENT) { - u32 *desc; - unsigned long base; - - seg &= ~7UL; - - mutex_lock(&child->mm->context.lock); - if (unlikely((seg >> 3) >= child->mm->context.size)) - addr = -1L; /* bogus selector, access would fault */ - else { - desc = child->mm->context.ldt + seg; - base = ((desc[0] >> 16) | - ((desc[1] & 0xff) << 16) | - (desc[1] & 0xff000000)); - - /* 16-bit code segment? */ - if (!((desc[1] >> 22) & 1)) - addr &= 0xffff; - addr += base; - } - mutex_unlock(&child->mm->context.lock); - } - return addr; -} - -static inline int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs) -{ - int i, copied; - unsigned char opcode[15]; - unsigned long addr = convert_eip_to_linear(child, regs); - - copied = access_process_vm(child, addr, opcode, sizeof(opcode), 0); - for (i = 0; i < copied; i++) { - switch (opcode[i]) { - /* popf and iret */ - case 0x9d: case 0xcf: - return 1; - /* opcode and address size prefixes */ - case 0x66: case 0x67: - continue; - /* irrelevant prefixes (segment overrides and repeats) */ - case 0x26: case 0x2e: - case 0x36: case 0x3e: - case 0x64: case 0x65: - case 0xf0: case 0xf2: case 0xf3: - continue; - - /* - * pushf: NOTE! We should probably not let - * the user see the TF bit being set. But - * it's more pain than it's worth to avoid - * it, and a debugger could emulate this - * all in user space if it _really_ cares. - */ - case 0x9c: - default: - return 0; - } - } - return 0; -} - -static void set_singlestep(struct task_struct *child) -{ - struct pt_regs *regs = get_child_regs(child); - - /* - * Always set TIF_SINGLESTEP - this guarantees that - * we single-step system calls etc.. This will also - * cause us to set TF when returning to user mode. 
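The EFL case in putreg() above is the usual merge-user-bits pattern: only bits in FLAG_MASK may come from the tracer, everything else is preserved from the saved kernel value. Written out on its own (mask value as defined earlier in this file):

#define FLAG_MASK 0x00050dd5    /* user-writable EFLAGS bits (i386) */

static unsigned long merge_eflags(unsigned long saved, unsigned long user)
{
        return (user & FLAG_MASK) | (saved & ~FLAG_MASK);
}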
- */ - set_tsk_thread_flag(child, TIF_SINGLESTEP); - - /* - * If TF was already set, don't do anything else - */ - if (regs->eflags & TRAP_FLAG) - return; - - /* Set TF on the kernel stack.. */ - regs->eflags |= TRAP_FLAG; - - /* - * ..but if TF is changed by the instruction we will trace, - * don't mark it as being "us" that set it, so that we - * won't clear it by hand later. - */ - if (is_setting_trap_flag(child, regs)) - return; - - child->ptrace |= PT_DTRACE; -} - -static void clear_singlestep(struct task_struct *child) -{ - /* Always clear TIF_SINGLESTEP... */ - clear_tsk_thread_flag(child, TIF_SINGLESTEP); - - /* But touch TF only if it was set by us.. */ - if (child->ptrace & PT_DTRACE) { - struct pt_regs *regs = get_child_regs(child); - regs->eflags &= ~TRAP_FLAG; - child->ptrace &= ~PT_DTRACE; - } -} - -/* - * Called by kernel/ptrace.c when detaching.. - * - * Make sure the single step bit is not set. - */ -void ptrace_disable(struct task_struct *child) -{ - clear_singlestep(child); - clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); -} - -/* - * Perform get_thread_area on behalf of the traced child. - */ -static int -ptrace_get_thread_area(struct task_struct *child, - int idx, struct user_desc __user *user_desc) -{ - struct user_desc info; - struct desc_struct *desc; - -/* - * Get the current Thread-Local Storage area: - */ - -#define GET_BASE(desc) ( \ - (((desc)->a >> 16) & 0x0000ffff) | \ - (((desc)->b << 16) & 0x00ff0000) | \ - ( (desc)->b & 0xff000000) ) - -#define GET_LIMIT(desc) ( \ - ((desc)->a & 0x0ffff) | \ - ((desc)->b & 0xf0000) ) - -#define GET_32BIT(desc) (((desc)->b >> 22) & 1) -#define GET_CONTENTS(desc) (((desc)->b >> 10) & 3) -#define GET_WRITABLE(desc) (((desc)->b >> 9) & 1) -#define GET_LIMIT_PAGES(desc) (((desc)->b >> 23) & 1) -#define GET_PRESENT(desc) (((desc)->b >> 15) & 1) -#define GET_USEABLE(desc) (((desc)->b >> 20) & 1) - - if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) - return -EINVAL; - - desc = child->thread.tls_array + idx - GDT_ENTRY_TLS_MIN; - - info.entry_number = idx; - info.base_addr = GET_BASE(desc); - info.limit = GET_LIMIT(desc); - info.seg_32bit = GET_32BIT(desc); - info.contents = GET_CONTENTS(desc); - info.read_exec_only = !GET_WRITABLE(desc); - info.limit_in_pages = GET_LIMIT_PAGES(desc); - info.seg_not_present = !GET_PRESENT(desc); - info.useable = GET_USEABLE(desc); - - if (copy_to_user(user_desc, &info, sizeof(info))) - return -EFAULT; - - return 0; -} - -/* - * Perform set_thread_area on behalf of the traced child. - */ -static int -ptrace_set_thread_area(struct task_struct *child, - int idx, struct user_desc __user *user_desc) -{ - struct user_desc info; - struct desc_struct *desc; - - if (copy_from_user(&info, user_desc, sizeof(info))) - return -EFAULT; - - if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) - return -EINVAL; - - desc = child->thread.tls_array + idx - GDT_ENTRY_TLS_MIN; - if (LDT_empty(&info)) { - desc->a = 0; - desc->b = 0; - } else { - desc->a = LDT_entry_a(&info); - desc->b = LDT_entry_b(&info); - } - - return 0; -} - -long arch_ptrace(struct task_struct *child, long request, long addr, long data) -{ - struct user * dummy = NULL; - int i, ret; - unsigned long __user *datap = (unsigned long __user *)data; - - switch (request) { - /* when I and D space are separate, these will need to be fixed. */ - case PTRACE_PEEKTEXT: /* read word at location addr. */ - case PTRACE_PEEKDATA: - ret = generic_ptrace_peekdata(child, addr, data); - break; - - /* read the word at location addr in the USER area. 
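The GET_BASE()/GET_LIMIT() helpers in ptrace_get_thread_area() above reassemble fields that the x86 descriptor format scatters across the two 32-bit descriptor words. The same extraction written as plain functions, for readability (a and b are the low and high words, as in desc_struct):

static unsigned long desc_base(unsigned int a, unsigned int b)
{
        return ((a >> 16) & 0x0000ffff) |       /* base bits 15..0  */
               ((b << 16) & 0x00ff0000) |       /* base bits 23..16 */
               (b & 0xff000000);                /* base bits 31..24 */
}

static unsigned long desc_limit(unsigned int a, unsigned int b)
{
        return (a & 0x0ffff) | (b & 0xf0000);   /* limit bits 19..0 */
}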
*/ - case PTRACE_PEEKUSR: { - unsigned long tmp; - - ret = -EIO; - if ((addr & 3) || addr < 0 || - addr > sizeof(struct user) - 3) - break; - - tmp = 0; /* Default return condition */ - if(addr < FRAME_SIZE*sizeof(long)) - tmp = getreg(child, addr); - if(addr >= (long) &dummy->u_debugreg[0] && - addr <= (long) &dummy->u_debugreg[7]){ - addr -= (long) &dummy->u_debugreg[0]; - addr = addr >> 2; - tmp = child->thread.debugreg[addr]; - } - ret = put_user(tmp, datap); - break; - } - - /* when I and D space are separate, this will have to be fixed. */ - case PTRACE_POKETEXT: /* write the word at location addr. */ - case PTRACE_POKEDATA: - ret = generic_ptrace_pokedata(child, addr, data); - break; - - case PTRACE_POKEUSR: /* write the word at location addr in the USER area */ - ret = -EIO; - if ((addr & 3) || addr < 0 || - addr > sizeof(struct user) - 3) - break; - - if (addr < FRAME_SIZE*sizeof(long)) { - ret = putreg(child, addr, data); - break; - } - /* We need to be very careful here. We implicitly - want to modify a portion of the task_struct, and we - have to be selective about what portions we allow someone - to modify. */ - - ret = -EIO; - if(addr >= (long) &dummy->u_debugreg[0] && - addr <= (long) &dummy->u_debugreg[7]){ - - if(addr == (long) &dummy->u_debugreg[4]) break; - if(addr == (long) &dummy->u_debugreg[5]) break; - if(addr < (long) &dummy->u_debugreg[4] && - ((unsigned long) data) >= TASK_SIZE-3) break; - - /* Sanity-check data. Take one half-byte at once with - * check = (val >> (16 + 4*i)) & 0xf. It contains the - * R/Wi and LENi bits; bits 0 and 1 are R/Wi, and bits - * 2 and 3 are LENi. Given a list of invalid values, - * we do mask |= 1 << invalid_value, so that - * (mask >> check) & 1 is a correct test for invalid - * values. - * - * R/Wi contains the type of the breakpoint / - * watchpoint, LENi contains the length of the watched - * data in the watchpoint case. - * - * The invalid values are: - * - LENi == 0x10 (undefined), so mask |= 0x0f00. - * - R/Wi == 0x10 (break on I/O reads or writes), so - * mask |= 0x4444. - * - R/Wi == 0x00 && LENi != 0x00, so we have mask |= - * 0x1110. - * - * Finally, mask = 0x0f00 | 0x4444 | 0x1110 == 0x5f54. - * - * See the Intel Manual "System Programming Guide", - * 15.2.4 - * - * Note that LENi == 0x10 is defined on x86_64 in long - * mode (i.e. even for 32-bit userspace software, but - * 64-bit kernel), so the x86_64 mask value is 0x5454. - * See the AMD manual no. 24593 (AMD64 System - * Programming)*/ - - if(addr == (long) &dummy->u_debugreg[7]) { - data &= ~DR_CONTROL_RESERVED; - for(i=0; i<4; i++) - if ((0x5f54 >> ((data >> (16 + 4*i)) & 0xf)) & 1) - goto out_tsk; - if (data) - set_tsk_thread_flag(child, TIF_DEBUG); - else - clear_tsk_thread_flag(child, TIF_DEBUG); - } - addr -= (long) &dummy->u_debugreg; - addr = addr >> 2; - child->thread.debugreg[addr] = data; - ret = 0; - } - break; - - case PTRACE_SYSEMU: /* continue and stop at next syscall, which will not be executed */ - case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */ - case PTRACE_CONT: /* restart after signal. 
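The debug-register mask explained in the long comment above is compact but cryptic on first read. The same validity test pulled out into a standalone helper (0x5f54 is the i386 mask derived in that comment; the x86-64 variant of this check, in ptrace_64.c below, uses 0x5554 because LENi == 2 is a legal 8-byte watchpoint in long mode):

/* Non-zero if any DR7 R/Wi-LENi nibble is invalid on i386. */
static int dr7_invalid(unsigned long data)
{
        int i;

        for (i = 0; i < 4; i++) {
                unsigned int check = (data >> (16 + 4 * i)) & 0xf;

                if ((0x5f54 >> check) & 1)
                        return 1;
        }
        return 0;
}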
*/ - ret = -EIO; - if (!valid_signal(data)) - break; - if (request == PTRACE_SYSEMU) { - set_tsk_thread_flag(child, TIF_SYSCALL_EMU); - clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - } else if (request == PTRACE_SYSCALL) { - set_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); - } else { - clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); - clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - } - child->exit_code = data; - /* make sure the single step bit is not set. */ - clear_singlestep(child); - wake_up_process(child); - ret = 0; - break; - -/* - * make the child exit. Best I can do is send it a sigkill. - * perhaps it should be put in the status that it wants to - * exit. - */ - case PTRACE_KILL: - ret = 0; - if (child->exit_state == EXIT_ZOMBIE) /* already dead */ - break; - child->exit_code = SIGKILL; - /* make sure the single step bit is not set. */ - clear_singlestep(child); - wake_up_process(child); - break; - - case PTRACE_SYSEMU_SINGLESTEP: /* Same as SYSEMU, but singlestep if not syscall */ - case PTRACE_SINGLESTEP: /* set the trap flag. */ - ret = -EIO; - if (!valid_signal(data)) - break; - - if (request == PTRACE_SYSEMU_SINGLESTEP) - set_tsk_thread_flag(child, TIF_SYSCALL_EMU); - else - clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); - - clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - set_singlestep(child); - child->exit_code = data; - /* give it a chance to run. */ - wake_up_process(child); - ret = 0; - break; - - case PTRACE_GETREGS: { /* Get all gp regs from the child. */ - if (!access_ok(VERIFY_WRITE, datap, FRAME_SIZE*sizeof(long))) { - ret = -EIO; - break; - } - for ( i = 0; i < FRAME_SIZE*sizeof(long); i += sizeof(long) ) { - __put_user(getreg(child, i), datap); - datap++; - } - ret = 0; - break; - } - - case PTRACE_SETREGS: { /* Set all gp regs in the child. */ - unsigned long tmp; - if (!access_ok(VERIFY_READ, datap, FRAME_SIZE*sizeof(long))) { - ret = -EIO; - break; - } - for ( i = 0; i < FRAME_SIZE*sizeof(long); i += sizeof(long) ) { - __get_user(tmp, datap); - putreg(child, i, tmp); - datap++; - } - ret = 0; - break; - } - - case PTRACE_GETFPREGS: { /* Get the child FPU state. */ - if (!access_ok(VERIFY_WRITE, datap, - sizeof(struct user_i387_struct))) { - ret = -EIO; - break; - } - ret = 0; - if (!tsk_used_math(child)) - init_fpu(child); - get_fpregs((struct user_i387_struct __user *)data, child); - break; - } - - case PTRACE_SETFPREGS: { /* Set the child FPU state. */ - if (!access_ok(VERIFY_READ, datap, - sizeof(struct user_i387_struct))) { - ret = -EIO; - break; - } - set_stopped_child_used_math(child); - set_fpregs(child, (struct user_i387_struct __user *)data); - ret = 0; - break; - } - - case PTRACE_GETFPXREGS: { /* Get the child extended FPU state. */ - if (!access_ok(VERIFY_WRITE, datap, - sizeof(struct user_fxsr_struct))) { - ret = -EIO; - break; - } - if (!tsk_used_math(child)) - init_fpu(child); - ret = get_fpxregs((struct user_fxsr_struct __user *)data, child); - break; - } - - case PTRACE_SETFPXREGS: { /* Set the child extended FPU state. 
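PTRACE_SYSEMU, PTRACE_SYSCALL and the SIGTRAP | 0x80 convention handled above are consumed from the tracer's side. For context, a minimal userspace tracer using the standard ptrace(2) interface (error handling omitted for brevity):

#include <stdio.h>
#include <signal.h>
#include <unistd.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/wait.h>

int main(void)
{
        pid_t child = fork();
        int status;

        if (child == 0) {
                ptrace(PTRACE_TRACEME, 0, NULL, NULL);
                raise(SIGSTOP);
                write(1, "hi\n", 3);
                _exit(0);
        }
        waitpid(child, &status, 0);
        /* make syscall stops report SIGTRAP | 0x80 */
        ptrace(PTRACE_SETOPTIONS, child, NULL,
               (void *)PTRACE_O_TRACESYSGOOD);
        for (;;) {
                ptrace(PTRACE_SYSCALL, child, NULL, NULL);
                waitpid(child, &status, 0);
                if (WIFEXITED(status))
                        break;
                if (WIFSTOPPED(status) &&
                    WSTOPSIG(status) == (SIGTRAP | 0x80))
                        printf("syscall stop\n");
        }
        return 0;
}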
*/ - if (!access_ok(VERIFY_READ, datap, - sizeof(struct user_fxsr_struct))) { - ret = -EIO; - break; - } - set_stopped_child_used_math(child); - ret = set_fpxregs(child, (struct user_fxsr_struct __user *)data); - break; - } - - case PTRACE_GET_THREAD_AREA: - ret = ptrace_get_thread_area(child, addr, - (struct user_desc __user *) data); - break; - - case PTRACE_SET_THREAD_AREA: - ret = ptrace_set_thread_area(child, addr, - (struct user_desc __user *) data); - break; - - default: - ret = ptrace_request(child, request, addr, data); - break; - } - out_tsk: - return ret; -} - -void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code) -{ - struct siginfo info; - - tsk->thread.trap_no = 1; - tsk->thread.error_code = error_code; - - memset(&info, 0, sizeof(info)); - info.si_signo = SIGTRAP; - info.si_code = TRAP_BRKPT; - - /* User-mode eip? */ - info.si_addr = user_mode_vm(regs) ? (void __user *) regs->eip : NULL; - - /* Send us the fake SIGTRAP */ - force_sig_info(SIGTRAP, &info, tsk); -} - -/* notification of system call entry/exit - * - triggered by current->work.syscall_trace - */ -__attribute__((regparm(3))) -int do_syscall_trace(struct pt_regs *regs, int entryexit) -{ - int is_sysemu = test_thread_flag(TIF_SYSCALL_EMU); - /* - * With TIF_SYSCALL_EMU set we want to ignore TIF_SINGLESTEP for syscall - * interception - */ - int is_singlestep = !is_sysemu && test_thread_flag(TIF_SINGLESTEP); - int ret = 0; - - /* do the secure computing check first */ - if (!entryexit) - secure_computing(regs->orig_eax); - - if (unlikely(current->audit_context)) { - if (entryexit) - audit_syscall_exit(AUDITSC_RESULT(regs->eax), - regs->eax); - /* Debug traps, when using PTRACE_SINGLESTEP, must be sent only - * on the syscall exit path. Normally, when TIF_SYSCALL_AUDIT is - * not used, entry.S will call us only on syscall exit, not - * entry; so when TIF_SYSCALL_AUDIT is used we must avoid - * calling send_sigtrap() on syscall entry. - * - * Note that when PTRACE_SYSEMU_SINGLESTEP is used, - * is_singlestep is false, despite his name, so we will still do - * the correct thing. - */ - else if (is_singlestep) - goto out; - } - - if (!(current->ptrace & PT_PTRACED)) - goto out; - - /* If a process stops on the 1st tracepoint with SYSCALL_TRACE - * and then is resumed with SYSEMU_SINGLESTEP, it will come in - * here. We have to check this and return */ - if (is_sysemu && entryexit) - return 0; - - /* Fake a debug trap */ - if (is_singlestep) - send_sigtrap(current, regs, 0); - - if (!test_thread_flag(TIF_SYSCALL_TRACE) && !is_sysemu) - goto out; - - /* the 0x80 provides a way for the tracing parent to distinguish - between a syscall stop and SIGTRAP delivery */ - /* Note that the debugger could change the result of test_thread_flag!*/ - ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) ? 0x80:0)); - - /* - * this isn't the same as continuing with a signal, but it will do - * for normal use. strace only continues with a signal if the - * stopping signal is not SIGTRAP. 
-brl - */ - if (current->exit_code) { - send_sig(current->exit_code, current, 1); - current->exit_code = 0; - } - ret = is_sysemu; -out: - if (unlikely(current->audit_context) && !entryexit) - audit_syscall_entry(AUDIT_ARCH_I386, regs->orig_eax, - regs->ebx, regs->ecx, regs->edx, regs->esi); - if (ret == 0) - return 0; - - regs->orig_eax = -1; /* force skip of syscall restarting */ - if (unlikely(current->audit_context)) - audit_syscall_exit(AUDITSC_RESULT(regs->eax), regs->eax); - return 1; -} diff -puN arch/x86/kernel/ptrace_64.c~git-x86 /dev/null --- a/arch/x86/kernel/ptrace_64.c +++ /dev/null @@ -1,621 +0,0 @@ -/* By Ross Biro 1/23/92 */ -/* - * Pentium III FXSR, SSE support - * Gareth Hughes , May 2000 - * - * x86-64 port 2000-2002 Andi Kleen - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * does not yet catch signals sent when the child dies. - * in exit.c or in signal.c. - */ - -/* - * Determines which flags the user has access to [1 = access, 0 = no access]. - * Prohibits changing ID(21), VIP(20), VIF(19), VM(17), IOPL(12-13), IF(9). - * Also masks reserved bits (63-22, 15, 5, 3, 1). - */ -#define FLAG_MASK 0x54dd5UL - -/* set's the trap flag. */ -#define TRAP_FLAG 0x100UL - -/* - * eflags and offset of eflags on child stack.. - */ -#define EFLAGS offsetof(struct pt_regs, eflags) -#define EFL_OFFSET ((int)(EFLAGS-sizeof(struct pt_regs))) - -/* - * this routine will get a word off of the processes privileged stack. - * the offset is how far from the base addr as stored in the TSS. - * this routine assumes that all the privileged stacks are in our - * data space. - */ -static inline unsigned long get_stack_long(struct task_struct *task, int offset) -{ - unsigned char *stack; - - stack = (unsigned char *)task->thread.rsp0; - stack += offset; - return (*((unsigned long *)stack)); -} - -/* - * this routine will put a word on the processes privileged stack. - * the offset is how far from the base addr as stored in the TSS. - * this routine assumes that all the privileged stacks are in our - * data space. - */ -static inline long put_stack_long(struct task_struct *task, int offset, - unsigned long data) -{ - unsigned char * stack; - - stack = (unsigned char *) task->thread.rsp0; - stack += offset; - *(unsigned long *) stack = data; - return 0; -} - -#define LDT_SEGMENT 4 - -unsigned long convert_rip_to_linear(struct task_struct *child, struct pt_regs *regs) -{ - unsigned long addr, seg; - - addr = regs->rip; - seg = regs->cs & 0xffff; - - /* - * We'll assume that the code segments in the GDT - * are all zero-based. That is largely true: the - * TLS segments are used for data, and the PNPBIOS - * and APM bios ones we just ignore here. - */ - if (seg & LDT_SEGMENT) { - u32 *desc; - unsigned long base; - - seg &= ~7UL; - - mutex_lock(&child->mm->context.lock); - if (unlikely((seg >> 3) >= child->mm->context.size)) - addr = -1L; /* bogus selector, access would fault */ - else { - desc = child->mm->context.ldt + seg; - base = ((desc[0] >> 16) | - ((desc[1] & 0xff) << 16) | - (desc[1] & 0xff000000)); - - /* 16-bit code segment? 
*/ - if (!((desc[1] >> 22) & 1)) - addr &= 0xffff; - addr += base; - } - mutex_unlock(&child->mm->context.lock); - } - - return addr; -} - -static int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs) -{ - int i, copied; - unsigned char opcode[15]; - unsigned long addr = convert_rip_to_linear(child, regs); - - copied = access_process_vm(child, addr, opcode, sizeof(opcode), 0); - for (i = 0; i < copied; i++) { - switch (opcode[i]) { - /* popf and iret */ - case 0x9d: case 0xcf: - return 1; - - /* CHECKME: 64 65 */ - - /* opcode and address size prefixes */ - case 0x66: case 0x67: - continue; - /* irrelevant prefixes (segment overrides and repeats) */ - case 0x26: case 0x2e: - case 0x36: case 0x3e: - case 0x64: case 0x65: - case 0xf2: case 0xf3: - continue; - - case 0x40 ... 0x4f: - if (regs->cs != __USER_CS) - /* 32-bit mode: register increment */ - return 0; - /* 64-bit mode: REX prefix */ - continue; - - /* CHECKME: f2, f3 */ - - /* - * pushf: NOTE! We should probably not let - * the user see the TF bit being set. But - * it's more pain than it's worth to avoid - * it, and a debugger could emulate this - * all in user space if it _really_ cares. - */ - case 0x9c: - default: - return 0; - } - } - return 0; -} - -static void set_singlestep(struct task_struct *child) -{ - struct pt_regs *regs = task_pt_regs(child); - - /* - * Always set TIF_SINGLESTEP - this guarantees that - * we single-step system calls etc.. This will also - * cause us to set TF when returning to user mode. - */ - set_tsk_thread_flag(child, TIF_SINGLESTEP); - - /* - * If TF was already set, don't do anything else - */ - if (regs->eflags & TRAP_FLAG) - return; - - /* Set TF on the kernel stack.. */ - regs->eflags |= TRAP_FLAG; - - /* - * ..but if TF is changed by the instruction we will trace, - * don't mark it as being "us" that set it, so that we - * won't clear it by hand later. - */ - if (is_setting_trap_flag(child, regs)) - return; - - child->ptrace |= PT_DTRACE; -} - -static void clear_singlestep(struct task_struct *child) -{ - /* Always clear TIF_SINGLESTEP... */ - clear_tsk_thread_flag(child, TIF_SINGLESTEP); - - /* But touch TF only if it was set by us.. */ - if (child->ptrace & PT_DTRACE) { - struct pt_regs *regs = task_pt_regs(child); - regs->eflags &= ~TRAP_FLAG; - child->ptrace &= ~PT_DTRACE; - } -} - -/* - * Called by kernel/ptrace.c when detaching.. - * - * Make sure the single step bit is not set. 
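is_setting_trap_flag() just above is a bounded instruction scanner: skip prefixes, classify the first real opcode, and report whether it can pop TF off the stack. Condensed into a standalone sketch (32-bit prefix set only; the REX handling of the 64-bit copy is left out):

/* Sketch: 1 if the instruction in buf may restore TF (popf/iret). */
static int may_set_tf(const unsigned char *buf, int len)
{
        int i;

        for (i = 0; i < len; i++) {
                switch (buf[i]) {
                case 0x9d: case 0xcf:           /* popf, iret */
                        return 1;
                case 0x66: case 0x67:           /* size prefixes */
                case 0x26: case 0x2e:           /* segment overrides */
                case 0x36: case 0x3e:
                case 0x64: case 0x65:
                case 0xf0: case 0xf2: case 0xf3: /* lock, rep */
                        continue;
                default:                        /* incl. pushf, 0x9c */
                        return 0;
                }
        }
        return 0;
}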
- */ -void ptrace_disable(struct task_struct *child) -{ - clear_singlestep(child); -} - -static int putreg(struct task_struct *child, - unsigned long regno, unsigned long value) -{ - unsigned long tmp; - - switch (regno) { - case offsetof(struct user_regs_struct,fs): - if (value && (value & 3) != 3) - return -EIO; - child->thread.fsindex = value & 0xffff; - return 0; - case offsetof(struct user_regs_struct,gs): - if (value && (value & 3) != 3) - return -EIO; - child->thread.gsindex = value & 0xffff; - return 0; - case offsetof(struct user_regs_struct,ds): - if (value && (value & 3) != 3) - return -EIO; - child->thread.ds = value & 0xffff; - return 0; - case offsetof(struct user_regs_struct,es): - if (value && (value & 3) != 3) - return -EIO; - child->thread.es = value & 0xffff; - return 0; - case offsetof(struct user_regs_struct,ss): - if ((value & 3) != 3) - return -EIO; - value &= 0xffff; - return 0; - case offsetof(struct user_regs_struct,fs_base): - if (value >= TASK_SIZE_OF(child)) - return -EIO; - child->thread.fs = value; - return 0; - case offsetof(struct user_regs_struct,gs_base): - if (value >= TASK_SIZE_OF(child)) - return -EIO; - child->thread.gs = value; - return 0; - case offsetof(struct user_regs_struct, eflags): - value &= FLAG_MASK; - tmp = get_stack_long(child, EFL_OFFSET); - tmp &= ~FLAG_MASK; - value |= tmp; - break; - case offsetof(struct user_regs_struct,cs): - if ((value & 3) != 3) - return -EIO; - value &= 0xffff; - break; - } - put_stack_long(child, regno - sizeof(struct pt_regs), value); - return 0; -} - -static unsigned long getreg(struct task_struct *child, unsigned long regno) -{ - unsigned long val; - switch (regno) { - case offsetof(struct user_regs_struct, fs): - return child->thread.fsindex; - case offsetof(struct user_regs_struct, gs): - return child->thread.gsindex; - case offsetof(struct user_regs_struct, ds): - return child->thread.ds; - case offsetof(struct user_regs_struct, es): - return child->thread.es; - case offsetof(struct user_regs_struct, fs_base): - return child->thread.fs; - case offsetof(struct user_regs_struct, gs_base): - return child->thread.gs; - default: - regno = regno - sizeof(struct pt_regs); - val = get_stack_long(child, regno); - if (test_tsk_thread_flag(child, TIF_IA32)) - val &= 0xffffffff; - return val; - } - -} - -long arch_ptrace(struct task_struct *child, long request, long addr, long data) -{ - long i, ret; - unsigned ui; - - switch (request) { - /* when I and D space are separate, these will need to be fixed. */ - case PTRACE_PEEKTEXT: /* read word at location addr. */ - case PTRACE_PEEKDATA: - ret = generic_ptrace_peekdata(child, addr, data); - break; - - /* read the word at location addr in the USER area. */ - case PTRACE_PEEKUSR: { - unsigned long tmp; - - ret = -EIO; - if ((addr & 7) || - addr > sizeof(struct user) - 7) - break; - - switch (addr) { - case 0 ... 
sizeof(struct user_regs_struct) - sizeof(long): - tmp = getreg(child, addr); - break; - case offsetof(struct user, u_debugreg[0]): - tmp = child->thread.debugreg0; - break; - case offsetof(struct user, u_debugreg[1]): - tmp = child->thread.debugreg1; - break; - case offsetof(struct user, u_debugreg[2]): - tmp = child->thread.debugreg2; - break; - case offsetof(struct user, u_debugreg[3]): - tmp = child->thread.debugreg3; - break; - case offsetof(struct user, u_debugreg[6]): - tmp = child->thread.debugreg6; - break; - case offsetof(struct user, u_debugreg[7]): - tmp = child->thread.debugreg7; - break; - default: - tmp = 0; - break; - } - ret = put_user(tmp,(unsigned long __user *) data); - break; - } - - /* when I and D space are separate, this will have to be fixed. */ - case PTRACE_POKETEXT: /* write the word at location addr. */ - case PTRACE_POKEDATA: - ret = generic_ptrace_pokedata(child, addr, data); - break; - - case PTRACE_POKEUSR: /* write the word at location addr in the USER area */ - { - int dsize = test_tsk_thread_flag(child, TIF_IA32) ? 3 : 7; - ret = -EIO; - if ((addr & 7) || - addr > sizeof(struct user) - 7) - break; - - switch (addr) { - case 0 ... sizeof(struct user_regs_struct) - sizeof(long): - ret = putreg(child, addr, data); - break; - /* Disallows to set a breakpoint into the vsyscall */ - case offsetof(struct user, u_debugreg[0]): - if (data >= TASK_SIZE_OF(child) - dsize) break; - child->thread.debugreg0 = data; - ret = 0; - break; - case offsetof(struct user, u_debugreg[1]): - if (data >= TASK_SIZE_OF(child) - dsize) break; - child->thread.debugreg1 = data; - ret = 0; - break; - case offsetof(struct user, u_debugreg[2]): - if (data >= TASK_SIZE_OF(child) - dsize) break; - child->thread.debugreg2 = data; - ret = 0; - break; - case offsetof(struct user, u_debugreg[3]): - if (data >= TASK_SIZE_OF(child) - dsize) break; - child->thread.debugreg3 = data; - ret = 0; - break; - case offsetof(struct user, u_debugreg[6]): - if (data >> 32) - break; - child->thread.debugreg6 = data; - ret = 0; - break; - case offsetof(struct user, u_debugreg[7]): - /* See arch/i386/kernel/ptrace.c for an explanation of - * this awkward check.*/ - data &= ~DR_CONTROL_RESERVED; - for(i=0; i<4; i++) - if ((0x5554 >> ((data >> (16 + 4*i)) & 0xf)) & 1) - break; - if (i == 4) { - child->thread.debugreg7 = data; - if (data) - set_tsk_thread_flag(child, TIF_DEBUG); - else - clear_tsk_thread_flag(child, TIF_DEBUG); - ret = 0; - } - break; - } - break; - } - case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */ - case PTRACE_CONT: /* restart after signal. */ - - ret = -EIO; - if (!valid_signal(data)) - break; - if (request == PTRACE_SYSCALL) - set_tsk_thread_flag(child,TIF_SYSCALL_TRACE); - else - clear_tsk_thread_flag(child,TIF_SYSCALL_TRACE); - clear_tsk_thread_flag(child, TIF_SINGLESTEP); - child->exit_code = data; - /* make sure the single step bit is not set. */ - clear_singlestep(child); - wake_up_process(child); - ret = 0; - break; - -#ifdef CONFIG_IA32_EMULATION - /* This makes only sense with 32bit programs. Allow a - 64bit debugger to fully examine them too. Better - don't use it against 64bit processes, use - PTRACE_ARCH_PRCTL instead. 
*/ - case PTRACE_SET_THREAD_AREA: { - struct user_desc __user *p; - int old; - p = (struct user_desc __user *)data; - get_user(old, &p->entry_number); - put_user(addr, &p->entry_number); - ret = do_set_thread_area(&child->thread, p); - put_user(old, &p->entry_number); - break; - case PTRACE_GET_THREAD_AREA: - p = (struct user_desc __user *)data; - get_user(old, &p->entry_number); - put_user(addr, &p->entry_number); - ret = do_get_thread_area(&child->thread, p); - put_user(old, &p->entry_number); - break; - } -#endif - /* normal 64bit interface to access TLS data. - Works just like arch_prctl, except that the arguments - are reversed. */ - case PTRACE_ARCH_PRCTL: - ret = do_arch_prctl(child, data, addr); - break; - -/* - * make the child exit. Best I can do is send it a sigkill. - * perhaps it should be put in the status that it wants to - * exit. - */ - case PTRACE_KILL: - ret = 0; - if (child->exit_state == EXIT_ZOMBIE) /* already dead */ - break; - clear_tsk_thread_flag(child, TIF_SINGLESTEP); - child->exit_code = SIGKILL; - /* make sure the single step bit is not set. */ - clear_singlestep(child); - wake_up_process(child); - break; - - case PTRACE_SINGLESTEP: /* set the trap flag. */ - ret = -EIO; - if (!valid_signal(data)) - break; - clear_tsk_thread_flag(child,TIF_SYSCALL_TRACE); - set_singlestep(child); - child->exit_code = data; - /* give it a chance to run. */ - wake_up_process(child); - ret = 0; - break; - - case PTRACE_GETREGS: { /* Get all gp regs from the child. */ - if (!access_ok(VERIFY_WRITE, (unsigned __user *)data, - sizeof(struct user_regs_struct))) { - ret = -EIO; - break; - } - ret = 0; - for (ui = 0; ui < sizeof(struct user_regs_struct); ui += sizeof(long)) { - ret |= __put_user(getreg(child, ui),(unsigned long __user *) data); - data += sizeof(long); - } - break; - } - - case PTRACE_SETREGS: { /* Set all gp regs in the child. */ - unsigned long tmp; - if (!access_ok(VERIFY_READ, (unsigned __user *)data, - sizeof(struct user_regs_struct))) { - ret = -EIO; - break; - } - ret = 0; - for (ui = 0; ui < sizeof(struct user_regs_struct); ui += sizeof(long)) { - ret = __get_user(tmp, (unsigned long __user *) data); - if (ret) - break; - ret = putreg(child, ui, tmp); - if (ret) - break; - data += sizeof(long); - } - break; - } - - case PTRACE_GETFPREGS: { /* Get the child extended FPU state. */ - if (!access_ok(VERIFY_WRITE, (unsigned __user *)data, - sizeof(struct user_i387_struct))) { - ret = -EIO; - break; - } - ret = get_fpregs((struct user_i387_struct __user *)data, child); - break; - } - - case PTRACE_SETFPREGS: { /* Set the child extended FPU state. */ - if (!access_ok(VERIFY_READ, (unsigned __user *)data, - sizeof(struct user_i387_struct))) { - ret = -EIO; - break; - } - set_stopped_child_used_math(child); - ret = set_fpregs(child, (struct user_i387_struct __user *)data); - break; - } - - default: - ret = ptrace_request(child, request, addr, data); - break; - } - return ret; -} - -static void syscall_trace(struct pt_regs *regs) -{ - -#if 0 - printk("trace %s rip %lx rsp %lx rax %d origrax %d caller %lx tiflags %x ptrace %x\n", - current->comm, - regs->rip, regs->rsp, regs->rax, regs->orig_rax, __builtin_return_address(0), - current_thread_info()->flags, current->ptrace); -#endif - - ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) - ? 0x80 : 0)); - /* - * this isn't the same as continuing with a signal, but it will do - * for normal use. strace only continues with a signal if the - * stopping signal is not SIGTRAP. 
-brl - */ - if (current->exit_code) { - send_sig(current->exit_code, current, 1); - current->exit_code = 0; - } -} - -asmlinkage void syscall_trace_enter(struct pt_regs *regs) -{ - /* do the secure computing check first */ - secure_computing(regs->orig_rax); - - if (test_thread_flag(TIF_SYSCALL_TRACE) - && (current->ptrace & PT_PTRACED)) - syscall_trace(regs); - - if (unlikely(current->audit_context)) { - if (test_thread_flag(TIF_IA32)) { - audit_syscall_entry(AUDIT_ARCH_I386, - regs->orig_rax, - regs->rbx, regs->rcx, - regs->rdx, regs->rsi); - } else { - audit_syscall_entry(AUDIT_ARCH_X86_64, - regs->orig_rax, - regs->rdi, regs->rsi, - regs->rdx, regs->r10); - } - } -} - -asmlinkage void syscall_trace_leave(struct pt_regs *regs) -{ - if (unlikely(current->audit_context)) - audit_syscall_exit(AUDITSC_RESULT(regs->rax), regs->rax); - - if ((test_thread_flag(TIF_SYSCALL_TRACE) - || test_thread_flag(TIF_SINGLESTEP)) - && (current->ptrace & PT_PTRACED)) - syscall_trace(regs); -} diff -puN arch/x86/kernel/reboot_fixups_32.c~git-x86 arch/x86/kernel/reboot_fixups_32.c --- a/arch/x86/kernel/reboot_fixups_32.c~git-x86 +++ a/arch/x86/kernel/reboot_fixups_32.c @@ -30,6 +30,19 @@ static void cs5536_warm_reset(struct pci udelay(50); /* shouldn't get here but be safe and spin a while */ } +static void rdc321x_reset(struct pci_dev *dev) +{ + unsigned i; + /* Voluntarily reset the watchdog timer */ + outl(0x80003840, 0xCF8); + /* Generate a CPU reset on next tick */ + i = inl(0xCFC); + /* Use the minimum timer resolution */ + i |= 0x1600; + outl(i, 0xCFC); + outb(1, 0x92); +} + struct device_fixup { unsigned int vendor; unsigned int device; @@ -40,6 +53,7 @@ static struct device_fixup fixups_table[ { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, cs5530a_warm_reset }, { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA, cs5536_warm_reset }, { PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_BRIDGE, cs5530a_warm_reset }, +{ PCI_VENDOR_ID_RDC, PCI_DEVICE_ID_RDC_R6030, rdc321x_reset }, }; /* diff -puN /dev/null arch/x86/kernel/rtc.c --- /dev/null +++ a/arch/x86/kernel/rtc.c @@ -0,0 +1,196 @@ +/* + * RTC related functions + */ +#include +#include +#include + +#include + +#ifdef CONFIG_X86_32 +# define CMOS_YEARS_OFFS 1900 +/* + * This is a special lock that is owned by the CPU and holds the index + * register we are working with. It is required for NMI access to the + * CMOS/RTC registers. See include/asm-i386/mc146818rtc.h for details. + */ +volatile unsigned long cmos_lock = 0; +EXPORT_SYMBOL(cmos_lock); +#else +/* + * x86-64 systems have only existed since 2002. + * This will work up to Dec 31, 2100 + */ +# define CMOS_YEARS_OFFS 2000 +#endif + +DEFINE_SPINLOCK(rtc_lock); +EXPORT_SYMBOL(rtc_lock); + +/* + * In order to set the CMOS clock precisely, set_rtc_mmss has to be + * called 500 ms after the start of the second denoted by nowtime, + * because when nowtime is written into the registers of the CMOS + * clock, it will jump to the next second precisely 500 ms later. + * Check the Motorola MC146818A or Dallas DS12887 data sheet for + * details. + * + * BUG: This routine does not handle hour overflow properly; it just + * sets the minutes. Usually you'll only notice that after reboot!
+ */ +int mach_set_rtc_mmss(unsigned long nowtime) +{ + int retval = 0; + int real_seconds, real_minutes, cmos_minutes; + unsigned char save_control, save_freq_select; + + /* tell the clock it's being set */ + save_control = CMOS_READ(RTC_CONTROL); + CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL); + + /* stop and reset prescaler */ + save_freq_select = CMOS_READ(RTC_FREQ_SELECT); + CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT); + + cmos_minutes = CMOS_READ(RTC_MINUTES); + if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) + BCD_TO_BIN(cmos_minutes); + + /* + * since we're only adjusting minutes and seconds, + * don't interfere with hour overflow. This avoids + * messing with unknown time zones but requires your + * RTC not to be off by more than 15 minutes + */ + real_seconds = nowtime % 60; + real_minutes = nowtime / 60; + /* correct for half hour time zone */ + if (((abs(real_minutes - cmos_minutes) + 15)/30) & 1) + real_minutes += 30; + real_minutes %= 60; + + if (abs(real_minutes - cmos_minutes) < 30) { + if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { + BIN_TO_BCD(real_seconds); + BIN_TO_BCD(real_minutes); + } + CMOS_WRITE(real_seconds,RTC_SECONDS); + CMOS_WRITE(real_minutes,RTC_MINUTES); + } else { + printk(KERN_WARNING + "set_rtc_mmss: can't update from %d to %d\n", + cmos_minutes, real_minutes); + retval = -1; + } + + /* The following flags have to be released exactly in this order, + * otherwise the DS12887 (popular MC146818A clone with integrated + * battery and quartz) will not reset the oscillator and will not + * update precisely 500 ms later. You won't find this mentioned in + * the Dallas Semiconductor data sheets, but who believes data + * sheets anyway ... -- Markus Kuhn + */ + CMOS_WRITE(save_control, RTC_CONTROL); + CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); + + return retval; +} + +unsigned long mach_get_cmos_time(void) +{ + unsigned int year, mon, day, hour, min, sec, century = 0; + + /* + * If UIP is clear, then we have >= 244 microseconds before + * RTC registers will be updated. The spec sheet says that this + * is the reliable way to read the RTC registers. If UIP is set + * then the register access might be invalid. + */ + while ((CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP)) + cpu_relax(); + + sec = CMOS_READ(RTC_SECONDS); + min = CMOS_READ(RTC_MINUTES); + hour = CMOS_READ(RTC_HOURS); + day = CMOS_READ(RTC_DAY_OF_MONTH); + mon = CMOS_READ(RTC_MONTH); + year = CMOS_READ(RTC_YEAR); + +#if defined(CONFIG_ACPI) && defined(CONFIG_X86_64) + /* CHECKME: Is this really 64bit only ??? */ + if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID && + acpi_gbl_FADT.century) + century = CMOS_READ(acpi_gbl_FADT.century); +#endif + + if (RTC_ALWAYS_BCD || !(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY)) { + BCD_TO_BIN(sec); + BCD_TO_BIN(min); + BCD_TO_BIN(hour); + BCD_TO_BIN(day); + BCD_TO_BIN(mon); + BCD_TO_BIN(year); + } + + if (century) { + BCD_TO_BIN(century); + year += century * 100; + printk(KERN_INFO "Extended CMOS year: %d\n", century * 100); + } else { + year += CMOS_YEARS_OFFS; + if (year < 1970) + year += 100; + } + + return mktime(year, mon, day, hour, min, sec); +} + +/* Routines for accessing the CMOS RAM/RTC.
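The BCD_TO_BIN()/BIN_TO_BCD() conversions used throughout the RTC code above are simple nibble arithmetic; written out as functions (equivalent to the classic mc146818rtc.h macros):

static unsigned int bcd_to_bin(unsigned int x)
{
        return (x & 0x0f) + (x >> 4) * 10;      /* 0x59 -> 59 */
}

static unsigned int bin_to_bcd(unsigned int x)
{
        return ((x / 10) << 4) | (x % 10);      /* 59 -> 0x59 */
}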
*/ +unsigned char rtc_cmos_read(unsigned char addr) +{ + unsigned char val; + + lock_cmos_prefix(addr); + outb_p(addr, RTC_PORT(0)); + val = inb_p(RTC_PORT(1)); + lock_cmos_suffix(addr); + return val; +} +EXPORT_SYMBOL(rtc_cmos_read); + +void rtc_cmos_write(unsigned char val, unsigned char addr) +{ + lock_cmos_prefix(addr); + outb_p(addr, RTC_PORT(0)); + outb_p(val, RTC_PORT(1)); + lock_cmos_suffix(addr); +} +EXPORT_SYMBOL(rtc_cmos_write); + +static int set_rtc_mmss(unsigned long nowtime) +{ + int retval; + unsigned long flags; + + spin_lock_irqsave(&rtc_lock, flags); + retval = set_wallclock(nowtime); + spin_unlock_irqrestore(&rtc_lock, flags); + + return retval; +} + +/* not static: needed by APM */ +unsigned long read_persistent_clock(void) +{ + unsigned long retval, flags; + + spin_lock_irqsave(&rtc_lock, flags); + retval = get_wallclock(); + spin_unlock_irqrestore(&rtc_lock, flags); + + return retval; +} + +int update_persistent_clock(struct timespec now) +{ + return set_rtc_mmss(now.tv_sec); +} diff -puN arch/x86/kernel/setup64.c~git-x86 arch/x86/kernel/setup64.c --- a/arch/x86/kernel/setup64.c~git-x86 +++ a/arch/x86/kernel/setup64.c @@ -169,7 +169,8 @@ void syscall_init(void) #endif /* Flags to clear on syscall */ - wrmsrl(MSR_SYSCALL_MASK, EF_TF|EF_DF|EF_IE|0x3000); + wrmsrl(MSR_SYSCALL_MASK, + X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL); } void __cpuinit check_efer(void) diff -puN arch/x86/kernel/setup_32.c~git-x86 arch/x86/kernel/setup_32.c --- a/arch/x86/kernel/setup_32.c~git-x86 +++ a/arch/x86/kernel/setup_32.c @@ -44,6 +44,7 @@ #include #include #include +#include #include
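On the setup64.c hunk above: assuming the old EF_* constants carried the usual EFLAGS values, the named-constant form clears exactly the same bits as the old literal, since X86_EFLAGS_TF is 0x100, X86_EFLAGS_IF 0x200, X86_EFLAGS_DF 0x400 and X86_EFLAGS_IOPL 0x3000. A tiny standalone check (constants restated here rather than pulled from kernel headers):

#include <assert.h>

#define X86_EFLAGS_TF   0x00000100      /* trap flag */
#define X86_EFLAGS_IF   0x00000200      /* interrupt enable */
#define X86_EFLAGS_DF   0x00000400      /* direction flag */
#define X86_EFLAGS_IOPL 0x00003000      /* I/O privilege level */

int main(void)
{
        /* same mask the old EF_TF|EF_DF|EF_IE|0x3000 produced */
        assert((X86_EFLAGS_TF | X86_EFLAGS_DF |
                X86_EFLAGS_IF | X86_EFLAGS_IOPL) == 0x3700);
        return 0;
}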